diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index 2e020e2f92..3684b005d5 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -26,52 +26,23 @@ set(EXEC_FILES arrow/orc_reader.cpp arrow/parquet_reader.cpp arrow/parquet_row_group_reader.cpp - analytic_eval_node.cpp - blocking_join_node.cpp - broker_scan_node.cpp base_scanner.cpp - broker_scanner.cpp - cross_join_node.cpp - csv_scan_node.cpp - csv_scanner.cpp data_sink.cpp decompressor.cpp - empty_set_node.cpp exec_node.cpp - exchange_node.cpp - hash_join_node.cpp hash_table.cpp - merge_node.cpp - scan_node.cpp - select_node.cpp text_converter.cpp - topn_node.cpp - sort_exec_exprs.cpp - olap_scan_node.cpp - olap_scanner.cpp olap_common.cpp tablet_info.cpp tablet_sink.cpp plain_binary_line_reader.cpp plain_text_line_reader.cpp - csv_scan_node.cpp - csv_scanner.cpp - table_function_node.cpp - es_http_scan_node.cpp - es_http_scanner.cpp es/es_predicate.cpp es/es_scan_reader.cpp es/es_scroll_query.cpp es/es_scroll_parser.cpp es/es_query_builder.cpp - spill_sort_node.cc - union_node.cpp - set_operation_node.cpp - intersect_node.cpp - except_node.cpp - repeat_node.cpp schema_scanner.cpp - schema_scan_node.cpp schema_scanner/schema_tables_scanner.cpp schema_scanner/schema_dummy_scanner.cpp schema_scanner/schema_schemata_scanner.cpp @@ -89,18 +60,10 @@ set(EXEC_FILES schema_scanner/schema_partitions_scanner.cpp schema_scanner/schema_rowsets_scanner.cpp schema_scanner/schema_backends_scanner.cpp - - partitioned_hash_table.cc - partitioned_aggregation_node.cc - odbc_scan_node.cpp - parquet_scanner.cpp - parquet_writer.cpp - orc_scanner.cpp + scan_node.cpp odbc_connector.cpp table_connector.cpp - json_scanner.cpp - assert_num_rows_node.cpp - + schema_scanner.cpp ) if (WITH_MYSQL) set(EXEC_FILES diff --git a/be/src/exec/analytic_eval_node.cpp b/be/src/exec/analytic_eval_node.cpp deleted file mode 100644 index f46c463d29..0000000000 --- a/be/src/exec/analytic_eval_node.cpp +++ /dev/null @@ -1,939 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/analytic-eval-node.cc -// and modified by Doris - -#include "exec/analytic_eval_node.h" - -#include "exprs/agg_fn_evaluator.h" -#include "runtime/descriptors.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "udf/udf_internal.h" - -namespace doris { - -using doris_udf::BigIntVal; - -AnalyticEvalNode::AnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _window(tnode.analytic_node.window), - _intermediate_tuple_desc( - descs.get_tuple_descriptor(tnode.analytic_node.intermediate_tuple_id)), - _result_tuple_desc(descs.get_tuple_descriptor(tnode.analytic_node.output_tuple_id)), - _buffered_tuple_desc(nullptr), - _partition_by_eq_expr_ctx(nullptr), - _order_by_eq_expr_ctx(nullptr), - _rows_start_offset(0), - _rows_end_offset(0), - _has_first_val_null_offset(false), - _first_val_null_offset(0), - _last_result_idx(-1), - _prev_pool_last_result_idx(-1), - _prev_pool_last_window_idx(-1), - _curr_tuple(nullptr), - _dummy_result_tuple(nullptr), - _curr_partition_idx(-1), - _prev_input_row(nullptr), - _block_mgr_client(nullptr), - _input_eos(false), - _evaluation_timer(nullptr) { - if (tnode.analytic_node.__isset.buffered_tuple_id) { - _buffered_tuple_desc = descs.get_tuple_descriptor(tnode.analytic_node.buffered_tuple_id); - } - - if (!tnode.analytic_node.__isset.window) { - _fn_scope = AnalyticEvalNode::PARTITION; - } else if (tnode.analytic_node.window.type == TAnalyticWindowType::RANGE) { - _fn_scope = AnalyticEvalNode::RANGE; - DCHECK(!_window.__isset.window_start) << "RANGE windows must have UNBOUNDED PRECEDING"; - DCHECK(!_window.__isset.window_end || - _window.window_end.type == TAnalyticWindowBoundaryType::CURRENT_ROW) - << "RANGE window end bound must be CURRENT ROW or UNBOUNDED FOLLOWING"; - } else { - DCHECK_EQ(tnode.analytic_node.window.type, TAnalyticWindowType::ROWS); - _fn_scope = AnalyticEvalNode::ROWS; - - if (_window.__isset.window_start) { - TAnalyticWindowBoundary b = _window.window_start; - - if (b.__isset.rows_offset_value) { - _rows_start_offset = b.rows_offset_value; - - if (b.type == TAnalyticWindowBoundaryType::PRECEDING) { - _rows_start_offset *= -1; - } - } else { - DCHECK_EQ(b.type, TAnalyticWindowBoundaryType::CURRENT_ROW); - _rows_start_offset = 0; - } - } - - if (_window.__isset.window_end) { - TAnalyticWindowBoundary b = _window.window_end; - - if (b.__isset.rows_offset_value) { - _rows_end_offset = b.rows_offset_value; - - if (b.type == TAnalyticWindowBoundaryType::PRECEDING) { - _rows_end_offset *= -1; - } - } else { - DCHECK_EQ(b.type, TAnalyticWindowBoundaryType::CURRENT_ROW); - _rows_end_offset = 0; - } - } - } - - VLOG_ROW << "tnode=" << apache::thrift::ThriftDebugString(tnode); -} - -Status AnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - const TAnalyticNode& analytic_node = tnode.analytic_node; - bool has_lead_fn = false; - - for (int i = 0; i < analytic_node.analytic_functions.size(); ++i) { - AggFnEvaluator* evaluator = nullptr; - RETURN_IF_ERROR(AggFnEvaluator::create(_pool, analytic_node.analytic_functions[i], true, - &evaluator)); - _evaluators.push_back(evaluator); - const TFunction& fn = analytic_node.analytic_functions[i].nodes[0].fn; - _is_lead_fn.push_back("lead" == fn.name.function_name); - has_lead_fn = has_lead_fn || _is_lead_fn.back(); - } - - DCHECK(!has_lead_fn || 
!_window.__isset.window_start);
-    DCHECK(_fn_scope != PARTITION || analytic_node.order_by_exprs.empty());
-    DCHECK(_window.__isset.window_end || !_window.__isset.window_start)
-            << "UNBOUNDED FOLLOWING is only supported with UNBOUNDED PRECEDING.";
-
-    if (analytic_node.__isset.partition_by_eq) {
-        DCHECK(analytic_node.__isset.buffered_tuple_id);
-        RETURN_IF_ERROR(Expr::create_expr_tree(_pool, analytic_node.partition_by_eq,
-                                               &_partition_by_eq_expr_ctx));
-    }
-
-    if (analytic_node.__isset.order_by_eq) {
-        DCHECK(analytic_node.__isset.buffered_tuple_id);
-        RETURN_IF_ERROR(
-                Expr::create_expr_tree(_pool, analytic_node.order_by_eq, &_order_by_eq_expr_ctx));
-    }
-
-    return Status::OK();
-}
-
-Status AnalyticEvalNode::prepare(RuntimeState* state) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    DCHECK(child(0)->row_desc().is_prefix_of(row_desc()));
-    _child_tuple_desc = child(0)->row_desc().tuple_descriptors()[0];
-    _curr_tuple_pool.reset(new MemPool(mem_tracker_held()));
-    _prev_tuple_pool.reset(new MemPool(mem_tracker_held()));
-    _mem_pool.reset(new MemPool(mem_tracker_held()));
-
-    _evaluation_timer = ADD_TIMER(runtime_profile(), "EvaluationTime");
-    DCHECK_EQ(_result_tuple_desc->slots().size(), _evaluators.size());
-
-    for (int i = 0; i < _evaluators.size(); ++i) {
-        doris_udf::FunctionContext* ctx;
-        RETURN_IF_ERROR(_evaluators[i]->prepare(state, child(0)->row_desc(), _mem_pool.get(),
-                                                _intermediate_tuple_desc->slots()[i],
-                                                _result_tuple_desc->slots()[i], &ctx));
-        _fn_ctxs.push_back(ctx);
-        state->obj_pool()->add(ctx);
-    }
-
-    if (_partition_by_eq_expr_ctx != nullptr || _order_by_eq_expr_ctx != nullptr) {
-        DCHECK(_buffered_tuple_desc != nullptr);
-        std::vector<TTupleId> tuple_ids;
-        tuple_ids.push_back(child(0)->row_desc().tuple_descriptors()[0]->id());
-        tuple_ids.push_back(_buffered_tuple_desc->id());
-        RowDescriptor cmp_row_desc(state->desc_tbl(), tuple_ids, std::vector<bool>(2, false));
-
-        if (_partition_by_eq_expr_ctx != nullptr) {
-            RETURN_IF_ERROR(_partition_by_eq_expr_ctx->prepare(state, cmp_row_desc));
-            //AddExprCtxToFree(_partition_by_eq_expr_ctx);
-        }
-
-        if (_order_by_eq_expr_ctx != nullptr) {
-            RETURN_IF_ERROR(_order_by_eq_expr_ctx->prepare(state, cmp_row_desc));
-            //AddExprCtxToFree(_order_by_eq_expr_ctx);
-        }
-    }
-
-    _child_tuple_cmp_row = reinterpret_cast<TupleRow*>(_mem_pool->allocate(sizeof(Tuple*) * 2));
-    return Status::OK();
-}
-
-Status AnalyticEvalNode::open(RuntimeState* state) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::open(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    RETURN_IF_CANCELLED(state);
-    //RETURN_IF_ERROR(QueryMaintenance(state));
-    RETURN_IF_ERROR(child(0)->open(state));
-    RETURN_IF_ERROR(state->block_mgr2()->register_client(2, state, &_block_mgr_client));
-    _input_stream.reset(new BufferedTupleStream2(state, child(0)->row_desc(), state->block_mgr2(),
-                                                 _block_mgr_client, false, true));
-    RETURN_IF_ERROR(_input_stream->init(id(), runtime_profile(), true));
-
-    bool got_read_buffer;
-    RETURN_IF_ERROR(_input_stream->prepare_for_read(true, &got_read_buffer));
-    if (!got_read_buffer) {
-        std::string msg(
-                "Failed to acquire initial read buffer for analytic function "
-                "evaluation. 
Reducing query concurrency or increasing the memory limit may " - "help this query to complete successfully."); - RETURN_LIMIT_EXCEEDED(state, msg); - } - - DCHECK_EQ(_evaluators.size(), _fn_ctxs.size()); - - for (int i = 0; i < _evaluators.size(); ++i) { - RETURN_IF_ERROR(_evaluators[i]->open(state, _fn_ctxs[i])); - - if ("first_value_rewrite" == _evaluators[i]->fn_name() && - _fn_ctxs[i]->get_num_args() == 2) { - DCHECK(!_has_first_val_null_offset); - _first_val_null_offset = - reinterpret_cast(_fn_ctxs[i]->get_constant_arg(1))->val; - VLOG_FILE << id() << " FIRST_VAL rewrite null offset: " << _first_val_null_offset; - _has_first_val_null_offset = true; - } - } - - if (_partition_by_eq_expr_ctx != nullptr) { - RETURN_IF_ERROR(_partition_by_eq_expr_ctx->open(state)); - } - if (_order_by_eq_expr_ctx != nullptr) { - RETURN_IF_ERROR(_order_by_eq_expr_ctx->open(state)); - } - - // An intermediate tuple is only allocated once and is reused. - _curr_tuple = Tuple::create(_intermediate_tuple_desc->byte_size(), _mem_pool.get()); - AggFnEvaluator::init(_evaluators, _fn_ctxs, _curr_tuple); - _dummy_result_tuple = Tuple::create(_result_tuple_desc->byte_size(), _mem_pool.get()); - - // Initialize state for the first partition. - init_next_partition(0); - - // Fetch the first input batch so that some _prev_input_row can be set here to avoid - // special casing in GetNext(). - _prev_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); - _curr_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); - - while (!_input_eos && _prev_input_row == nullptr) { - RETURN_IF_ERROR(child(0)->get_next(state, _curr_child_batch.get(), &_input_eos)); - if (_curr_child_batch->num_rows() > 0) { - _prev_input_row = _curr_child_batch->get_row(0); - process_child_batches(state); - } else { - // Empty batch, still need to reset. 
- _curr_child_batch->reset(); - } - } - - if (_prev_input_row == nullptr) { - DCHECK(_input_eos); - // Delete _curr_child_batch to indicate there is no batch to process in GetNext() - _curr_child_batch.reset(); - } - - return Status::OK(); -} - -std::string debug_window_bound_string(const TAnalyticWindowBoundary& b) { - if (b.type == TAnalyticWindowBoundaryType::CURRENT_ROW) { - return "CURRENT_ROW"; - } - - std::stringstream ss; - - if (b.__isset.rows_offset_value) { - ss << b.rows_offset_value; - } else { - // TODO: Return debug string when range offsets are supported - DCHECK(false) << "Range offsets not yet implemented"; - } - - if (b.type == TAnalyticWindowBoundaryType::PRECEDING) { - ss << " PRECEDING"; - } else { - DCHECK_EQ(b.type, TAnalyticWindowBoundaryType::FOLLOWING); - ss << " FOLLOWING"; - } - - return ss.str(); -} - -std::string AnalyticEvalNode::debug_window_string() const { - std::stringstream ss; - - if (_fn_scope == PARTITION) { - ss << "NO WINDOW"; - return ss.str(); - } - - ss << "{type="; - - if (_fn_scope == RANGE) { - ss << "RANGE"; - } else { - ss << "ROWS"; - } - - ss << ", start="; - - if (_window.__isset.window_start) { - ss << debug_window_bound_string(_window.window_start); - } else { - ss << "UNBOUNDED_PRECEDING"; - } - - ss << ", end="; - - if (_window.__isset.window_end) { - ss << debug_window_bound_string(_window.window_end) << "}"; - } else { - ss << "UNBOUNDED_FOLLOWING"; - } - - return ss.str(); -} - -std::string AnalyticEvalNode::debug_state_string(bool detailed) const { - std::stringstream ss; - ss << "num_returned=" << _input_stream->rows_returned() - << " num_rows=" << _input_stream->num_rows() - << " _curr_partition_idx=" << _curr_partition_idx << " last_result_idx=" << _last_result_idx; - - if (detailed) { - ss << " result_tuples idx: ["; - - for (std::list>::const_iterator it = _result_tuples.begin(); - it != _result_tuples.end(); ++it) { - ss << it->first; - - if (*it != _result_tuples.back()) { - ss << ", "; - } - } - - ss << "]"; - - if (_fn_scope == ROWS && _window.__isset.window_start) { - ss << " window_tuples idx: ["; - - for (std::list>::const_iterator it = _window_tuples.begin(); - it != _window_tuples.end(); ++it) { - ss << it->first; - - if (*it != _window_tuples.back()) { - ss << ", "; - } - } - - ss << "]"; - } - } else { - if (_fn_scope == ROWS && _window.__isset.window_start) { - if (_window_tuples.empty()) { - ss << " window_tuples empty"; - } else { - ss << " window_tuples idx range: (" << _window_tuples.front().first << "," - << _window_tuples.back().first << ")"; - } - } - - if (_result_tuples.empty()) { - ss << " result_tuples empty"; - } else { - ss << " result_tuples idx range: (" << _result_tuples.front().first << "," - << _result_tuples.back().first << ")"; - } - } - - return ss.str(); -} - -void AnalyticEvalNode::add_result_tuple(int64_t stream_idx) { - VLOG_ROW << id() << " add_result_tuple idx=" << stream_idx; - DCHECK(_curr_tuple != nullptr); - Tuple* result_tuple = Tuple::create(_result_tuple_desc->byte_size(), _curr_tuple_pool.get()); - - AggFnEvaluator::get_value(_evaluators, _fn_ctxs, _curr_tuple, result_tuple); - DCHECK_GT(stream_idx, _last_result_idx); - _result_tuples.push_back(std::pair(stream_idx, result_tuple)); - _last_result_idx = stream_idx; - VLOG_ROW << id() << " Added result tuple, final state: " << debug_state_string(true); -} - -inline void AnalyticEvalNode::try_add_result_tuple_for_prev_row(bool next_partition, - int64_t stream_idx, TupleRow* row) { - // The analytic fns are finalized after the 
previous row if we found a new partition - // or the window is a RANGE and the order by exprs changed. For ROWS windows we do not - // need to compare the current row to the previous row. - VLOG_ROW << id() << " try_add_result_tuple_for_prev_row partition=" << next_partition - << " idx=" << stream_idx; - if (_fn_scope == ROWS) { - return; - } - if (next_partition || (_fn_scope == RANGE && _window.__isset.window_end && - !prev_row_compare(_order_by_eq_expr_ctx))) { - add_result_tuple(stream_idx - 1); - } -} - -inline void AnalyticEvalNode::try_add_result_tuple_for_curr_row(int64_t stream_idx, TupleRow* row) { - VLOG_ROW << id() << " try_add_result_tuple_for_curr_row idx=" << stream_idx; - - // We only add results at this point for ROWS windows (unless unbounded following) - if (_fn_scope != ROWS || !_window.__isset.window_end) { - return; - } - - // Nothing to add if the end offset is before the start of the partition. - if (stream_idx - _rows_end_offset < _curr_partition_idx) { - return; - } - - add_result_tuple(stream_idx - _rows_end_offset); -} - -inline void AnalyticEvalNode::try_remove_rows_before_window(int64_t stream_idx) { - if (_fn_scope != ROWS || !_window.__isset.window_start) { - return; - } - - // The start of the window may have been before the current partition, in which case - // there is no tuple to remove in _window_tuples. Check the index of the row at which - // tuples from _window_tuples should begin to be removed. - int64_t remove_idx = - stream_idx - _rows_end_offset + std::min(_rows_start_offset, 0L) - 1; - - if (remove_idx < _curr_partition_idx) { - return; - } - - VLOG_ROW << id() << " Remove idx=" << remove_idx << " stream_idx=" << stream_idx; - DCHECK(!_window_tuples.empty()) << debug_state_string(true); - DCHECK_EQ(remove_idx + std::max(_rows_start_offset, 0L), _window_tuples.front().first) - << debug_state_string(true); - TupleRow* remove_row = reinterpret_cast(&_window_tuples.front().second); - AggFnEvaluator::remove(_evaluators, _fn_ctxs, remove_row, _curr_tuple); - _window_tuples.pop_front(); -} - -inline void AnalyticEvalNode::try_add_remaining_results(int64_t partition_idx, - int64_t prev_partition_idx) { - DCHECK_LT(prev_partition_idx, partition_idx); - - // For PARTITION, RANGE, or ROWS with UNBOUNDED PRECEDING: add a result tuple for the - // remaining rows in the partition that do not have an associated result tuple yet. - if (_fn_scope != ROWS || !_window.__isset.window_end) { - if (_last_result_idx < partition_idx - 1) { - add_result_tuple(partition_idx - 1); - } - - return; - } - - // lead() is re-written to a ROWS window with an end bound FOLLOWING. Any remaining - // results need the default value (set by Init()). If this is the case, the start bound - // is UNBOUNDED PRECEDING (DCHECK in Init()). - for (int i = 0; i < _evaluators.size(); ++i) { - if (_is_lead_fn[i]) { - _evaluators[i]->init(_fn_ctxs[i], _curr_tuple); - } - } - - // If the start bound is not UNBOUNDED PRECEDING and there are still rows in the - // partition for which we need to produce result tuples, we need to continue removing - // input tuples at the start of the window from each row that we're adding results for. 
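The add-one-row/remove-one-row discipline described in the comments of try_remove_rows_before_window() and try_add_remaining_results() is the core of the sliding-window evaluation. Below is a minimal, self-contained sketch of that pattern for an invertible aggregate over a ROWS window; SlidingSum and the driver loop are hypothetical stand-ins, not the Doris AggFnEvaluator API.

```cpp
#include <cstdint>
#include <deque>
#include <iostream>
#include <utility>
#include <vector>

struct SlidingSum {
    int64_t state = 0;
    void add(int64_t v) { state += v; }
    void remove(int64_t v) { state -= v; } // invertible, like SUM or COUNT
};

int main() {
    const std::vector<int64_t> sorted_input = {1, 2, 3, 4, 5};
    const int64_t rows_start_offset = -2; // window is "2 PRECEDING .. CURRENT ROW"
    SlidingSum eval;
    std::deque<std::pair<int64_t, int64_t>> window_tuples; // (stream_idx, value)

    for (int64_t stream_idx = 0; stream_idx < (int64_t)sorted_input.size(); ++stream_idx) {
        // Expire the row that slid out of the window, mirroring
        // try_remove_rows_before_window(): remove() is called with the
        // expired value before the evaluator sees the current row.
        if (!window_tuples.empty() &&
            window_tuples.front().first < stream_idx + rows_start_offset) {
            eval.remove(window_tuples.front().second);
            window_tuples.pop_front();
        }
        eval.add(sorted_input[stream_idx]);
        window_tuples.emplace_back(stream_idx, sorted_input[stream_idx]);
        std::cout << "idx=" << stream_idx << " windowed sum=" << eval.state << "\n";
    }
    return 0;
}
```

The deque plays the role of _window_tuples: it remembers each buffered input so the aggregate can later be told exactly what to subtract.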
- VLOG_ROW << id() << " try_add_remaining_results prev_partition_idx=" << prev_partition_idx - << " " << debug_state_string(true); - - for (int64_t next_result_idx = _last_result_idx + 1; next_result_idx < partition_idx; - ++next_result_idx) { - if (_window_tuples.empty()) { - break; - } - - if (next_result_idx + _rows_start_offset > _window_tuples.front().first) { - DCHECK_EQ(next_result_idx + _rows_start_offset - 1, _window_tuples.front().first); - // For every tuple that is removed from the window: Remove() from the evaluators - // and add the result tuple at the next index. - VLOG_ROW << id() << " Remove window_row_idx=" << _window_tuples.front().first - << " for result row at idx=" << next_result_idx; - TupleRow* remove_row = reinterpret_cast(&_window_tuples.front().second); - AggFnEvaluator::remove(_evaluators, _fn_ctxs, remove_row, _curr_tuple); - _window_tuples.pop_front(); - } - - add_result_tuple(_last_result_idx + 1); - } - - // If there are still rows between the row with the last result (add_result_tuple() may - // have updated _last_result_idx) and the partition boundary, add the current results - // for the remaining rows with the same result tuple (_curr_tuple is not modified). - if (_last_result_idx < partition_idx - 1) { - add_result_tuple(partition_idx - 1); - } -} - -inline void AnalyticEvalNode::init_next_partition(int64_t stream_idx) { - VLOG_FILE << id() << " init_next_partition idx=" << stream_idx; - DCHECK_LT(_curr_partition_idx, stream_idx); - int64_t prev_partition_stream_idx = _curr_partition_idx; - _curr_partition_idx = stream_idx; - - // If the window has an end bound preceding the current row, we will have output - // tuples for rows beyond the partition so they should be removed. If there was only - // one result tuple left in the partition it will remain in _result_tuples because it - // is the empty result tuple (i.e. called Init() and never Update()) that was added - // when initializing the previous partition so that the first rows have the default - // values (where there are no preceding rows in the window). - bool removed_results_past_partition = false; - - while (!_result_tuples.empty() && _last_result_idx >= _curr_partition_idx) { - removed_results_past_partition = true; - DCHECK(_window.__isset.window_end && - _window.window_end.type == TAnalyticWindowBoundaryType::PRECEDING); - VLOG_ROW << id() << " Removing result past partition idx: " << _result_tuples.back().first; - Tuple* prev_result_tuple = _result_tuples.back().second; - _result_tuples.pop_back(); - - if (_result_tuples.empty() || _result_tuples.back().first < prev_partition_stream_idx) { - // prev_result_tuple was the last result tuple in the partition, add it back with - // the index of the last row in the partition so that all output rows in this - // partition get the default result tuple. - _result_tuples.push_back( - std::pair(_curr_partition_idx - 1, prev_result_tuple)); - } - - _last_result_idx = _result_tuples.back().first; - } - - if (removed_results_past_partition) { - VLOG_ROW << id() << " After removing results past partition: " << debug_state_string(true); - DCHECK_EQ(_last_result_idx, _curr_partition_idx - 1); - DCHECK_LE(_input_stream->rows_returned(), _last_result_idx); - } - - if (_fn_scope == ROWS && stream_idx > 0 && - (!_window.__isset.window_end || - _window.window_end.type == TAnalyticWindowBoundaryType::FOLLOWING)) { - try_add_remaining_results(stream_idx, prev_partition_stream_idx); - } - - _window_tuples.clear(); - - // Re-initialize _curr_tuple. 
- VLOG_ROW << id() << " Reset curr_tuple"; - // Call finalize to release resources; result is not needed but the dst tuple must be - // a tuple described by _result_tuple_desc. - AggFnEvaluator::finalize(_evaluators, _fn_ctxs, _curr_tuple, _dummy_result_tuple); - _curr_tuple->init(_intermediate_tuple_desc->byte_size()); - AggFnEvaluator::init(_evaluators, _fn_ctxs, _curr_tuple); - - // Add a result tuple containing values set by Init() (e.g. nullptr for sum(), 0 for - // count()) for output rows that have no input rows in the window. We need to add this - // result tuple before any input rows are consumed and the evaluators are updated. - if (_fn_scope == ROWS && _window.__isset.window_end && - _window.window_end.type == TAnalyticWindowBoundaryType::PRECEDING) { - if (_has_first_val_null_offset) { - // Special handling for FIRST_VALUE which has the window rewritten in the FE - // in order to evaluate the fn efficiently with a trivial agg fn implementation. - // This occurs when the original analytic window has a start bound X PRECEDING. In - // that case, the window is rewritten to have an end bound X PRECEDING which would - // normally mean we add the newly Init()'d result tuple X rows down (so that those - // first rows have the initial value because they have no rows in their windows). - // However, the original query did not actually have X PRECEDING so we need to do - // one of the following: - // 1) Do not insert the initial result tuple with at all, indicated by - // _first_val_null_offset == -1. This happens when the original end bound was - // actually CURRENT ROW or Y FOLLOWING. - // 2) Insert the initial result tuple at _first_val_null_offset. This happens when - // the end bound was actually Y PRECEDING. - if (_first_val_null_offset != -1) { - add_result_tuple(_curr_partition_idx + _first_val_null_offset - 1); - } - } else { - add_result_tuple(_curr_partition_idx - _rows_end_offset - 1); - } - } -} - -inline bool AnalyticEvalNode::prev_row_compare(ExprContext* pred_ctx) { - DCHECK(pred_ctx != nullptr); - doris_udf::BooleanVal result = pred_ctx->get_boolean_val(_child_tuple_cmp_row); - DCHECK(!result.is_null); - - return result.val; -} - -Status AnalyticEvalNode::process_child_batches(RuntimeState* state) { - // Consume child batches until eos or there are enough rows to return more than an - // output batch. Ensuring there is at least one more row left after returning results - // allows us to simplify the logic dealing with _last_result_idx and _result_tuples. - while (_curr_child_batch.get() != nullptr && - num_output_rows_ready() < state->batch_size() + 1) { - RETURN_IF_CANCELLED(state); - //RETURN_IF_ERROR(QueryMaintenance(state)); - RETURN_IF_ERROR(process_child_batch(state)); - - // TODO: DCHECK that the size of _result_tuples is bounded. It shouldn't be larger - // than 2x the batch size unless the end bound has an offset preceding, in which - // case it may be slightly larger (proportional to the offset but still bounded). - if (_input_eos) { - // Already processed the last child batch. Clean up and break. 
- _curr_child_batch.reset(); - _prev_child_batch.reset(); - break; - } - - _prev_child_batch->reset(); - _prev_child_batch.swap(_curr_child_batch); - RETURN_IF_ERROR(child(0)->get_next(state, _curr_child_batch.get(), &_input_eos)); - } - - return Status::OK(); -} - -Status AnalyticEvalNode::process_child_batch(RuntimeState* state) { - // TODO: DCHECK input is sorted (even just first row vs _prev_input_row) - VLOG_FILE << id() << " process_child_batch: " << debug_state_string(false) - << " input batch size:" << _curr_child_batch->num_rows() - << " tuple pool size:" << _curr_tuple_pool->total_allocated_bytes(); - SCOPED_TIMER(_evaluation_timer); - // BufferedTupleStream::num_rows() returns the total number of rows that have been - // inserted into the stream (it does not decrease when we read rows), so the index of - // the next input row that will be inserted will be the current size of the stream. - int64_t stream_idx = _input_stream->num_rows(); - // Stores the stream_idx of the row that was last inserted into _window_tuples. - int64_t last_window_tuple_idx = -1; - - for (int i = 0; i < _curr_child_batch->num_rows(); ++i, ++stream_idx) { - TupleRow* row = _curr_child_batch->get_row(i); - _child_tuple_cmp_row->set_tuple(0, _prev_input_row->get_tuple(0)); - _child_tuple_cmp_row->set_tuple(1, row->get_tuple(0)); - try_remove_rows_before_window(stream_idx); - - // Every row is compared against the previous row to determine if (a) the row - // starts a new partition or (b) the row does not share the same values for the - // ordering exprs. When either of these occurs, the _evaluators are finalized and - // the result tuple is added to _result_tuples so that it may be added to output - // rows in get_next_output_batch(). When a new partition is found (a), a new, empty - // result tuple is created and initialized over the _evaluators. If the row has - // different values for the ordering exprs (b), then a new tuple is created but - // copied from _curr_tuple because the original is used for one or more previous - // row(s) but the incremental state still applies to the current row. - bool next_partition = false; - - if (_partition_by_eq_expr_ctx != nullptr) { - // _partition_by_eq_expr_ctx checks equality over the predicate exprs - next_partition = !prev_row_compare(_partition_by_eq_expr_ctx); - } - - try_add_result_tuple_for_prev_row(next_partition, stream_idx, row); - - if (next_partition) { - init_next_partition(stream_idx); - } - - // The _evaluators are updated with the current row. - if (_fn_scope != ROWS || !_window.__isset.window_start || - stream_idx - _rows_start_offset >= _curr_partition_idx) { - VLOG_ROW << id() << " Update idx=" << stream_idx; - AggFnEvaluator::add(_evaluators, _fn_ctxs, row, _curr_tuple); - - if (_window.__isset.window_start) { - VLOG_ROW << id() << " Adding tuple to window at idx=" << stream_idx; - Tuple* tuple = - row->get_tuple(0)->deep_copy(*_child_tuple_desc, _curr_tuple_pool.get()); - _window_tuples.push_back(std::pair(stream_idx, tuple)); - last_window_tuple_idx = stream_idx; - } - } - - try_add_result_tuple_for_curr_row(stream_idx, row); - - Status status = Status::OK(); - // Buffer the entire input row to be returned later with the analytic eval results. - if (UNLIKELY(!_input_stream->add_row(row, &status))) { - // AddRow returns false if an error occurs (available via status()) or there is - // not enough memory (status() is OK). If there isn't enough memory, we unpin - // the stream and continue writing/reading in unpinned mode. 
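The fallback this comment describes — add_row() failing without an error when memory is tight, then unpinning and retrying — is easier to follow in isolation. The sketch below uses SpillableStream as a hypothetical stand-in for BufferedTupleStream2; the real interface differs.

```cpp
#include <iostream>
#include <string>

struct Status {
    std::string msg;
    bool ok() const { return msg.empty(); }
};

struct SpillableStream {
    int in_memory_capacity = 2;
    int rows = 0;
    bool pinned = true;
    bool add_row(int /*row*/, Status* status) {
        *status = Status{}; // OK status: failure below means memory pressure
        if (pinned && rows >= in_memory_capacity) return false;
        ++rows;
        return true;
    }
    void unpin() { pinned = false; } // subsequent rows may be backed by disk
};

int main() {
    SpillableStream stream;
    Status status;
    for (int row = 0; row < 5; ++row) {
        if (!stream.add_row(row, &status)) {
            if (!status.ok()) return 1; // a real error, not memory pressure
            stream.unpin();             // switch to unpinned (spillable) mode
            std::cout << "unpinned at row " << row << "\n";
            if (!stream.add_row(row, &status)) return 1; // must succeed unpinned
        }
    }
    std::cout << "buffered " << stream.rows << " rows\n";
    return 0;
}
```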
- // TODO: Consider re-pinning later if the output stream is fully consumed. - add_runtime_exec_option("Spilled"); - RETURN_IF_ERROR(status); - RETURN_IF_ERROR(_input_stream->unpin_stream()); - VLOG_FILE << id() << " Unpin input stream while adding row idx=" << stream_idx; - - if (!_input_stream->add_row(row, &status)) { - // Rows should be added in unpinned mode unless an error occurs. - RETURN_IF_ERROR(status); - DCHECK(false); - } - } - - _prev_input_row = row; - } - - // We need to add the results for the last row(s). - if (_input_eos) { - try_add_remaining_results(stream_idx, _curr_partition_idx); - } - - // Transfer resources to _prev_tuple_pool when enough resources have accumulated - // and the _prev_tuple_pool has already been transferred to an output batch. - - // The memory limit of _curr_tuple_pool is set by the fixed value - // The size is specified as 8MB, which is used in the extremely strict memory limit. - // Eg: exec_mem_limit < 100MB may cause memory exceeded limit problem. So change it to half of max block size to prevent the problem. - // TODO: Should we keep the buffer of _curr_tuple_pool or release the memory occupied ASAP? - if (_curr_tuple_pool->total_allocated_bytes() > state->block_mgr2()->max_block_size() / 2 && - (_prev_pool_last_result_idx == -1 || _prev_pool_last_window_idx == -1)) { - _prev_tuple_pool->acquire_data(_curr_tuple_pool.get(), false); - _prev_pool_last_result_idx = _last_result_idx; - _prev_pool_last_window_idx = last_window_tuple_idx; - VLOG_FILE << id() << " Transfer resources from curr to prev pool at idx: " << stream_idx - << ", stores tuples with last result idx: " << _prev_pool_last_result_idx - << " last window idx: " << _prev_pool_last_window_idx; - } - - return Status::OK(); -} - -Status AnalyticEvalNode::get_next_output_batch(RuntimeState* state, RowBatch* output_batch, - bool* eos) { - SCOPED_TIMER(_evaluation_timer); - VLOG_FILE << id() << " get_next_output_batch: " << debug_state_string(false) - << " tuple pool size:" << _curr_tuple_pool->total_allocated_bytes(); - - if (_input_stream->rows_returned() == _input_stream->num_rows()) { - *eos = true; - return Status::OK(); - } - - const int num_child_tuples = child(0)->row_desc().tuple_descriptors().size(); - ExprContext** ctxs = &_conjunct_ctxs[0]; - int num_ctxs = _conjunct_ctxs.size(); - - RowBatch input_batch(child(0)->row_desc(), output_batch->capacity()); - int64_t stream_idx = _input_stream->rows_returned(); - RETURN_IF_ERROR(_input_stream->get_next(&input_batch, eos)); - - for (int i = 0; i < input_batch.num_rows(); ++i) { - if (reached_limit()) { - break; - } - - DCHECK(!output_batch->is_full()); - DCHECK(!_result_tuples.empty()); - VLOG_ROW << id() << " Output row idx=" << stream_idx << " " << debug_state_string(true); - - // CopyRow works as expected: input_batch tuples form a prefix of output_batch - // tuples. - TupleRow* dest = output_batch->get_row(output_batch->add_row()); - // input_batch is from a tuple_buffer_stream, - // It can only guarantee that the life cycle is valid in a batch stage. - // If the ancestor node is a no-spilling blocking node (such as hash_join_node except_node ...) - // these node may acquire a invalid tuple pointer, - // so we should use deep_copy, and copy tuple to the tuple_pool, to ensure tuple not finalized. 
- // reference issue #5466 - input_batch.get_row(i)->deep_copy(dest, child(0)->row_desc().tuple_descriptors(), - output_batch->tuple_data_pool(), false); - dest->set_tuple(num_child_tuples, _result_tuples.front().second); - - if (ExecNode::eval_conjuncts(ctxs, num_ctxs, dest)) { - output_batch->commit_last_row(); - ++_num_rows_returned; - } - - // Remove the head of _result_tuples if all rows using that evaluated tuple - // have been returned. - DCHECK_LE(stream_idx, _result_tuples.front().first); - - if (stream_idx >= _result_tuples.front().first) { - _result_tuples.pop_front(); - } - - ++stream_idx; - } - - input_batch.transfer_resource_ownership(output_batch); - - if (reached_limit()) { - *eos = true; - } - - return Status::OK(); -} - -inline int64_t AnalyticEvalNode::num_output_rows_ready() const { - if (_result_tuples.empty()) { - return 0; - } - - int64_t rows_to_return = _last_result_idx - _input_stream->rows_returned(); - - if (_last_result_idx > _input_stream->num_rows()) { - // This happens when we were able to add a result tuple before consuming child rows, - // e.g. initializing a new partition with an end bound that is X preceding. The first - // X rows get the default value and we add that tuple to _result_tuples before - // consuming child rows. It's possible the result is negative, and that's fine - // because this result is only used to determine if the number of rows to return - // is at least as big as the batch size. - rows_to_return -= _last_result_idx - _input_stream->num_rows(); - } else { - DCHECK_GE(rows_to_return, 0); - } - - return rows_to_return; - return 0; -} - -Status AnalyticEvalNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_CANCELLED(state); - //RETURN_IF_ERROR(QueryMaintenance(state)); - RETURN_IF_ERROR(state->check_query_state("Analytic eval, while get_next.")); - VLOG_FILE << id() << " GetNext: " << debug_state_string(false); - - if (reached_limit()) { - *eos = true; - return Status::OK(); - } else { - *eos = false; - } - - RETURN_IF_ERROR(process_child_batches(state)); - bool output_eos = false; - RETURN_IF_ERROR(get_next_output_batch(state, row_batch, &output_eos)); - - if (_curr_child_batch.get() == nullptr && output_eos) { - *eos = true; - } - - // Transfer resources to the output row batch if enough have accumulated and they're - // no longer needed by output rows to be returned later. - if (_prev_pool_last_result_idx != -1 && - _prev_pool_last_result_idx < _input_stream->rows_returned() && - _prev_pool_last_window_idx < _window_tuples.front().first) { - VLOG_FILE << id() << " Transfer prev pool to output batch, " - << " pool size: " << _prev_tuple_pool->total_allocated_bytes() - << " last result idx: " << _prev_pool_last_result_idx - << " last window idx: " << _prev_pool_last_window_idx; - row_batch->tuple_data_pool()->acquire_data(_prev_tuple_pool.get(), !*eos); - _prev_pool_last_result_idx = -1; - _prev_pool_last_window_idx = -1; - } - - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - return Status::OK(); -} - -Status AnalyticEvalNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - - if (_input_stream.get() != nullptr) { - _input_stream->close(); - } - - if (_block_mgr_client != nullptr) { - state->block_mgr2()->clear_reservations(_block_mgr_client); - } - // Close all evaluators and fn ctxs. 
If an error occurred in Init or prepare there may - // be fewer ctxs than evaluators. We also need to Finalize if _curr_tuple was created - // in Open. - DCHECK_LE(_fn_ctxs.size(), _evaluators.size()); - DCHECK(_curr_tuple == nullptr || _fn_ctxs.size() == _evaluators.size()); - - for (int i = 0; i < _evaluators.size(); ++i) { - // Need to make sure finalize is called in case there is any state to clean up. - if (_curr_tuple != nullptr) { - _evaluators[i]->finalize(_fn_ctxs[i], _curr_tuple, _dummy_result_tuple); - } - - _evaluators[i]->close(state); - } - - for (int i = 0; i < _fn_ctxs.size(); ++i) { - _fn_ctxs[i]->impl()->close(); - } - - if (_partition_by_eq_expr_ctx != nullptr) { - _partition_by_eq_expr_ctx->close(state); - } - if (_order_by_eq_expr_ctx != nullptr) { - _order_by_eq_expr_ctx->close(state); - } - if (_prev_child_batch.get() != nullptr) { - _prev_child_batch.reset(); - } - - if (_curr_child_batch.get() != nullptr) { - _curr_child_batch.reset(); - } - - if (_curr_tuple_pool.get() != nullptr) { - _curr_tuple_pool->free_all(); - } - if (_prev_tuple_pool.get() != nullptr) { - _prev_tuple_pool->free_all(); - } - if (_mem_pool.get() != nullptr) { - _mem_pool->free_all(); - } - ExecNode::close(state); - return Status::OK(); -} - -void AnalyticEvalNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "AnalyticEvalNode(" - << " window=" << debug_window_string(); - - if (_partition_by_eq_expr_ctx != nullptr) { - // *out << " partition_exprs=" << _partition_by_eq_expr_ctx->debug_string(); - } - - if (_order_by_eq_expr_ctx != nullptr) { - // *out << " order_by_exprs=" << _order_by_eq_expr_ctx->debug_string(); - } - - *out << AggFnEvaluator::debug_string(_evaluators); - ExecNode::debug_string(indentation_level, out); - *out << ")"; -} - -//Status AnalyticEvalNode::QueryMaintenance(RuntimeState* state) { -// for (int i = 0; i < evaluators_.size(); ++i) { -// Expr::FreeLocalAllocations(evaluators_[i]->input_expr_ctxs()); -// } -// return ExecNode::QueryMaintenance(state); -//} - -} // namespace doris diff --git a/be/src/exec/analytic_eval_node.h b/be/src/exec/analytic_eval_node.h deleted file mode 100644 index c09de063a4..0000000000 --- a/be/src/exec/analytic_eval_node.h +++ /dev/null @@ -1,334 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
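Taken together, the implementation deleted above is driven entirely through the engine's ExecNode lifecycle: prepare() and open() set up state and prime the first batch, repeated get_next() calls pull row batches until eos, and close() releases resources. A minimal sketch of that volcano-style pull protocol follows, using simplified stand-in types rather than the real doris::ExecNode signatures (which also take RuntimeState and return a richer Status).

```cpp
#include <iostream>
#include <vector>

struct RowBatch {
    std::vector<int> rows;
};

class Node {
public:
    virtual ~Node() = default;
    virtual bool prepare() { return true; }
    virtual bool open() { return true; }
    // Fills 'batch' and sets *eos when no more rows will be produced.
    virtual bool get_next(RowBatch* batch, bool* eos) = 0;
    virtual void close() {}
};

// Trivial node producing the numbers [0, n) in batches of two.
class CountingNode : public Node {
public:
    explicit CountingNode(int n) : _n(n) {}
    bool get_next(RowBatch* batch, bool* eos) override {
        batch->rows.clear();
        while (_next < _n && batch->rows.size() < 2) batch->rows.push_back(_next++);
        *eos = (_next >= _n);
        return true;
    }

private:
    int _n;
    int _next = 0;
};

int main() {
    CountingNode node(5);
    if (!node.prepare() || !node.open()) return 1;
    bool eos = false;
    while (!eos) { // standard pull loop: drain batches until eos
        RowBatch batch;
        if (!node.get_next(&batch, &eos)) return 1;
        for (int r : batch.rows) std::cout << r << " ";
    }
    std::cout << "\n";
    node.close();
    return 0;
}
```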
-// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/analytic-eval-node.h -// and modified by Doris - -#pragma once - -#include - -#include "exec/exec_node.h" -#include "exprs/expr.h" -#include "runtime/buffered_block_mgr2.h" -#include "runtime/buffered_tuple_stream2.h" -#include "runtime/buffered_tuple_stream2.inline.h" -#include "runtime/tuple.h" - -namespace doris { - -class AggFnEvaluator; - -// Evaluates analytic functions with a single pass over input rows. It is assumed -// that the input has already been sorted on all of the partition exprs and then the -// order by exprs. If there is no order by clause or partition clause, the input is -// unsorted. Uses a BufferedTupleStream to buffer input rows which are returned in a -// streaming fashion as entire row batches of output are ready to be returned, though in -// some cases the entire input must actually be consumed to produce any output rows. -// -// The output row is composed of the tuples from the child node followed by a single -// result tuple that holds the values of the evaluated analytic functions (one slot per -// analytic function). -// -// When enough input rows have been consumed to produce the results of all analytic -// functions for one or more rows (e.g. because the order by values are different for a -// RANGE window), the results of all the analytic functions for those rows are produced -// in a result tuple by calling GetValue()/Finalize() on the evaluators and storing the -// tuple in result_tuples_. Input row batches are fetched from the BufferedTupleStream, -// copied into output row batches, and the associated result tuple is set in each -// corresponding row. Result tuples may apply to many rows (e.g. an arbitrary number or -// an entire partition) so result_tuples_ stores a pair of the stream index (the last -// row in the stream it applies to) and the tuple. -// -// Input rows are consumed in a streaming fashion until enough input has been consumed -// in order to produce enough output rows. In some cases, this may mean that only a -// single input batch is needed to produce the results for an output batch, e.g. -// "SELECT RANK OVER (ORDER BY unique_col) ... ", but in other cases, an arbitrary -// number of rows may need to be buffered before result rows can be produced, e.g. if -// multiple rows have the same values for the order by exprs. The number of buffered -// rows may be an entire partition or even the entire input. Therefore, the output -// rows are buffered and may spill to disk via the BufferedTupleStream. -class AnalyticEvalNode : public ExecNode { -public: - ~AnalyticEvalNode() {} - AnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); - virtual Status prepare(RuntimeState* state); - virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - virtual Status close(RuntimeState* state); - -protected: - // Frees local allocations from _evaluators - // virtual Status QueryMaintenance(RuntimeState* state); - - virtual void debug_string(int indentation_level, std::stringstream* out) const; - -private: - // The scope over which analytic functions are evaluated. Functions are either - // evaluated over a window (specified by a TAnalyticWindow) or an entire partition. - // This is used to avoid more complex logic where we often branch based on these - // cases, e.g. 
whether or not there is a window (i.e. no window = PARTITION) is stored - // separately from the window type (assuming there is a window). - enum AnalyticFnScope { - // Analytic functions are evaluated over an entire partition (or the entire data set - // if no partition clause was specified). Every row within a partition is added to - // _curr_tuple and buffered in the _input_stream. Once all rows in a partition have - // been consumed, a single result tuple is added to _result_tuples for all rows in - // that partition. - PARTITION, - - // Functions are evaluated over windows specified with range boundaries. Currently - // only supports the 'default window', i.e. UNBOUNDED PRECEDING to CURRENT ROW. In - // this case, when the values of the order by expressions change between rows a - // result tuple is added to _result_tuples for the previous rows with the same values - // for the order by expressions. This happens in try_add_result_tuple_for_prev_row() - // because we determine if the order by expression values changed between the - // previous and current row. - RANGE, - - // Functions are evaluated over windows specified with rows boundaries. A result - // tuple is added for every input row (except for some cases where the window extends - // before or after the partition). When the end boundary is offset from the current - // row, input rows are consumed and result tuples are produced for the associated - // preceding or following row. When the start boundary is offset from the current - // row, the first tuple (i.e. the input to the analytic functions) from the input - // rows are buffered in _window_tuples because they must later be removed from the - // window (by calling AggFnEvaluator::Remove() with the expired tuple to remove it - // from the current row). When either the start or end boundaries are offset from the - // current row, there is special casing around partition boundaries. - ROWS - }; - - // Evaluates analytic functions over _curr_child_batch. Each input row is passed - // to the evaluators and added to _input_stream where they are stored until a tuple - // containing the results of the analytic functions for that row is ready to be - // returned. When enough rows have been processed so that results can be produced for - // one or more rows, a tuple containing those results are stored in _result_tuples. - // That tuple gets set in the associated output row(s) later in get_next_output_batch(). - Status process_child_batch(RuntimeState* state); - - // Processes child batches (calling process_child_batch()) until enough output rows - // are ready to return an output batch. - Status process_child_batches(RuntimeState* state); - - // Returns a batch of output rows from _input_stream with the analytic function - // results (from _result_tuples) set as the last tuple. - Status get_next_output_batch(RuntimeState* state, RowBatch* row_batch, bool* eos); - - // Determines if there is a window ending at the previous row, and if so, calls - // add_result_tuple() with the index of the previous row in _input_stream. next_partition - // indicates if the current row is the start of a new partition. stream_idx is the - // index of the current input row from _input_stream. - void try_add_result_tuple_for_prev_row(bool next_partition, int64_t stream_idx, TupleRow* row); - - // Determines if there is a window ending at the current row, and if so, calls - // add_result_tuple() with the index of the current row in _input_stream. 
stream_idx is - // the index of the current input row from _input_stream. - void try_add_result_tuple_for_curr_row(int64_t stream_idx, TupleRow* row); - - // Adds additional result tuples at the end of a partition, e.g. if the end bound is - // FOLLOWING. partition_idx is the index into _input_stream of the new partition, - // prev_partition_idx is the index of the previous partition. - void try_add_remaining_results(int64_t partition_idx, int64_t prev_partition_idx); - - // Removes rows from _curr_tuple (by calling AggFnEvaluator::Remove()) that are no - // longer in the window (i.e. they are before the window start boundary). stream_idx - // is the index of the row in _input_stream that is currently being processed in - // process_child_batch(). - void try_remove_rows_before_window(int64_t stream_idx); - - // Initializes state at the start of a new partition. stream_idx is the index of the - // current input row from _input_stream. - void init_next_partition(int64_t stream_idx); - - // Produces a result tuple with analytic function results by calling GetValue() or - // Finalize() for _curr_tuple on the _evaluators. The result tuple is stored in - // _result_tuples with the index into _input_stream specified by stream_idx. - void add_result_tuple(int64_t stream_idx); - - // Gets the number of rows that are ready to be returned by subsequent calls to - // get_next_output_batch(). - int64_t num_output_rows_ready() const; - - // Resets the slots in current_tuple_ that store the intermediate results for lead(). - // This is necessary to produce the default value (set by Init()). - void reset_lead_fn_slots(); - - // Evaluates the predicate pred_ctx over _child_tuple_cmp_row, which is a TupleRow* - // containing the previous row and the current row set during process_child_batch(). - bool prev_row_compare(ExprContext* pred_ctx); - - // Debug string containing current state. If 'detailed', per-row state is included. - std::string debug_state_string(bool detailed) const; - - std::string debug_evaluated_rows_string() const; - - // Debug string containing the window definition. - std::string debug_window_string() const; - - // Window over which the analytic functions are evaluated. Only used if _fn_scope - // is ROWS or RANGE. - // TODO: _fn_scope and _window are candidates to be removed during codegen - const TAnalyticWindow _window; - - // Tuple descriptor for storing intermediate values of analytic fn evaluation. - const TupleDescriptor* _intermediate_tuple_desc; - - // Tuple descriptor for storing results of analytic fn evaluation. - const TupleDescriptor* _result_tuple_desc; - - // Tuple descriptor of the buffered tuple (identical to the input child tuple, which is - // assumed to come from a single SortNode). nullptr if both partition_exprs and - // order_by_exprs are empty. - TupleDescriptor* _buffered_tuple_desc; - - // TupleRow* composed of the first child tuple and the buffered tuple, used by - // _partition_by_eq_expr_ctx and _order_by_eq_expr_ctx. Set in prepare() if - // _buffered_tuple_desc is not nullptr, allocated from _mem_pool. - TupleRow* _child_tuple_cmp_row; - - // Expr context for a predicate that checks if child tuple '<' buffered tuple for - // partitioning exprs. - ExprContext* _partition_by_eq_expr_ctx; - - // Expr context for a predicate that checks if child tuple '<' buffered tuple for - // order by exprs. - ExprContext* _order_by_eq_expr_ctx; - - // The scope over which analytic functions are evaluated. 
-    // TODO: Consider adding additional state to capture whether different kinds of window
-    // bounds need to be maintained, e.g. (_fn_scope == ROWS && _window.__isset.end_bound).
-    AnalyticFnScope _fn_scope;
-
-    // Offset from the current row for ROWS windows with start or end bounds specified
-    // with offsets. Is positive if the offset is FOLLOWING, negative if PRECEDING, and 0
-    // if type is CURRENT ROW or UNBOUNDED PRECEDING/FOLLOWING.
-    int64_t _rows_start_offset;
-    int64_t _rows_end_offset;
-
-    // Analytic function evaluators.
-    std::vector<AggFnEvaluator*> _evaluators;
-
-    // Indicates if each evaluator is the lead() fn. Used by reset_lead_fn_slots() to
-    // determine which slots need to be reset.
-    std::vector<bool> _is_lead_fn;
-
-    // If true, evaluating FIRST_VALUE requires special null handling when initializing new
-    // partitions determined by the offset. Set in Open() by inspecting the agg fns.
-    bool _has_first_val_null_offset;
-    long _first_val_null_offset;
-
-    // FunctionContext for each analytic function. String data returned by the analytic
-    // functions is allocated via these contexts.
-    std::vector<doris_udf::FunctionContext*> _fn_ctxs;
-
-    // Queue of tuples which are ready to be set in output rows, with the index into
-    // the _input_stream stream of the last TupleRow that gets the Tuple. Pairs are
-    // pushed onto the queue in process_child_batch() and dequeued in order in
-    // get_next_output_batch(). The size of _result_tuples is limited by 2 times the
-    // row batch size because we only process input batches if there are not enough
-    // result tuples to produce a single batch of output rows. In the worst case there
-    // may be a single result tuple per output row and _result_tuples.size() may be one
-    // less than the row batch size, in which case we will process another input row batch
-    // (inserting one result tuple per input row) before returning a row batch.
-    std::list<std::pair<int64_t, Tuple*>> _result_tuples;
-
-    // Index in _input_stream of the most recently added result tuple.
-    int64_t _last_result_idx;
-
-    // Child tuples (described by _child_tuple_desc) that are currently within the window
-    // and the index into _input_stream of the row they're associated with. Only used when
-    // window start bound is PRECEDING or FOLLOWING. Tuples in this list are deep copied
-    // and owned by curr_window_tuple_pool_.
-    // TODO: Remove and use BufferedTupleStream (needs support for multiple readers).
-    std::list<std::pair<int64_t, Tuple*>> _window_tuples;
-    TupleDescriptor* _child_tuple_desc;
-
-    // Pools used to allocate result tuples (added to _result_tuples and later returned)
-    // and window tuples (added to _window_tuples to buffer the current window). Resources
-    // are transferred from _curr_tuple_pool to _prev_tuple_pool once it is at least
-    // MAX_TUPLE_POOL_SIZE bytes. Resources from _prev_tuple_pool are transferred to an
-    // output row batch when all result tuples it contains have been returned and all
-    // window tuples it contains are no longer needed.
-    std::unique_ptr<MemPool> _curr_tuple_pool;
-    std::unique_ptr<MemPool> _prev_tuple_pool;
-
-    // The index of the last row from _input_stream associated with output row containing
-    // resources in _prev_tuple_pool. -1 when the pool is empty. Resources from
-    // _prev_tuple_pool can only be transferred to an output batch once all rows containing
-    // these tuples have been returned.
-    int64_t _prev_pool_last_result_idx;
-
-    // The index of the last row from _input_stream associated with window tuples
-    // containing resources in _prev_tuple_pool. -1 when the pool is empty. 
Resources from - // _prev_tuple_pool can only be transferred to an output batch once all rows containing - // these tuples are no longer needed (removed from the _window_tuples). - int64_t _prev_pool_last_window_idx; - - // The tuple described by _intermediate_tuple_desc storing intermediate state for the - // _evaluators. When enough input rows have been consumed to produce the analytic - // function results, a result tuple (described by _result_tuple_desc) is created and - // the agg fn results are written to that tuple by calling Finalize()/GetValue() - // on the evaluators with _curr_tuple as the source tuple. - Tuple* _curr_tuple; - - // A tuple described by _result_tuple_desc used when calling Finalize() on the - // _evaluators to release resources between partitions; the value is never used. - // TODO: Remove when agg fns implement a separate Close() method to release resources. - Tuple* _dummy_result_tuple; - - // Index of the row in _input_stream at which the current partition started. - int64_t _curr_partition_idx; - - // Previous input row used to compare partition boundaries and to determine when the - // order-by expressions change. - TupleRow* _prev_input_row; - - // Current and previous input row batches from the child. RowBatches are allocated - // once and reused. Previous input row batch owns _prev_input_row between calls to - // process_child_batch(). The prev batch is Reset() after calling process_child_batch() - // and then swapped with the curr batch so the RowBatch owning _prev_input_row is - // stored in _prev_child_batch for the next call to process_child_batch(). - std::unique_ptr _prev_child_batch; - std::unique_ptr _curr_child_batch; - - // Block manager client used by _input_stream. Not owned. - BufferedBlockMgr2::Client* _block_mgr_client; - - // Buffers input rows added in process_child_batch() until enough rows are able to - // be returned by get_next_output_batch(), in which case row batches are returned from - // the front of the stream and the underlying buffered blocks are deleted once read. - // The number of rows that must be buffered may vary from an entire partition (e.g. - // no order by clause) to a single row (e.g. ROWS windows). When the amount of - // buffered data exceeds the available memory in the underlying BufferedBlockMgr, - // _input_stream is unpinned (i.e., possibly spilled to disk if necessary). - // TODO: Consider re-pinning unpinned streams when possible. - std::unique_ptr _input_stream; - - // Pool used for O(1) allocations that live until close. - std::unique_ptr _mem_pool; - - // True when there are no more input rows to consume from our child. - bool _input_eos; - - // Time spent processing the child rows. - RuntimeProfile::Counter* _evaluation_timer; -}; - -} // namespace doris diff --git a/be/src/exec/assert_num_rows_node.cpp b/be/src/exec/assert_num_rows_node.cpp deleted file mode 100644 index 36deaab07e..0000000000 --- a/be/src/exec/assert_num_rows_node.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "exec/assert_num_rows_node.h"
-
-#include "gen_cpp/PlanNodes_types.h"
-#include "gutil/strings/substitute.h"
-#include "runtime/row_batch.h"
-#include "runtime/runtime_state.h"
-#include "util/runtime_profile.h"
-
-namespace doris {
-
-AssertNumRowsNode::AssertNumRowsNode(ObjectPool* pool, const TPlanNode& tnode,
-                                     const DescriptorTbl& descs)
-        : ExecNode(pool, tnode, descs),
-          _desired_num_rows(tnode.assert_num_rows_node.desired_num_rows),
-          _subquery_string(tnode.assert_num_rows_node.subquery_string) {
-    if (tnode.assert_num_rows_node.__isset.assertion) {
-        _assertion = tnode.assert_num_rows_node.assertion;
-    } else {
-        _assertion = TAssertion::LE; // just compatible for the previous code
-    }
-}
-
-Status AssertNumRowsNode::init(const TPlanNode& tnode, RuntimeState* state) {
-    RETURN_IF_ERROR(ExecNode::init(tnode, state));
-    return Status::OK();
-}
-
-Status AssertNumRowsNode::prepare(RuntimeState* state) {
-    RETURN_IF_ERROR(ExecNode::prepare(state));
-    return Status::OK();
-}
-
-Status AssertNumRowsNode::open(RuntimeState* state) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::open(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    // ISSUE-3435
-    RETURN_IF_ERROR(child(0)->open(state));
-    return Status::OK();
-}
-
-Status AssertNumRowsNode::get_next(RuntimeState* state, RowBatch* output_batch, bool* eos) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    output_batch->reset();
-    child(0)->get_next(state, output_batch, eos);
-    _num_rows_returned += output_batch->num_rows();
-    bool assert_res = false;
-    switch (_assertion) {
-    case TAssertion::EQ:
-        assert_res = _num_rows_returned == _desired_num_rows;
-        break;
-    case TAssertion::NE:
-        assert_res = _num_rows_returned != _desired_num_rows;
-        break;
-    case TAssertion::LT:
-        assert_res = _num_rows_returned < _desired_num_rows;
-        break;
-    case TAssertion::LE:
-        assert_res = _num_rows_returned <= _desired_num_rows;
-        break;
-    case TAssertion::GT:
-        assert_res = _num_rows_returned > _desired_num_rows;
-        break;
-    case TAssertion::GE:
-        assert_res = _num_rows_returned >= _desired_num_rows;
-        break;
-    default:
-        break;
-    }
-
-    if (!assert_res) {
-        auto to_string_lambda = [](TAssertion::type assertion) {
-            std::map<int, const char*>::const_iterator it =
-                    _TAssertion_VALUES_TO_NAMES.find(assertion);
-
-            if (it == _TAssertion_VALUES_TO_NAMES.end()) {
-                return "NULL";
-            } else {
-                return it->second;
-            }
-        };
-        LOG(INFO) << "Expected " << to_string_lambda(_assertion) << " " << _desired_num_rows
-                  << " to be returned by expression " << _subquery_string;
-        return Status::Cancelled("Expected {} {} to be returned by expression {}",
-                                 to_string_lambda(_assertion), _desired_num_rows, _subquery_string);
-    }
-    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
-    return Status::OK();
-}
-
-Status AssertNumRowsNode::close(RuntimeState* state) {
-    if (is_closed()) {
-        return Status::OK();
-    }
-    return ExecNode::close(state);
-}
-
-} // namespace doris
diff --git a/be/src/exec/assert_num_rows_node.h b/be/src/exec/assert_num_rows_node.h
deleted file mode 100644 index 2f055117d8..0000000000 --- a/be/src/exec/assert_num_rows_node.h +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exec/exec_node.h" -#include "gen_cpp/PlanNodes_types.h" - -namespace doris { - -// Node for assert row count -class AssertNumRowsNode : public ExecNode { -public: - AssertNumRowsNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - virtual ~AssertNumRowsNode() {}; - - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); - virtual Status prepare(RuntimeState* state); - virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - virtual Status close(RuntimeState* state); - -private: - int64_t _desired_num_rows; - const std::string _subquery_string; - TAssertion::type _assertion; -}; - -} // namespace doris diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp deleted file mode 100644 index 2bae8e3668..0000000000 --- a/be/src/exec/blocking_join_node.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
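The assertion dispatch in AssertNumRowsNode::get_next above reduces to one comparison between the number of rows actually returned and the desired count. A minimal, self-contained sketch of that dispatch; a plain enum stands in for the Thrift-generated TAssertion type, and all names here are illustrative, not part of the deleted file:

#include <cstdint>
#include <iostream>

// Stand-in for the Thrift-generated TAssertion enum (illustrative only).
enum class Assertion { EQ, NE, LT, LE, GT, GE };

// Returns true when `actual` satisfies `assertion` against `desired`,
// mirroring the switch in AssertNumRowsNode::get_next.
bool assertion_holds(Assertion assertion, int64_t actual, int64_t desired) {
    switch (assertion) {
    case Assertion::EQ: return actual == desired;
    case Assertion::NE: return actual != desired;
    case Assertion::LT: return actual <  desired;
    case Assertion::LE: return actual <= desired;
    case Assertion::GT: return actual >  desired;
    case Assertion::GE: return actual >= desired;
    }
    return false; // unreachable for valid enum values
}

int main() {
    // A scalar subquery is expected to return at most one row (LE 1),
    // so two rows must fail the assertion.
    std::cout << assertion_holds(Assertion::LE, 2, 1) << "\n"; // prints 0
}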
-// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/blocking-join-node.cc -// and modified by Doris - -#include "exec/blocking_join_node.h" - -#include <sstream> - -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "runtime/tuple_row.h" -#include "util/runtime_profile.h" - -namespace doris { - -BlockingJoinNode::BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, - ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _node_name(node_name), - _join_op(join_op), - _left_side_eos(false) {} - -Status BlockingJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { - return ExecNode::init(tnode, state); -} - -BlockingJoinNode::~BlockingJoinNode() { - // _left_batch must be cleaned up in close() to ensure proper resource freeing. - DCHECK(_left_batch == nullptr); -} - -Status BlockingJoinNode::prepare(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - _build_pool.reset(new MemPool(mem_tracker_held())); - _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); - _left_child_timer = ADD_TIMER(runtime_profile(), "LeftChildTime"); - _build_row_counter = ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT); - _left_child_row_counter = ADD_COUNTER(runtime_profile(), "LeftChildRows", TUnit::UNIT); - - _result_tuple_row_size = _row_descriptor.tuple_descriptors().size() * sizeof(Tuple*); - - // pre-compute the tuple index of build tuples in the output row - int num_left_tuples = child(0)->row_desc().tuple_descriptors().size(); - int num_build_tuples = child(1)->row_desc().tuple_descriptors().size(); - - _build_tuple_size = num_build_tuples; - _build_tuple_idx.reserve(_build_tuple_size); - - for (int i = 0; i < _build_tuple_size; ++i) { - TupleDescriptor* build_tuple_desc = child(1)->row_desc().tuple_descriptors()[i]; - auto tuple_idx = _row_descriptor.get_tuple_idx(build_tuple_desc->id()); - RETURN_IF_INVALID_TUPLE_IDX(build_tuple_desc->id(), tuple_idx); - _build_tuple_idx.push_back(tuple_idx); - } - - _probe_tuple_row_size = num_left_tuples * sizeof(Tuple*); - _build_tuple_row_size = num_build_tuples * sizeof(Tuple*); - - _left_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); - return Status::OK(); -} - -Status BlockingJoinNode::close(RuntimeState* state) { - // TODO(zhaochun): avoid double close - // if (is_closed()) return Status::OK(); - _left_batch.reset(); - ExecNode::close(state); - return Status::OK(); -} - -void BlockingJoinNode::build_side_thread(RuntimeState* state, std::promise<Status>* status) { - SCOPED_ATTACH_TASK(state); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh_shared()); - status->set_value(construct_build_side(state)); -} - -Status BlockingJoinNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // RETURN_IF_ERROR(Expr::open(_conjuncts, state)); - - RETURN_IF_CANCELLED(state); - // TODO(zhaochun) - // RETURN_IF_ERROR(state->check_query_state()); - - _eos = false; - - // Kick-off the construction of the build-side table in a separate - // thread, so that the left child can do any initialisation in parallel. - // Only do this if we can get a thread token.
Otherwise, do this in the - // main thread - std::promise<Status> build_side_status; - - add_runtime_exec_option("Join Build-Side Prepared Asynchronously"); - std::thread(std::bind(&BlockingJoinNode::build_side_thread, this, state, &build_side_status)) - .detach(); - - // Open the left child so that it may perform any initialisation in parallel. - // Don't exit even if we see an error, we still need to wait for the build thread - // to finish. - Status open_status = child(0)->open(state); - - // Blocks until construct_build_side() has returned, after which the build side structures - // are fully constructed. - RETURN_IF_ERROR(build_side_status.get_future().get()); - // We can close the right child to release its resources because its input has been - // fully consumed. - child(1)->close(state); - - RETURN_IF_ERROR(open_status); - - // Seed left child in preparation for get_next(). - while (true) { - RETURN_IF_ERROR(child(0)->get_next(state, _left_batch.get(), &_left_side_eos)); - COUNTER_UPDATE(_left_child_row_counter, _left_batch->num_rows()); - _left_batch_pos = 0; - - if (_left_batch->num_rows() == 0) { - if (_left_side_eos) { - init_get_next(nullptr /* eos */); - _eos = true; - break; - } - - _left_batch->reset(); - continue; - } else { - _current_left_child_row = _left_batch->get_row(_left_batch_pos++); - init_get_next(_current_left_child_row); - break; - } - } - - return Status::OK(); -} - -void BlockingJoinNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << std::string(indentation_level * 2, ' '); - *out << _node_name; - *out << "(eos=" << (_eos ? "true" : "false") << " left_batch_pos=" << _left_batch_pos; - add_to_debug_string(indentation_level, out); - ExecNode::debug_string(indentation_level, out); - *out << ")"; -} - -std::string BlockingJoinNode::get_left_child_row_string(TupleRow* row) { - std::stringstream out; - out << "["; - int* _build_tuple_idx_ptr = &_build_tuple_idx[0]; - - for (int i = 0; i < row_desc().tuple_descriptors().size(); ++i) { - if (i != 0) { - out << " "; - } - - int* is_build_tuple = - std::find(_build_tuple_idx_ptr, _build_tuple_idx_ptr + _build_tuple_size, i); - - if (is_build_tuple != _build_tuple_idx_ptr + _build_tuple_size) { - out << Tuple::to_string(nullptr, *row_desc().tuple_descriptors()[i]); - } else { - out << Tuple::to_string(row->get_tuple(i), *row_desc().tuple_descriptors()[i]); - } - } - - out << "]"; - return out.str(); -} - -// This function is replaced by codegen -void BlockingJoinNode::create_output_row(TupleRow* out, TupleRow* left, TupleRow* build) { - uint8_t* out_ptr = reinterpret_cast<uint8_t*>(out); - if (left == nullptr) { - memset(out_ptr, 0, _probe_tuple_row_size); - } else { - memcpy(out_ptr, left, _probe_tuple_row_size); - } - - if (build == nullptr) { - memset(out_ptr + _probe_tuple_row_size, 0, _build_tuple_row_size); - } else { - memcpy(out_ptr + _probe_tuple_row_size, build, _build_tuple_row_size); - } -} - -} // namespace doris diff --git a/be/src/exec/blocking_join_node.h b/be/src/exec/blocking_join_node.h deleted file mode 100644 index 75bbd13e83..0000000000 --- a/be/src/exec/blocking_join_node.h +++ /dev/null @@ -1,133 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License.
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/blocking-join-node.h -// and modified by Doris - -#pragma once - -#include <future> -#include <string> -#include <thread> - -#include "exec/exec_node.h" -#include "gen_cpp/PlanNodes_types.h" - -namespace doris { - -class MemPool; -class RowBatch; -class TupleRow; - -// Abstract base class for join nodes that block while consuming all rows from their -// right child in open(). -class BlockingJoinNode : public ExecNode { -public: - BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op, ObjectPool* pool, - const TPlanNode& tnode, const DescriptorTbl& descs); - - virtual ~BlockingJoinNode(); - - // Subclasses should call BlockingJoinNode::init() and then perform any other init() - // work, e.g. creating expr trees. - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); - - // Subclasses should call BlockingJoinNode::prepare() and then perform any other - // prepare() work, e.g. codegen. - virtual Status prepare(RuntimeState* state); - - // Open prepares the build side structures (subclasses should implement - // construct_build_side()) and then prepares for get_next() with the first left child row - // (subclasses should implement init_get_next()). - virtual Status open(RuntimeState* state); - - // Subclasses should close any other structures and then call - // BlockingJoinNode::close(). - virtual Status close(RuntimeState* state); - -private: - const std::string _node_name; - TJoinOp::type _join_op; - bool _eos; // if true, nothing left to return in get_next() - std::unique_ptr<MemPool> _build_pool; // holds everything referenced from build side - - // _left_batch must be cleared before calling get_next(). The child node - // does not initialize all tuple ptrs in the row, only the ones that it - // is responsible for. - std::unique_ptr<RowBatch> _left_batch; - int _left_batch_pos; // current scan pos in _left_batch - bool _left_side_eos; // if true, left child has no more rows to process - TupleRow* _current_left_child_row; - - // _build_tuple_idx[i] is the tuple index of child(1)'s tuple[i] in the output row - std::vector<int> _build_tuple_idx; - int _build_tuple_size; - - // Size of the TupleRow (just the Tuple ptrs) from the build (right) and probe (left) - // sides. - int _probe_tuple_row_size; - int _build_tuple_row_size; - - // byte size of result tuple row (sum of the tuple ptrs, not the tuple data). - // This should be the same size as the left child tuple row. - int _result_tuple_row_size; - - RuntimeProfile::Counter* _build_timer; // time to prepare build side - RuntimeProfile::Counter* _left_child_timer; // time to process left child batch - RuntimeProfile::Counter* _build_row_counter; // num build rows - RuntimeProfile::Counter* _left_child_row_counter; // num left child rows - - // Init the build-side state for a new left child row (e.g. hash table iterator or list - // iterator) given the first row. Used in open() to prepare for get_next(). - // A nullptr for first_left_child_row indicates the left child eos.
- virtual void init_get_next(TupleRow* first_left_child_row) = 0; - - // We parallelize building the build-side with opening the - // left child. If, for example, the left child is another - // join node, it can start to build its own build-side at the - // same time. - virtual Status construct_build_side(RuntimeState* state) = 0; - - // Gives subclasses an opportunity to add debug output to the debug string printed by - // debug_string(). - virtual void add_to_debug_string(int indentation_level, std::stringstream* out) const {} - - // Subclasses should not override, use add_to_debug_string() to add to the result. - virtual void debug_string(int indentation_level, std::stringstream* out) const; - - // Returns a debug string for the left child's 'row'. They have tuple ptrs that are - // uninitialized; the left child only populates the tuple ptrs it is responsible - // for. This function outputs just the row values and leaves the build - // side values as nullptr. - // This is only used for debugging and outputting the left child rows before - // doing the join. - std::string get_left_child_row_string(TupleRow* row); - - // Write combined row, consisting of the left child's 'left_row' and right child's - // 'build_row' to 'out_row'. - // This is replaced by codegen. - void create_output_row(TupleRow* out_row, TupleRow* left_row, TupleRow* build_row); - - friend class CrossJoinNode; - -private: - // Supervises construct_build_side() in a separate thread, and returns its status in the - // promise parameter. - void build_side_thread(RuntimeState* state, std::promise<Status>* status); -}; - -} // namespace doris diff --git a/be/src/exec/broker_scan_node.cpp b/be/src/exec/broker_scan_node.cpp deleted file mode 100644 index 580a126279..0000000000 --- a/be/src/exec/broker_scan_node.cpp +++ /dev/null @@ -1,418 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
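The open()/build_side_thread() pair above hands a std::promise<Status> to a detached thread and later blocks on the matching future, which is what lets the build side and the left child open in parallel. A minimal sketch of that handoff pattern; a plain string stands in for doris::Status, an assumed simplification for illustration:

#include <future>
#include <iostream>
#include <string>
#include <thread>

// Stand-in for Status; "OK" means success (illustrative only).
using Status = std::string;

Status construct_build_side() {
    // ... consume all rows from the right child, build the join structures ...
    return "OK";
}

int main() {
    std::promise<Status> build_side_status;

    // Detached worker publishes its result through the promise, mirroring
    // BlockingJoinNode::build_side_thread.
    std::thread([&build_side_status] {
        build_side_status.set_value(construct_build_side());
    }).detach();

    // The calling thread can open the left child here, in parallel.

    // get() blocks until the build thread has published its Status, so the
    // build side is fully constructed once this returns.
    Status s = build_side_status.get_future().get();
    std::cout << "build side: " << s << "\n";
}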
- -#include "exec/broker_scan_node.h" - -#include -#include - -#include "common/object_pool.h" -#include "exec/json_scanner.h" -#include "exec/orc_scanner.h" -#include "exec/parquet_scanner.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "util/runtime_profile.h" -#include "util/thread.h" -#include "vec/exec/vbroker_scanner.h" -#include "vec/exec/vjson_scanner.h" -#include "vec/exec/vorc_scanner.h" -#include "vec/exec/vparquet_scanner.h" - -namespace doris { - -BrokerScanNode::BrokerScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ScanNode(pool, tnode, descs), - _tuple_id(tnode.broker_scan_node.tuple_id), - _runtime_state(nullptr), - _tuple_desc(nullptr), - _num_running_scanners(0), - _scan_finished(false), - _max_buffered_batches(32), - _wait_scanner_timer(nullptr) {} - -BrokerScanNode::~BrokerScanNode() {} - -Status BrokerScanNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ScanNode::init(tnode, state)); - auto& broker_scan_node = tnode.broker_scan_node; - - if (broker_scan_node.__isset.pre_filter_exprs) { - _pre_filter_texprs = broker_scan_node.pre_filter_exprs; - } - - return Status::OK(); -} - -Status BrokerScanNode::prepare(RuntimeState* state) { - VLOG_QUERY << "BrokerScanNode prepare"; - RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // get tuple desc - _runtime_state = state; - _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - if (_tuple_desc == nullptr) { - return Status::InternalError("Failed to get tuple descriptor, _tuple_id={}", _tuple_id); - } - - // Initialize slots map - for (auto slot : _tuple_desc->slots()) { - auto pair = _slots_map.emplace(slot->col_name(), slot); - if (!pair.second) { - return Status::InternalError("Failed to insert slot, col_name={}", slot->col_name()); - } - } - - // Profile - _wait_scanner_timer = ADD_TIMER(runtime_profile(), "WaitScannerTime"); - - return Status::OK(); -} - -Status BrokerScanNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_CANCELLED(state); - - RETURN_IF_ERROR(start_scanners()); - - return Status::OK(); -} - -Status BrokerScanNode::start_scanners() { - { - std::unique_lock l(_batch_queue_lock); - _num_running_scanners = 1; - } - _scanner_threads.emplace_back(&BrokerScanNode::scanner_worker, this, 0, _scan_ranges.size()); - return Status::OK(); -} - -Status BrokerScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // check if CANCELLED. - if (state->is_cancelled()) { - std::unique_lock l(_batch_queue_lock); - if (update_status(Status::Cancelled("Cancelled"))) { - // Notify all scanners - _queue_writer_cond.notify_all(); - } - } - - if (_scan_finished.load()) { - *eos = true; - return Status::OK(); - } - - std::shared_ptr scanner_batch; - { - std::unique_lock l(_batch_queue_lock); - while (_process_status.ok() && !_runtime_state->is_cancelled() && - _num_running_scanners > 0 && _batch_queue.empty()) { - SCOPED_TIMER(_wait_scanner_timer); - _queue_reader_cond.wait_for(l, std::chrono::seconds(1)); - } - if (!_process_status.ok()) { - // Some scanner process failed. 
- return _process_status; - } - if (_runtime_state->is_cancelled()) { - if (update_status(Status::Cancelled("Cancelled"))) { - _queue_writer_cond.notify_all(); - } - return _process_status; - } - if (!_batch_queue.empty()) { - scanner_batch = _batch_queue.front(); - _batch_queue.pop_front(); - } - } - - // All scanners have finished, and all cached batches have been read - if (scanner_batch == nullptr) { - _scan_finished.store(true); - *eos = true; - return Status::OK(); - } - - // notify one scanner - _queue_writer_cond.notify_one(); - - // get scanner's batch memory - row_batch->acquire_state(scanner_batch.get()); - _num_rows_returned += row_batch->num_rows(); - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - // This is the first time we reached the limit. - // Only valid for queries like 'select * from table1 limit 20' - if (reached_limit()) { - int num_rows_over = _num_rows_returned - _limit; - row_batch->set_num_rows(row_batch->num_rows() - num_rows_over); - _num_rows_returned -= num_rows_over; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - _scan_finished.store(true); - _queue_writer_cond.notify_all(); - *eos = true; - } else { - *eos = false; - } - - if (VLOG_ROW_IS_ON) { - for (int i = 0; i < row_batch->num_rows(); ++i) { - TupleRow* row = row_batch->get_row(i); - VLOG_ROW << "BrokerScanNode output row: " - << Tuple::to_string(row->get_tuple(0), *_tuple_desc); - } - } - - return Status::OK(); -} - -Status BrokerScanNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - SCOPED_TIMER(_runtime_profile->total_time_counter()); - _scan_finished.store(true); - _queue_writer_cond.notify_all(); - _queue_reader_cond.notify_all(); - for (int i = 0; i < _scanner_threads.size(); ++i) { - _scanner_threads[i].join(); - } - - // Close - _batch_queue.clear(); - - return ExecNode::close(state); -} - -// This function is called after the plan node has been prepared.
-Status BrokerScanNode::set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) { - _scan_ranges = scan_ranges; - return Status::OK(); -} - -void BrokerScanNode::debug_string(int ident_level, std::stringstream* out) const { - (*out) << "BrokerScanNode"; -} - -std::unique_ptr<BaseScanner> BrokerScanNode::create_scanner(const TBrokerScanRange& scan_range, - ScannerCounter* counter) { - BaseScanner* scan = nullptr; - switch (scan_range.ranges[0].format_type) { - case TFileFormatType::FORMAT_PARQUET: - if (_vectorized) { - scan = new vectorized::VParquetScanner( - _runtime_state, runtime_profile(), scan_range.params, scan_range.ranges, - scan_range.broker_addresses, _pre_filter_texprs, counter); - } else { - scan = new ParquetScanner(_runtime_state, runtime_profile(), scan_range.params, - scan_range.ranges, scan_range.broker_addresses, - _pre_filter_texprs, counter); - } - break; - case TFileFormatType::FORMAT_ORC: - if (_vectorized) { - scan = new vectorized::VORCScanner(_runtime_state, runtime_profile(), scan_range.params, - scan_range.ranges, scan_range.broker_addresses, - _pre_filter_texprs, counter); - } else { - scan = new ORCScanner(_runtime_state, runtime_profile(), scan_range.params, - scan_range.ranges, scan_range.broker_addresses, - _pre_filter_texprs, counter); - } - break; - case TFileFormatType::FORMAT_JSON: - if (_vectorized) { - if (config::enable_simdjson_reader) { - scan = new vectorized::VJsonScanner<vectorized::VSIMDJsonReader>( - _runtime_state, runtime_profile(), scan_range.params, scan_range.ranges, - scan_range.broker_addresses, _pre_filter_texprs, counter); - } else { - scan = new vectorized::VJsonScanner<vectorized::VJsonReader>( - _runtime_state, runtime_profile(), scan_range.params, scan_range.ranges, - scan_range.broker_addresses, _pre_filter_texprs, counter); - } - } else { - scan = new JsonScanner(_runtime_state, runtime_profile(), scan_range.params, - scan_range.ranges, scan_range.broker_addresses, - _pre_filter_texprs, counter); - } - break; - default: - if (_vectorized) { - scan = new vectorized::VBrokerScanner( - _runtime_state, runtime_profile(), scan_range.params, scan_range.ranges, - scan_range.broker_addresses, _pre_filter_texprs, counter); - } else { - scan = new BrokerScanner(_runtime_state, runtime_profile(), scan_range.params, - scan_range.ranges, scan_range.broker_addresses, - _pre_filter_texprs, counter); - } - } - scan->reg_conjunct_ctxs(_tuple_id, _conjunct_ctxs); - std::unique_ptr<BaseScanner> scanner(scan); - return scanner; -} - -Status BrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, - const std::vector<ExprContext*>& conjunct_ctxs, - ScannerCounter* counter) { - // create scanner object and open it - Thread::set_self_name("broker_scanner"); - std::unique_ptr<BaseScanner> scanner = create_scanner(scan_range, counter); - RETURN_IF_ERROR(scanner->open()); - bool scanner_eof = false; - - while (!scanner_eof) { - // Fill one row batch - std::shared_ptr<RowBatch> row_batch(new RowBatch(row_desc(), _runtime_state->batch_size())); - - // create new tuple buffer for row_batch - MemPool* tuple_pool = row_batch->tuple_data_pool(); - int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size(); - void* tuple_buffer = tuple_pool->allocate(tuple_buffer_size); - if (tuple_buffer == nullptr) { - return Status::InternalError("Allocate memory for row batch failed."); - } - - Tuple* tuple = reinterpret_cast<Tuple*>(tuple_buffer); - while (!scanner_eof) { - RETURN_IF_CANCELLED(_runtime_state); - // If we have finished all work - if (_scan_finished.load()) { - return Status::OK(); - } - - // This row batch has been filled up, break out - if (row_batch->is_full()
|| row_batch->is_full_uncommitted()) { - break; - } - - int row_idx = row_batch->add_row(); - TupleRow* row = row_batch->get_row(row_idx); - // scan node is the first tuple of tuple row - row->set_tuple(0, tuple); - memset(tuple, 0, _tuple_desc->num_null_bytes()); - - // Get from scanner - bool tuple_fill = false; - RETURN_IF_ERROR(scanner->get_next(tuple, tuple_pool, &scanner_eof, &tuple_fill)); - if (scanner_eof) { - continue; - } - - // if reading the row succeeded but filling the dest tuple failed, we need to increase the number of uncommitted rows; - // once the capacity of the row batch is reached, the batch is transferred to the next operator to release memory - if (!tuple_fill) { - row_batch->increase_uncommitted_rows(); - continue; - } - - // eval conjuncts of this row. - if (eval_conjuncts(&conjunct_ctxs[0], conjunct_ctxs.size(), row)) { - row_batch->commit_last_row(); - char* new_tuple = reinterpret_cast<char*>(tuple); - new_tuple += _tuple_desc->byte_size(); - tuple = reinterpret_cast<Tuple*>(new_tuple); - // counter->num_rows_returned++; - } else { - counter->num_rows_unselected++; - } - } - - // Row batch has been filled, push this to the queue - if (row_batch->num_rows() > 0) { - std::unique_lock<std::mutex> l(_batch_queue_lock); - while (_process_status.ok() && !_scan_finished.load() && - !_runtime_state->is_cancelled() && - // stop pushing more batches if - // 1. too many batches in queue, or - // 2. at least one batch in queue and memory exceed limit. - (_batch_queue.size() >= _max_buffered_batches || !_batch_queue.empty())) { - _queue_writer_cond.wait_for(l, std::chrono::seconds(1)); - } - // Process already set failed, so we just return OK - if (!_process_status.ok()) { - return Status::OK(); - } - // Scan already finished, just return - if (_scan_finished.load()) { - return Status::OK(); - } - // Runtime state is canceled, just return cancel - if (_runtime_state->is_cancelled()) { - return Status::Cancelled("Cancelled"); - } - // Queue size must be smaller than _max_buffered_batches - _batch_queue.push_back(row_batch); - - // Notify reader to process - _queue_reader_cond.notify_one(); - } - } - - return Status::OK(); -} - -void BrokerScanNode::scanner_worker(int start_idx, int length) { - SCOPED_ATTACH_TASK(_runtime_state); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh_shared()); - // Clone expr context - std::vector<ExprContext*> scanner_expr_ctxs; - auto status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state, &scanner_expr_ctxs); - if (!status.ok()) { - LOG(WARNING) << "Clone conjuncts failed."; - } - - ScannerCounter counter; - for (int i = 0; i < length && status.ok(); ++i) { - const TBrokerScanRange& scan_range = - _scan_ranges[start_idx + i].scan_range.broker_scan_range; - status = scanner_scan(scan_range, scanner_expr_ctxs, &counter); - if (!status.ok()) { - LOG(WARNING) << "Scanner[" << start_idx + i << "] process failed.
status=" << status; - } - } - - // Update stats - _runtime_state->update_num_rows_load_filtered(counter.num_rows_filtered); - _runtime_state->update_num_rows_load_unselected(counter.num_rows_unselected); - - // scanner is going to finish - { - std::lock_guard l(_batch_queue_lock); - if (!status.ok()) { - update_status(status); - } - // This scanner will finish - _num_running_scanners--; - } - _queue_reader_cond.notify_all(); - // If one scanner failed, others don't need scan any more - if (!status.ok()) { - _queue_writer_cond.notify_all(); - } - Expr::close(scanner_expr_ctxs, _runtime_state); -} - -} // namespace doris diff --git a/be/src/exec/broker_scan_node.h b/be/src/exec/broker_scan_node.h deleted file mode 100644 index e8fb99b24b..0000000000 --- a/be/src/exec/broker_scan_node.h +++ /dev/null @@ -1,129 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "base_scanner.h" -#include "common/status.h" -#include "exec/scan_node.h" -#include "gen_cpp/PaloInternalService_types.h" - -namespace doris { - -class RuntimeState; -class PartRangeKey; -class PartitionInfo; -struct ScannerCounter; - -class BrokerScanNode : public ScanNode { -public: - BrokerScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - ~BrokerScanNode() override; - - // Called after create this scan node - Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; - - // Prepare partition infos & set up timer - Status prepare(RuntimeState* state) override; - - // Start broker scan using ParquetScanner or BrokerScanner. - Status open(RuntimeState* state) override; - - // Fill the next row batch by calling next() on the scanner, - Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; - - Status get_next(RuntimeState* state, vectorized::Block* block, bool* eos) override { - return Status::NotSupported("Not Implemented get block"); - } - - // Close the scanner, and report errors. - Status close(RuntimeState* state) override; - - // No use - Status set_scan_ranges(const std::vector& scan_ranges) override; - -protected: - // Write debug string of this into out. 
- void debug_string(int indentation_level, std::stringstream* out) const override; - - // Update process status to the first failed status. - // NOTE: Must hold the mutex of this scan node - bool update_status(const Status& new_status) { - if (_process_status.ok()) { - _process_status = new_status; - return true; - } - return false; - } - - std::unique_ptr<BaseScanner> create_scanner(const TBrokerScanRange& scan_range, - ScannerCounter* counter); - -private: - // Create scanners to do scan job - virtual Status start_scanners(); - - // One scanner worker; this scanner will handle 'length' ranges starting from start_idx - void scanner_worker(int start_idx, int length); - - // Scan one range - Status scanner_scan(const TBrokerScanRange& scan_range, - const std::vector<ExprContext*>& conjunct_ctxs, ScannerCounter* counter); - -protected: - bool _vectorized = false; - TupleId _tuple_id; - RuntimeState* _runtime_state; - TupleDescriptor* _tuple_desc; - std::map<std::string, SlotDescriptor*> _slots_map; - std::vector<TScanRangeParams> _scan_ranges; - - std::mutex _batch_queue_lock; - std::condition_variable _queue_reader_cond; - std::condition_variable _queue_writer_cond; - std::deque<std::shared_ptr<RowBatch>> _batch_queue; - - int _num_running_scanners; - - std::atomic<bool> _scan_finished; - - Status _process_status; - - std::vector<std::thread> _scanner_threads; - - int _max_buffered_batches; - - // The origin preceding filter exprs. - // These exprs will be converted to expr context - // in XXXScanner. - // Because the row descriptor used for these exprs is `src_row_desc`, - // which is initialized in XXXScanner. - std::vector<TExpr> _pre_filter_texprs; - - RuntimeProfile::Counter* _wait_scanner_timer; -}; - -} // namespace doris diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp deleted file mode 100644 index 3755f4eca8..0000000000 --- a/be/src/exec/broker_scanner.cpp +++ /dev/null @@ -1,520 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
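BrokerScanNode's get_next()/scanner_scan() pair above is a classic bounded producer/consumer queue: scanners block on _queue_writer_cond once _max_buffered_batches batches are queued, and the reader blocks on _queue_reader_cond while the queue is empty. A self-contained sketch of the same backpressure scheme; ints stand in for RowBatch and all names are illustrative:

#include <condition_variable>
#include <deque>
#include <iostream>
#include <mutex>
#include <thread>

// Bounded queue mirroring BrokerScanNode's batch-queue backpressure.
class BoundedQueue {
public:
    explicit BoundedQueue(size_t cap) : _cap(cap) {}

    void push(int v) { // producer side (scanner thread)
        std::unique_lock<std::mutex> l(_lock);
        _writer_cond.wait(l, [&] { return _queue.size() < _cap; });
        _queue.push_back(v);
        _reader_cond.notify_one(); // wake the reader
    }

    int pop() { // consumer side (get_next)
        std::unique_lock<std::mutex> l(_lock);
        _reader_cond.wait(l, [&] { return !_queue.empty(); });
        int v = _queue.front();
        _queue.pop_front();
        _writer_cond.notify_one(); // free a slot for producers
        return v;
    }

private:
    size_t _cap;
    std::mutex _lock;
    std::condition_variable _reader_cond;
    std::condition_variable _writer_cond;
    std::deque<int> _queue;
};

int main() {
    BoundedQueue q(32); // analogous to _max_buffered_batches = 32
    std::thread producer([&] { for (int i = 0; i < 100; ++i) q.push(i); });
    long sum = 0;
    for (int i = 0; i < 100; ++i) sum += q.pop();
    producer.join();
    std::cout << sum << "\n"; // 4950
}

The production code additionally uses timed waits plus status/cancellation checks so a failed or cancelled query cannot leave either side blocked forever; this sketch keeps only the backpressure core.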
- -#include "exec/broker_scanner.h" - -#include -#include - -#include -#include - -#include "common/consts.h" -#include "exec/decompressor.h" -#include "exec/plain_binary_line_reader.h" -#include "exec/plain_text_line_reader.h" -#include "io/file_factory.h" -#include "runtime/descriptors.h" -#include "runtime/exec_env.h" -#include "runtime/tuple.h" -#include "util/string_util.h" -#include "util/utf8_check.h" - -namespace doris { - -BrokerScanner::BrokerScanner(RuntimeState* state, RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, ScannerCounter* counter) - : BaseScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, counter), - _cur_file_reader(nullptr), - _cur_line_reader(nullptr), - _cur_decompressor(nullptr), - _cur_line_reader_eof(false), - _skip_lines(0) { - if (params.__isset.column_separator_length && params.column_separator_length > 1) { - _value_separator = params.column_separator_str; - _value_separator_length = params.column_separator_length; - } else { - _value_separator.push_back(static_cast(params.column_separator)); - _value_separator_length = 1; - } - if (params.__isset.line_delimiter_length && params.line_delimiter_length > 1) { - _line_delimiter = params.line_delimiter_str; - _line_delimiter_length = params.line_delimiter_length; - } else { - _line_delimiter.push_back(static_cast(params.line_delimiter)); - _line_delimiter_length = 1; - } - _split_values.reserve(sizeof(Slice) * params.src_slot_ids.size()); -} - -BrokerScanner::~BrokerScanner() { - close(); -} - -Status BrokerScanner::open() { - RETURN_IF_ERROR(BaseScanner::open()); // base default function - return Status::OK(); -} - -Status BrokerScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) { - SCOPED_TIMER(_read_timer); - // Get one line - while (!_scanner_eof) { - if (_cur_line_reader == nullptr || _cur_line_reader_eof) { - RETURN_IF_ERROR(open_next_reader()); - // If there isn't any more reader, break this - if (_scanner_eof) { - continue; - } - } - const uint8_t* ptr = nullptr; - size_t size = 0; - RETURN_IF_ERROR(_cur_line_reader->read_line(&ptr, &size, &_cur_line_reader_eof)); - if (_skip_lines > 0) { - _skip_lines--; - continue; - } - if (size == 0) { - // Read empty row, just continue - continue; - } - { - COUNTER_UPDATE(_rows_read_counter, 1); - SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(_convert_one_row(Slice(ptr, size), tuple, tuple_pool, fill_tuple)); - break; // break always - } - } - - *eof = _scanner_eof; - return Status::OK(); -} - -Status BrokerScanner::open_next_reader() { - if (_next_range >= _ranges.size()) { - _scanner_eof = true; - return Status::OK(); - } - - RETURN_IF_ERROR(open_file_reader()); - RETURN_IF_ERROR(open_line_reader()); - _next_range++; - - return Status::OK(); -} - -Status BrokerScanner::open_file_reader() { - const TBrokerRangeDesc& range = _ranges[_next_range]; - int64_t start_offset = range.start_offset; - if (start_offset != 0) { - start_offset -= 1; - } - //means first range, skip - if (start_offset == 0 && range.header_type.size() > 0) { - std::string header_type = to_lower(range.header_type); - if (header_type == BeConsts::CSV_WITH_NAMES) { - _skip_lines = 1; - } else if (header_type == BeConsts::CSV_WITH_NAMES_AND_TYPES) { - _skip_lines = 2; - } - } - - if (range.file_type == TFileType::FILE_STREAM) { - RETURN_IF_ERROR(FileFactory::create_pipe_reader(range.load_id, _cur_file_reader_s)); - 
_real_reader = _cur_file_reader_s.get(); - } else { - RETURN_IF_ERROR(FileFactory::create_file_reader( - range.file_type, _state->exec_env(), _profile, _broker_addresses, - _params.properties, range, start_offset, _cur_file_reader)); - _real_reader = _cur_file_reader.get(); - } - return _real_reader->open(); -} - -Status BrokerScanner::create_decompressor(TFileFormatType::type type) { - if (_cur_decompressor != nullptr) { - delete _cur_decompressor; - _cur_decompressor = nullptr; - } - - CompressType compress_type; - switch (type) { - case TFileFormatType::FORMAT_CSV_PLAIN: - case TFileFormatType::FORMAT_JSON: - case TFileFormatType::FORMAT_PROTO: - compress_type = CompressType::UNCOMPRESSED; - break; - case TFileFormatType::FORMAT_CSV_GZ: - compress_type = CompressType::GZIP; - break; - case TFileFormatType::FORMAT_CSV_BZ2: - compress_type = CompressType::BZIP2; - break; - case TFileFormatType::FORMAT_CSV_LZ4FRAME: - compress_type = CompressType::LZ4FRAME; - break; - case TFileFormatType::FORMAT_CSV_LZOP: - compress_type = CompressType::LZOP; - break; - case TFileFormatType::FORMAT_CSV_DEFLATE: - compress_type = CompressType::DEFLATE; - break; - default: { - return Status::InternalError("Unknown format type, cannot inference compress type, type={}", - type); - } - } - RETURN_IF_ERROR(Decompressor::create_decompressor(compress_type, &_cur_decompressor)); - - return Status::OK(); -} - -Status BrokerScanner::open_line_reader() { - if (_cur_decompressor != nullptr) { - delete _cur_decompressor; - _cur_decompressor = nullptr; - } - - if (_cur_line_reader != nullptr) { - delete _cur_line_reader; - _cur_line_reader = nullptr; - } - - const TBrokerRangeDesc& range = _ranges[_next_range]; - int64_t size = range.size; - if (range.start_offset != 0) { - if (range.format_type != TFileFormatType::FORMAT_CSV_PLAIN) { - return Status::InternalError("For now we do not support split compressed file"); - } - size += 1; - // not first range will always skip one line - _skip_lines = 1; - } - - // create decompressor. 
- // _decompressor may be nullptr if this is not a compressed file - RETURN_IF_ERROR(create_decompressor(range.format_type)); - - _file_format_type = range.format_type; - // open line reader - switch (range.format_type) { - case TFileFormatType::FORMAT_CSV_PLAIN: - case TFileFormatType::FORMAT_CSV_GZ: - case TFileFormatType::FORMAT_CSV_BZ2: - case TFileFormatType::FORMAT_CSV_LZ4FRAME: - case TFileFormatType::FORMAT_CSV_LZOP: - case TFileFormatType::FORMAT_CSV_DEFLATE: - _cur_line_reader = new PlainTextLineReader(_profile, _real_reader, _cur_decompressor, size, - _line_delimiter, _line_delimiter_length); - break; - case TFileFormatType::FORMAT_PROTO: - _cur_line_reader = new PlainBinaryLineReader(_real_reader); - break; - default: { - return Status::InternalError("Unknown format type, cannot init line reader, type={}", - range.format_type); - } - } - - _cur_line_reader_eof = false; - - return Status::OK(); -} - -void BrokerScanner::close() { - BaseScanner::close(); - if (_cur_decompressor != nullptr) { - delete _cur_decompressor; - _cur_decompressor = nullptr; - } - - if (_cur_line_reader != nullptr) { - delete _cur_line_reader; - _cur_line_reader = nullptr; - } -} - -void BrokerScanner::split_line(const Slice& line) { - _split_values.clear(); - if (_file_format_type == TFileFormatType::FORMAT_PROTO) { - PDataRow** ptr = reinterpret_cast<PDataRow**>(line.data); - PDataRow* row = *ptr; - for (const PDataColumn& col : (row)->col()) { - int len = col.value().size(); - uint8_t* buf = new uint8_t[len]; - memcpy(buf, col.value().c_str(), len); - _split_values.emplace_back(buf, len); - } - delete row; - delete[] ptr; - } else { - const char* value = line.data; - size_t start = 0; // point to the start pos of next col value. - size_t curpos = 0; // point to the start pos of separator matching sequence. - size_t p1 = 0; // point to the current pos of separator matching sequence. - size_t non_space = 0; // point to the last pos of non_space character. - - // Separator: AAAA - // - // p1 - // â–¼ - // AAAA - // 1000AAAA2000AAAA - // â–² â–² - // Start │ - // curpos - - while (curpos < line.size) { - if (curpos + p1 == line.size || *(value + curpos + p1) != _value_separator[p1]) { - // Not match, move forward: - curpos += (p1 == 0 ? 1 : p1); - p1 = 0; - } else { - p1++; - if (p1 == _value_separator_length) { - // Match a separator - non_space = curpos; - // Trim trailing spaces. Be consistent with hive and trino's behavior.
- if (_state->trim_tailing_spaces_for_external_table_query()) { - while (non_space > start && *(value + non_space - 1) == ' ') { - non_space--; - } - } - _split_values.emplace_back(value + start, non_space - start); - start = curpos + _value_separator_length; - curpos = start; - p1 = 0; - non_space = 0; - } - } - } - - CHECK(curpos == line.size) << curpos << " vs " << line.size; - non_space = curpos; - if (_state->trim_tailing_spaces_for_external_table_query()) { - while (non_space > start && *(value + non_space - 1) == ' ') { - non_space--; - } - } - _split_values.emplace_back(value + start, non_space - start); - } -} - -void BrokerScanner::fill_fix_length_string(const Slice& value, MemPool* pool, char** new_value_p, - const int new_value_length) { - if (new_value_length != 0 && value.size < new_value_length) { - *new_value_p = reinterpret_cast<char*>(pool->allocate(new_value_length)); - - // 'value' is guaranteed not to be nullptr - memcpy(*new_value_p, value.data, value.size); - for (int i = value.size; i < new_value_length; ++i) { - (*new_value_p)[i] = '\0'; - } - } -} - -// Following format are included. -// .123 1.23 123. -1.23 -// ATTN: The decimal point and (for negative numbers) the "-" sign are not counted. -// like '.123', it will be regarded as '0.123', but it match decimal(3, 3) -bool BrokerScanner::check_decimal_input(const Slice& slice, int precision, int scale, - std::stringstream* error_msg) { - const char* value = slice.data; - size_t value_length = slice.size; - - if (value_length > (precision + 2)) { - (*error_msg) << "the length of decimal value is overflow. " - << "precision in schema: (" << precision << ", " << scale << "); " - << "value: [" << slice.to_string() << "]; " - << "str actual length: " << value_length << ";"; - return false; - } - - // ignore leading spaces and trailing spaces - int begin_index = 0; - while (begin_index < value_length && std::isspace(value[begin_index])) { - ++begin_index; - } - int end_index = value_length - 1; - while (end_index >= begin_index && std::isspace(value[end_index])) { - --end_index; - } - - if (value[begin_index] == '+' || value[begin_index] == '-') { - ++begin_index; - } - - int point_index = -1; - for (int i = begin_index; i <= end_index; ++i) { - if (value[i] == '.') { - point_index = i; - } - } - - int value_int_len = 0; - int value_frac_len = 0; - - if (point_index == -1) { - // an int value: like 123 - value_int_len = end_index - begin_index + 1; - value_frac_len = 0; - } else { - value_int_len = point_index - begin_index; - value_frac_len = end_index - point_index; - } - - if (value_int_len > (precision - scale)) { - (*error_msg) << "the int part length longer than schema precision [" << precision << "]. " - << "value [" << slice.to_string() << "]. "; - return false; - } else if (value_frac_len > scale) { - (*error_msg) << "the frac part length longer than schema scale [" << scale << "]. " - << "value [" << slice.to_string() << "]. "; - return false; - } - return true; -} - -// Convert one row to this tuple -Status BrokerScanner::_convert_one_row(const Slice& line, Tuple* tuple, MemPool* tuple_pool, - bool* fill_tuple) { - RETURN_IF_ERROR(_line_to_src_tuple(line)); - if (!_success) { - // Not successful: we met an invalid row; return.
- *fill_tuple = false; - return Status::OK(); - } - - return fill_dest_tuple(tuple, tuple_pool, fill_tuple); -} - -Status BrokerScanner::_line_split_to_values(const Slice& line) { - bool is_proto_format = _file_format_type == TFileFormatType::FORMAT_PROTO; - if (!is_proto_format && !validate_utf8(line.data, line.size)) { - RETURN_IF_ERROR(_state->append_error_msg_to_file( - []() -> std::string { return "Unable to display"; }, - []() -> std::string { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", "Unable to display"); - return fmt::to_string(error_msg); - }, - &_scanner_eof)); - _counter->num_rows_filtered++; - _success = false; - return Status::OK(); - } - - split_line(line); - - // range of current file - const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); - bool read_by_column_def = false; - if (range.__isset.read_by_column_def) { - read_by_column_def = range.read_by_column_def; - } - const std::vector<std::string>& columns_from_path = range.columns_from_path; - // read data by column definition, resize _split_values to the size of _src_slot_descs - if (read_by_column_def) { - // fill slots by NULL - while (_split_values.size() + columns_from_path.size() < _src_slot_descs.size()) { - _split_values.emplace_back(_split_values.back().get_data(), 0); - } - // remove redundant slots - while (_split_values.size() + columns_from_path.size() > _src_slot_descs.size()) { - _split_values.pop_back(); - } - } else { - if (_split_values.size() + columns_from_path.size() < _src_slot_descs.size()) { - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { - return is_proto_format ? "" : std::string(line.data, line.size); - }, - [&]() -> std::string { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", - "actual column number is less than schema column number."); - fmt::format_to(error_msg, "actual number: {}, column separator: [{}], ", - _split_values.size(), _value_separator); - fmt::format_to(error_msg, "line delimiter: [{}], schema number: {}; ", - _line_delimiter, _src_slot_descs.size()); - return fmt::to_string(error_msg); - }, - &_scanner_eof)); - _counter->num_rows_filtered++; - _success = false; - return Status::OK(); - } else if (_split_values.size() + columns_from_path.size() > _src_slot_descs.size()) { - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { - return is_proto_format ?
"" : std::string(line.data, line.size); - }, - [&]() -> std::string { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", - "actual column number is more than schema column number."); - fmt::format_to(error_msg, "actual number: {}, column separator: [{}], ", - _split_values.size(), _value_separator); - fmt::format_to(error_msg, "line delimiter: [{}], schema number: {}; ", - _line_delimiter, _src_slot_descs.size()); - return fmt::to_string(error_msg); - }, - &_scanner_eof)); - _counter->num_rows_filtered++; - _success = false; - return Status::OK(); - } - } - - _success = true; - return Status::OK(); -} - -// Convert one row to this tuple -Status BrokerScanner::_line_to_src_tuple(const Slice& line) { - RETURN_IF_ERROR(_line_split_to_values(line)); - if (!_success) { - return Status::OK(); - } - - for (int i = 0; i < _split_values.size(); ++i) { - auto slot_desc = _src_slot_descs[i]; - const Slice& value = _split_values[i]; - if (slot_desc->is_nullable() && is_null(value)) { - _src_tuple->set_null(slot_desc->null_indicator_offset()); - continue; - } - _src_tuple->set_not_null(slot_desc->null_indicator_offset()); - void* slot = _src_tuple->get_slot(slot_desc->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = value.data; - str_slot->len = value.size; - } - - const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); - if (range.__isset.num_of_columns_from_file) { - fill_slots_of_columns_from_path(range.num_of_columns_from_file, range.columns_from_path); - } - - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exec/broker_scanner.h b/be/src/exec/broker_scanner.h deleted file mode 100644 index a443443b4d..0000000000 --- a/be/src/exec/broker_scanner.h +++ /dev/null @@ -1,127 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "exec/base_scanner.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/mem_pool.h" -#include "util/runtime_profile.h" -#include "util/slice.h" - -namespace doris { - -class Tuple; -class SlotDescriptor; -struct Slice; -class TextConverter; -class FileReader; -class LineReader; -class Decompressor; -class RuntimeState; -class ExprContext; -class TupleDescriptor; -class TupleRow; -class RowDescriptor; -class RuntimeProfile; -class StreamLoadPipe; - -// Broker scanner convert the data read from broker to doris's tuple. 
-class BrokerScanner : public BaseScanner { -public: - BrokerScanner(RuntimeState* state, RuntimeProfile* profile, - const TBrokerScanRangeParams& params, const std::vector<TBrokerRangeDesc>& ranges, - const std::vector<TNetworkAddress>& broker_addresses, - const std::vector<TExpr>& pre_filter_texprs, ScannerCounter* counter); - ~BrokerScanner() override; - - // Open this scanner and initialize everything needed for reading - Status open() override; - - // Get next tuple - virtual Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, - bool* fill_tuple) override; - - Status get_next(vectorized::Block* block, bool* eof) override { - return Status::NotSupported("Not Implemented get block"); - } - - // Close this scanner - void close() override; - -protected: - // Read next buffer from reader - Status open_next_reader(); - - Status _line_to_src_tuple(const Slice& line); - - Status _line_split_to_values(const Slice& line); - -private: - Status open_file_reader(); - Status create_decompressor(TFileFormatType::type type); - Status open_line_reader(); - - // Split one text line to values - void split_line(const Slice& line); - - void fill_fix_length_string(const Slice& value, MemPool* pool, char** new_value_p, - int new_value_length); - - bool check_decimal_input(const Slice& value, int precision, int scale, - std::stringstream* error_msg); - - // Convert one row to one tuple - // 'line' is one csv text line - // output is tuple - Status _convert_one_row(const Slice& line, Tuple* tuple, MemPool* tuple_pool, bool* fill_tuple); - -protected: - std::string _value_separator; - std::string _line_delimiter; - TFileFormatType::type _file_format_type; - int _value_separator_length; - int _line_delimiter_length; - - // Reader - // _cur_file_reader_s is for stream load pipe reader, - // and _cur_file_reader is for other file reader. - // TODO: refactor this to use only shared_ptr or unique_ptr - std::unique_ptr<FileReader> _cur_file_reader; - std::shared_ptr<FileReader> _cur_file_reader_s; - FileReader* _real_reader; - LineReader* _cur_line_reader; - Decompressor* _cur_decompressor; - bool _cur_line_reader_eof; - - // When the fetched range starts from 0 and header_type="csv_with_names", skip the first line - // When the fetched range starts from 0 and header_type="csv_with_names_and_types", skip the first two lines - // When the fetched range doesn't start from 0, always skip the first line - int _skip_lines; - - std::vector<Slice> _split_values; -}; - -} // namespace doris diff --git a/be/src/exec/cross_join_node.cpp b/be/src/exec/cross_join_node.cpp deleted file mode 100644 index b4e25f8b1d..0000000000 --- a/be/src/exec/cross_join_node.cpp +++ /dev/null @@ -1,197 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
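BrokerScanner::split_line above matches a possibly multi-character separator byte by byte so it never copies the line. A simplified sketch of the same separator semantics using std::string::find; it keeps empty trailing fields like the original but omits the production code's optional trailing-space trimming, and all names are illustrative:

#include <iostream>
#include <string>
#include <vector>

// Split `line` on a (possibly multi-character) separator, keeping empty fields,
// mirroring the field boundaries BrokerScanner::split_line produces.
std::vector<std::string> split_line(const std::string& line, const std::string& sep) {
    std::vector<std::string> out;
    size_t start = 0;
    while (true) {
        size_t pos = line.find(sep, start);
        if (pos == std::string::npos) {
            out.push_back(line.substr(start)); // final field (may be empty)
            break;
        }
        out.push_back(line.substr(start, pos - start));
        start = pos + sep.size();
    }
    return out;
}

int main() {
    // Matches the "1000AAAA2000AAAA" diagram in the deleted code:
    for (const auto& v : split_line("1000AAAA2000AAAA", "AAAA")) {
        std::cout << "[" << v << "]\n"; // prints [1000] [2000] []
    }
}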
- -#include "exec/cross_join_node.h" - -#include - -#include "exprs/expr.h" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "util/runtime_profile.h" - -namespace doris { - -CrossJoinNode::CrossJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : BlockingJoinNode("CrossJoinNode", TJoinOp::CROSS_JOIN, pool, tnode, descs) {} - -Status CrossJoinNode::prepare(RuntimeState* state) { - DCHECK(_join_op == TJoinOp::CROSS_JOIN); - RETURN_IF_ERROR(BlockingJoinNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - _build_batch_pool.reset(new ObjectPool()); - return Status::OK(); -} - -Status CrossJoinNode::close(RuntimeState* state) { - // avoid double close - if (is_closed()) { - return Status::OK(); - } - _build_batches.reset(); - _build_batch_pool.reset(); - BlockingJoinNode::close(state); - return Status::OK(); -} - -Status CrossJoinNode::construct_build_side(RuntimeState* state) { - // Do a full scan of child(1) and store all build row batches. - RETURN_IF_ERROR(child(1)->open(state)); - - while (true) { - RowBatch* batch = - _build_batch_pool->add(new RowBatch(child(1)->row_desc(), state->batch_size())); - - RETURN_IF_CANCELLED(state); - // TODO(zhaochun): - // RETURN_IF_ERROR(state->CheckQueryState()); - bool eos = false; - RETURN_IF_ERROR(child(1)->get_next(state, batch, &eos)); - - SCOPED_TIMER(_build_timer); - _build_batches.add_row_batch(batch); - VLOG_ROW << build_list_debug_string(); - COUNTER_SET(_build_row_counter, static_cast(_build_batches.total_num_rows())); - - if (eos) { - break; - } - } - - return Status::OK(); -} - -void CrossJoinNode::init_get_next(TupleRow* first_left_row) { - _current_build_row = _build_batches.iterator(); -} - -Status CrossJoinNode::get_next(RuntimeState* state, RowBatch* output_batch, bool* eos) { - RETURN_IF_CANCELLED(state); - *eos = false; - // TOOD(zhaochun) - // RETURN_IF_ERROR(state->check_query_state()); - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - if (reached_limit() || _eos) { - *eos = true; - return Status::OK(); - } - - ScopedTimer timer(_left_child_timer); - - while (!_eos) { - // Compute max rows that should be added to output_batch - int64_t max_added_rows = output_batch->capacity() - output_batch->num_rows(); - - if (limit() != -1) { - max_added_rows = std::min(max_added_rows, limit() - rows_returned()); - } - - // Continue processing this row batch - _num_rows_returned += - process_left_child_batch(output_batch, _left_batch.get(), max_added_rows); - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - if (reached_limit() || output_batch->is_full()) { - *eos = reached_limit(); - break; - } - - // Check to see if we're done processing the current left child batch - if (_current_build_row.at_end() && _left_batch_pos == _left_batch->num_rows()) { - _left_batch->transfer_resource_ownership(output_batch); - _left_batch_pos = 0; - - if (output_batch->is_full()) { - break; - } - - if (_left_side_eos) { - *eos = _eos = true; - break; - } else { - timer.stop(); - RETURN_IF_ERROR(child(0)->get_next(state, _left_batch.get(), &_left_side_eos)); - timer.start(); - COUNTER_UPDATE(_left_child_row_counter, _left_batch->num_rows()); - } - } - } - - return Status::OK(); -} - -std::string CrossJoinNode::build_list_debug_string() { - std::stringstream out; - out << "BuildList("; - out << _build_batches.debug_string(child(1)->row_desc()); - out << ")"; - return out.str(); -} 
-
-// TODO: this can be replaced with a codegen'd function
-int CrossJoinNode::process_left_child_batch(RowBatch* output_batch, RowBatch* batch,
-                                            int max_added_rows) {
-    int row_idx = output_batch->add_rows(max_added_rows);
-    DCHECK(row_idx != RowBatch::INVALID_ROW_INDEX);
-    uint8_t* output_row_mem = reinterpret_cast<uint8_t*>(output_batch->get_row(row_idx));
-    TupleRow* output_row = reinterpret_cast<TupleRow*>(output_row_mem);
-
-    int rows_returned = 0;
-    ExprContext* const* ctxs = &_conjunct_ctxs[0];
-    int ctx_size = _conjunct_ctxs.size();
-
-    while (true) {
-        while (!_current_build_row.at_end()) {
-            create_output_row(output_row, _current_left_child_row, _current_build_row.get_row());
-            _current_build_row.next();
-
-            if (!eval_conjuncts(ctxs, ctx_size, output_row)) {
-                continue;
-            }
-
-            ++rows_returned;
-
-            // Filled up out batch or hit limit
-            if (UNLIKELY(rows_returned == max_added_rows)) {
-                output_batch->commit_rows(rows_returned);
-                return rows_returned;
-            }
-
-            // Advance to next out row
-            output_row_mem += output_batch->row_byte_size();
-            output_row = reinterpret_cast<TupleRow*>(output_row_mem);
-        }
-
-        DCHECK(_current_build_row.at_end());
-
-        // Advance to the next row in the left child batch
-        if (UNLIKELY(_left_batch_pos == batch->num_rows())) {
-            output_batch->commit_rows(rows_returned);
-            return rows_returned;
-        }
-
-        _current_left_child_row = batch->get_row(_left_batch_pos++);
-        _current_build_row = _build_batches.iterator();
-    }
-
-    output_batch->commit_rows(rows_returned);
-    return rows_returned;
-}
-} // namespace doris
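For reference, what is being deleted here is a textbook blocked nested-loop join: construct_build_side() buffers the entire right child, and process_left_child_batch() then pairs each left row with every buffered build row. A minimal standalone sketch of that shape (illustrative only; these names are not Doris APIs):

```cpp
// Sketch of the nested-loop strategy the deleted CrossJoinNode implemented:
// buffer the whole right side, then for each streamed left row emit one
// output row per buffered right row (conjuncts would filter here).
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
    const std::vector<std::string> left = {"l1", "l2"};        // streamed child(0)
    const std::vector<std::string> right = {"r1", "r2", "r3"}; // buffered child(1)

    std::vector<std::pair<std::string, std::string>> output;
    for (const auto& l : left) {
        // mirrors resetting _current_build_row to the head of the build list
        for (const auto& r : right) {
            output.emplace_back(l, r);
        }
    }
    for (const auto& [l, r] : output) {
        std::cout << l << " x " << r << "\n"; // |left| * |right| = 6 rows
    }
    return 0;
}
```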
diff --git a/be/src/exec/cross_join_node.h b/be/src/exec/cross_join_node.h
deleted file mode 100644
index dc8cfac171..0000000000
--- a/be/src/exec/cross_join_node.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include
-#include
-#include
-
-#include "exec/blocking_join_node.h"
-#include "exec/row_batch_list.h"
-#include "gen_cpp/PlanNodes_types.h"
-#include "runtime/descriptors.h"
-
-namespace doris {
-
-class RowBatch;
-class TupleRow;
-
-// Node for cross joins.
-// Iterates over the left child rows and then the right child rows and, for
-// each combination, writes the output row if the conjuncts are satisfied. The
-// build batches are kept in a list that is fully constructed from the right child in
-// construct_build_side() (called by BlockingJoinNode::open()) while rows are fetched from
-// the left child as necessary in get_next().
-class CrossJoinNode : public BlockingJoinNode {
-public:
-    CrossJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-
-    virtual Status prepare(RuntimeState* state);
-    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
-    virtual Status close(RuntimeState* state);
-
-protected:
-    virtual void init_get_next(TupleRow* first_left_row);
-    virtual Status construct_build_side(RuntimeState* state);
-
-private:
-    // Object pool for the build RowBatches; owns all batches stored in _build_batches
-    std::unique_ptr<ObjectPool> _build_batch_pool;
-    // List of build batches, filled in construct_build_side()
-    RowBatchList _build_batches;
-    RowBatchList::TupleRowIterator _current_build_row;
-
-    // Processes a batch from the left child.
-    //   output_batch: the batch for resulting tuple rows
-    //   batch: the batch from the left child to process. This function can be called to
-    //          continue processing a batch in the middle
-    //   max_added_rows: maximum rows that can be added to output_batch
-    //   return the number of rows added to output_batch
-    int process_left_child_batch(RowBatch* output_batch, RowBatch* batch, int max_added_rows);
-
-    // Returns a debug string for _build_batches. This is used for debugging during the
-    // build list construction and before doing the join.
-    std::string build_list_debug_string();
-};
-
-} // namespace doris
diff --git a/be/src/exec/csv_scan_node.cpp b/be/src/exec/csv_scan_node.cpp
deleted file mode 100644
index ab4b63320d..0000000000
--- a/be/src/exec/csv_scan_node.cpp
+++ /dev/null
@@ -1,629 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "csv_scan_node.h"
-
-#include
-
-#include
-#include
-
-#include "exec/text_converter.hpp"
-#include "gen_cpp/PlanNodes_types.h"
-#include "runtime/row_batch.h"
-#include "runtime/runtime_state.h"
-#include "runtime/tuple_row.h"
-#include "util/hash_util.hpp"
-#include "util/runtime_profile.h"
-
-namespace doris {
-
-class StringRef {
-public:
-    StringRef(char const* const begin, int const size) : _begin(begin), _size(size) {}
-
-    ~StringRef() {
-        // No need to delete _begin, because it only records an index into a std::string.
-        // The C string will be released along with the std::string object.
-    }
-
-    int size() const { return _size; }
-    int length() const { return _size; }
-
-    char const* c_str() const { return _begin; }
-    char const* begin() const { return _begin; }
-
-    char const* end() const { return _begin + _size; }
-
-private:
-    char const* _begin;
-    int _size;
-};
-
-void split_line(const std::string& str, char delimiter, std::vector<StringRef>& result) {
-    enum State { IN_DELIM = 1, IN_TOKEN = 0 };
-
-    // line-begin char and line-end char are considered to be 'delimiter'
-    State state = IN_DELIM;
-    char const* p_begin = str.c_str(); // Begin of either a token or a delimiter
-    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
-        State const new_state = (*it == delimiter ? IN_DELIM : IN_TOKEN);
-        if (new_state != state) {
-            if (new_state == IN_DELIM) {
-                result.push_back(StringRef(p_begin, &*it - p_begin));
-            }
-            p_begin = &*it;
-        } else if (new_state == IN_DELIM) {
-            result.push_back(StringRef(&*p_begin, 0));
-            p_begin = &*it;
-        }
-
-        state = new_state;
-    }
-
-    result.push_back(StringRef(p_begin, (&*str.end() - p_begin) - state));
-}
-
-CsvScanNode::CsvScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
-        : ScanNode(pool, tnode, descs),
-          _tuple_id(tnode.csv_scan_node.tuple_id),
-          _file_paths(tnode.csv_scan_node.file_paths),
-          _column_separator(tnode.csv_scan_node.column_separator),
-          _column_type_map(tnode.csv_scan_node.column_type_mapping),
-          _column_function_map(tnode.csv_scan_node.column_function_mapping),
-          _columns(tnode.csv_scan_node.columns),
-          _unspecified_columns(tnode.csv_scan_node.unspecified_columns),
-          _default_values(tnode.csv_scan_node.default_values),
-          _is_init(false),
-          _tuple_desc(nullptr),
-          _slot_num(0),
-          _tuple_pool(nullptr),
-          _text_converter(nullptr),
-          _tuple(nullptr),
-          _runtime_state(nullptr),
-          _split_check_timer(nullptr),
-          _split_line_timer(nullptr),
-          _hll_column_num(0) {
-    // do nothing
-    LOG(INFO) << "csv scan node: " << apache::thrift::ThriftDebugString(tnode).c_str();
-}
-
-CsvScanNode::~CsvScanNode() {
-    // do nothing
-}
-
-Status CsvScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
-    return ExecNode::init(tnode, state);
-}
-
-Status CsvScanNode::prepare(RuntimeState* state) {
-    VLOG_CRITICAL << "CsvScanNode::Prepare";
-
-    if (_is_init) {
-        return Status::OK();
-    }
-
-    if (nullptr == state) {
-        return Status::InternalError("input runtime_state pointer is nullptr.");
-    }
-
-    RETURN_IF_ERROR(ScanNode::prepare(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-
-    // add timer
-    _split_check_timer = ADD_TIMER(_runtime_profile, "split check timer");
-    _split_line_timer = ADD_TIMER(_runtime_profile, "split line timer");
-
-    _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
-    if (nullptr == _tuple_desc) {
-        return Status::InternalError("Failed to get tuple descriptor.");
-    }
-
-    _slot_num = _tuple_desc->slots().size();
-    const OlapTableDescriptor* csv_table =
-            static_cast<const OlapTableDescriptor*>(_tuple_desc->table_desc());
-    if (nullptr == csv_table) {
-        return Status::InternalError("csv table pointer is nullptr.");
-    }
-
-    for (int i = 0; i < _slot_num; ++i) {
-        SlotDescriptor* slot = _tuple_desc->slots()[i];
-        const std::string& column_name = slot->col_name();
-
-        if (slot->type().type == TYPE_HLL) {
-            TMiniLoadEtlFunction& function = _column_function_map[column_name];
-            if (check_hll_function(function) == false) {
-                return Status::InternalError("Function name or param error.");
-            }
-            _hll_column_num++;
-        }
-
-        // NOTE: not all the columns in '_columns' exist in the table schema
-        if (_columns.end() != std::find(_columns.begin(), _columns.end(), column_name)) {
-            _column_slot_map[column_name] = slot;
-        } else {
-            _column_slot_map[column_name] = nullptr;
-        }
-
-        // add 'unspecified_columns' which have default values
-        if (_unspecified_columns.end() !=
-            std::find(_unspecified_columns.begin(), _unspecified_columns.end(), column_name)) {
-            _column_slot_map[column_name] = slot;
-        }
-    }
-
-    _column_type_vec.resize(_columns.size());
-    for (int i = 0; i < _columns.size(); ++i) {
-        const std::string& column_name = _columns[i];
-        SlotDescriptor* slot = _column_slot_map[column_name];
-        _column_slot_vec.push_back(slot);
-
-        if (slot != nullptr) {
-            _column_type_vec[i] = _column_type_map[column_name];
-        }
-    }
-    for (int i = 0; i < _default_values.size(); ++i) {
-        const std::string& column_name = _unspecified_columns[i];
-        SlotDescriptor* slot = _column_slot_map[column_name];
-        _unspecified_colomn_slot_vec.push_back(slot);
-        _unspecified_colomn_type_vec.push_back(_column_type_map[column_name]);
-    }
-
-    // new one scanner
-    _csv_scanner.reset(new (std::nothrow) CsvScanner(_file_paths));
-    if (_csv_scanner.get() == nullptr) {
-        return Status::InternalError("new a csv scanner failed.");
-    }
-
-    _tuple_pool.reset(new (std::nothrow) MemPool());
-    if (_tuple_pool.get() == nullptr) {
-        return Status::InternalError("new a mem pool failed.");
-    }
-
-    _text_converter.reset(new (std::nothrow) TextConverter('\\'));
-    if (_text_converter.get() == nullptr) {
-        return Status::InternalError("new a text converter failed.");
-    }
-
-    _is_init = true;
-    return Status::OK();
-}
-
-Status CsvScanNode::open(RuntimeState* state) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::open(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    VLOG_CRITICAL << "CsvScanNode::Open";
-
-    if (nullptr == state) {
-        return Status::InternalError("input pointer is nullptr.");
-    }
-
-    if (!_is_init) {
-        return Status::InternalError("used before initialize.");
-    }
-
-    _runtime_state = state;
-
-    RETURN_IF_CANCELLED(state);
-    RETURN_IF_ERROR(_csv_scanner->open());
-
-    return Status::OK();
-}
-
-Status CsvScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
-    VLOG_CRITICAL << "CsvScanNode::GetNext";
-    if (nullptr == state || nullptr == row_batch || nullptr == eos) {
-        return Status::InternalError("input is nullptr pointer");
-    }
-
-    if (!_is_init) {
-        return Status::InternalError("used before initialize.");
-    }
-
-    RETURN_IF_CANCELLED(state);
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-
-    if (reached_limit()) {
-        *eos = true;
-        return Status::OK();
-    }
-
-    // create new tuple buffer for row_batch
-    int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size();
-    void* tuple_buffer = _tuple_pool->allocate(tuple_buffer_size);
-
-    if (nullptr == tuple_buffer) {
-        return Status::InternalError("Allocate memory failed.");
-    }
-
-    _tuple = reinterpret_cast<Tuple*>(tuple_buffer);
-    memset(_tuple, 0, _tuple_desc->num_null_bytes());
-
-    // Indicates whether there are more rows to process.
-    bool csv_eos = false;
-
-    // NOTE: unlike MySQL, we need to check correctness.
-    while (!csv_eos) {
-        RETURN_IF_CANCELLED(state);
-
-        if (reached_limit() || row_batch->is_full()) {
-            // hang on to last allocated chunk in pool, we'll keep writing into it in the
-            // next get_next() call
-            row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), !reached_limit());
-            *eos = reached_limit();
-            return Status::OK();
-        }
-
-        // read csv
-        std::string line;
-        RETURN_IF_ERROR(_csv_scanner->get_next_row(&line, &csv_eos));
-        //VLOG_ROW << "line read: [" << line << "]";
-        if (line.empty()) {
-            continue;
-        }
-        // split & check line & fill default value
-        bool is_success = split_check_fill(line, state);
-        ++_num_rows_load_total;
-        if (!is_success) {
-            ++_num_rows_load_filtered;
-            continue;
-        }
-
-        int row_idx = row_batch->add_row();
-        TupleRow* row = row_batch->get_row(row_idx);
-        // scan node is the first tuple of tuple row
-        row->set_tuple(0, _tuple);
-
-        {
-            row_batch->commit_last_row();
-            ++_num_rows_returned;
-            COUNTER_SET(_rows_returned_counter, _num_rows_returned);
-            char* new_tuple = reinterpret_cast<char*>(_tuple);
-            new_tuple += _tuple_desc->byte_size();
-            _tuple = reinterpret_cast<Tuple*>(new_tuple);
-        }
-    }
-    state->update_num_rows_load_total(_num_rows_load_total);
-    state->update_num_rows_load_filtered(_num_rows_load_filtered);
-    VLOG_ROW << "normal_row_number: " << state->num_rows_load_success()
-             << "; error_row_number: " << state->num_rows_load_filtered() << std::endl;
-
-    row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false);
-
-    *eos = csv_eos;
-    return Status::OK();
-}
-
-Status CsvScanNode::close(RuntimeState* state) {
-    if (is_closed()) {
-        return Status::OK();
-    }
-    VLOG_CRITICAL << "CsvScanNode::Close";
-
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-
-    RETURN_IF_ERROR(ExecNode::close(state));
-
-    if (state->num_rows_load_success() == 0) {
-        std::stringstream error_msg;
-        error_msg << "Read zero normal line file. ";
-        LOG(INFO) << error_msg.str();
-        return Status::InternalError(error_msg.str());
-    }
-
-    // only write summary line if there are error lines
-    if (_num_rows_load_filtered > 0) {
-        // Summary normal line and error line number info
-        std::stringstream summary_msg;
-        summary_msg << "error line: " << _num_rows_load_filtered
-                    << "; normal line: " << state->num_rows_load_success();
-        LOG(INFO) << summary_msg.str();
-    }
-
-    return Status::OK();
-}
-
-void CsvScanNode::debug_string(int indentation_level, std::stringstream* out) const {
-    *out << std::string(indentation_level * 2, ' ');
-    *out << "csvScanNode(tupleid=" << _tuple_id;
-    *out << ")" << std::endl;
-
-    for (int i = 0; i < _children.size(); ++i) {
-        _children[i]->debug_string(indentation_level + 1, out);
-    }
-}
-
-Status CsvScanNode::set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) {
-    return Status::OK();
-}
-
-void CsvScanNode::fill_fix_length_string(const char* value, const int value_length, MemPool* pool,
-                                         char** new_value_p, const int new_value_length) {
-    if (new_value_length != 0 && value_length < new_value_length) {
-        DCHECK(pool != nullptr);
-        *new_value_p = reinterpret_cast<char*>(pool->allocate(new_value_length));
-
-        // 'value' is guaranteed not to be nullptr
-        memcpy(*new_value_p, value, value_length);
-        for (int i = value_length; i < new_value_length; ++i) {
-            (*new_value_p)[i] = '\0';
-        }
-        VLOG_ROW << "Fill fix length string. "
" - << "value: [" << std::string(value, value_length) << "]; " - << "value_length: " << value_length << "; " - << "*new_value_p: [" << *new_value_p << "]; " - << "new value length: " << new_value_length << std::endl; - } -} - -// Following format are included. -// .123 1.23 123. -1.23 -// ATTN: The decimal point and (for negative numbers) the "-" sign are not counted. -// like '.123', it will be regarded as '0.123', but it match decimal(3, 3) -bool CsvScanNode::check_decimal_input(const char* value, const int value_length, - const int precision, const int scale, - std::stringstream* error_msg) { - if (value_length > (precision + 2)) { - (*error_msg) << "the length of decimal value is overflow. " - << "precision in schema: (" << precision << ", " << scale << "); " - << "value: [" << std::string(value, value_length) << "]; " - << "str actual length: " << value_length << ";"; - return false; - } - - // ignore leading spaces and trailing spaces - int begin_index = 0; - while (begin_index < value_length && std::isspace(value[begin_index])) { - ++begin_index; - } - int end_index = value_length - 1; - while (end_index >= begin_index && std::isspace(value[end_index])) { - --end_index; - } - - if (value[begin_index] == '+' || value[begin_index] == '-') { - ++begin_index; - } - - int point_index = -1; - for (int i = begin_index; i <= end_index; ++i) { - if (value[i] == '.') { - point_index = i; - } - } - - int value_int_len = 0; - int value_frac_len = 0; - value_int_len = point_index - begin_index; - value_frac_len = end_index - point_index; - - if (point_index == -1) { - // an int value: like 123 - value_int_len = end_index - begin_index + 1; - value_frac_len = 0; - } else { - value_int_len = point_index - begin_index; - value_frac_len = end_index - point_index; - } - - if (value_int_len > (precision - scale)) { - (*error_msg) << "the int part length longer than schema precision [" << precision << "]. " - << "value [" << std::string(value, value_length) << "]. "; - return false; - } else if (value_frac_len > scale) { - (*error_msg) << "the frac part length longer than schema scale [" << scale << "]. " - << "value [" << std::string(value, value_length) << "]. "; - return false; - } - return true; -} - -static bool is_null(const char* value, int value_length) { - return value_length == 2 && value[0] == '\\' && value[1] == 'N'; -} - -// Writes a slot in _tuple from an value containing text data. -bool CsvScanNode::check_and_write_text_slot(const std::string& column_name, - const TColumnType& column_type, const char* value, - int value_length, const SlotDescriptor* slot, - RuntimeState* state, std::stringstream* error_msg) { - if (value_length == 0 && !slot->type().is_string_type()) { - (*error_msg) << "the length of input should not be 0. " - << "column_name: " << column_name << "; " - << "type: " << slot->type() << "; " - << "input_str: [" << std::string(value, value_length) << "]."; - return false; - } - - if (is_null(value, value_length)) { - if (slot->is_nullable()) { - _tuple->set_null(slot->null_indicator_offset()); - return true; - } else { - (*error_msg) << "value cannot be null. 
column name: " << column_name - << "; type: " << slot->type() << "; input_str: [" - << std::string(value, value_length) << "]."; - return false; - } - } - - char* value_to_convert = const_cast(value); - int value_to_convert_length = value_length; - - // Fill all the spaces if it is 'TYPE_CHAR' type - if (slot->type().is_string_type()) { - int char_len = column_type.len; - if (slot->type().type != TYPE_HLL && value_length > char_len) { - (*error_msg) << "the length of input is too long than schema. " - << "column_name: " << column_name << "; " - << "input_str: [" << std::string(value, value_length) << "] " - << "type: " << slot->type() << "; " - << "schema length: " << char_len << "; " - << "actual length: " << value_length << "; "; - return false; - } - if (slot->type().type == TYPE_CHAR && value_length < char_len) { - fill_fix_length_string(value, value_length, _tuple_pool.get(), &value_to_convert, - char_len); - value_to_convert_length = char_len; - } - } else if (slot->type().is_decimal_v2_type()) { - int precision = column_type.precision; - int scale = column_type.scale; - bool is_success = check_decimal_input(value, value_length, precision, scale, error_msg); - if (is_success == false) { - return false; - } - } - - if (!_text_converter->write_slot(slot, _tuple, value_to_convert, value_to_convert_length, true, - false, _tuple_pool.get())) { - (*error_msg) << "convert csv string to " << slot->type() << " failed. " - << "column_name: " << column_name << "; " - << "input_str: [" << std::string(value, value_length) << "]; "; - return false; - } - - return true; -} - -bool CsvScanNode::split_check_fill(const std::string& line, RuntimeState* state) { - SCOPED_TIMER(_split_check_timer); - - std::stringstream error_msg; - // std::vector fields; - std::vector fields; - { - SCOPED_TIMER(_split_line_timer); - split_line(line, _column_separator[0], fields); - } - - if (_hll_column_num == 0 && fields.size() < _columns.size()) { - error_msg << "actual column number is less than schema column number. " - << "actual number: " << fields.size() << " ," - << "schema number: " << _columns.size() << "; "; - LOG(INFO) << error_msg.str(); - return false; - } else if (_hll_column_num == 0 && fields.size() > _columns.size()) { - error_msg << "actual column number is more than schema column number. 
" - << "actual number: " << fields.size() << " ," - << "schema number: " << _columns.size() << "; "; - LOG(INFO) << error_msg.str(); - return false; - } - - for (int i = 0; i < _columns.size(); ++i) { - const std::string& column_name = _columns[i]; - const SlotDescriptor* slot = _column_slot_vec[i]; - // ignore unspecified columns - if (slot == nullptr) { - continue; - } - - if (!slot->is_materialized()) { - continue; - } - - if (slot->type().type == TYPE_HLL) { - continue; - } - - const TColumnType& column_type = _column_type_vec[i]; - bool flag = check_and_write_text_slot(column_name, column_type, fields[i].c_str(), - fields[i].length(), slot, state, &error_msg); - - if (flag == false) { - LOG(INFO) << error_msg.str(); - return false; - } - } - - for (int i = 0; i < _unspecified_columns.size(); ++i) { - const std::string& column_name = _unspecified_columns[i]; - const SlotDescriptor* slot = _unspecified_colomn_slot_vec[i]; - if (slot == nullptr) { - continue; - } - - if (!slot->is_materialized()) { - continue; - } - - if (slot->type().type == TYPE_HLL) { - continue; - } - - const TColumnType& column_type = _unspecified_colomn_type_vec[i]; - bool flag = check_and_write_text_slot(column_name, column_type, _default_values[i].c_str(), - _default_values[i].length(), slot, state, &error_msg); - - if (flag == false) { - LOG(INFO) << error_msg.str(); - return false; - } - } - - for (auto iter = _column_function_map.begin(); iter != _column_function_map.end(); ++iter) { - TMiniLoadEtlFunction& function = iter->second; - const std::string& column_name = iter->first; - const SlotDescriptor* slot = _column_slot_map[column_name]; - const TColumnType& column_type = _column_type_map[column_name]; - std::string column_string = ""; - const char* src = fields[function.param_column_index].c_str(); - int src_column_len = fields[function.param_column_index].length(); - hll_hash(src, src_column_len, &column_string); - bool flag = check_and_write_text_slot(column_name, column_type, column_string.c_str(), - column_string.length(), slot, state, &error_msg); - if (flag == false) { - LOG(INFO) << error_msg.str(); - return false; - } - } - - return true; -} - -bool CsvScanNode::check_hll_function(TMiniLoadEtlFunction& function) { - if (function.function_name.empty() || function.function_name != "hll_hash" || - function.param_column_index < 0) { - return false; - } - return true; -} - -void CsvScanNode::hll_hash(const char* src, int len, std::string* result) { - std::string str(src, len); - if (str != "\\N") { - uint64_t hash = HashUtil::murmur_hash64A(src, len, HashUtil::MURMUR_SEED); - char buf[10]; - // expliclit set - buf[0] = HLL_DATA_EXPLICIT; - buf[1] = 1; - *((uint64_t*)(buf + 2)) = hash; - *result = std::string(buf, sizeof(buf)); - } else { - char buf[1]; - // empty set - buf[0] = HLL_DATA_EMPTY; - *result = std::string(buf, sizeof(buf)); - } -} - -} // end namespace doris diff --git a/be/src/exec/csv_scan_node.h b/be/src/exec/csv_scan_node.h deleted file mode 100644 index f89b75a1ff..0000000000 --- a/be/src/exec/csv_scan_node.h +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
diff --git a/be/src/exec/csv_scan_node.h b/be/src/exec/csv_scan_node.h
deleted file mode 100644
index f89b75a1ff..0000000000
--- a/be/src/exec/csv_scan_node.h
+++ /dev/null
@@ -1,131 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include
-#include
-
-#include "exec/csv_scanner.h"
-#include "exec/scan_node.h"
-#include "runtime/descriptors.h"
-
-namespace doris {
-
-class TextConverter;
-class Tuple;
-class TupleDescriptor;
-class RuntimeState;
-class MemPool;
-class Status;
-
-// Now, CsvScanNode and CsvScanner are only for unit test
-class CsvScanNode : public ScanNode {
-public:
-    CsvScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-    ~CsvScanNode();
-
-    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
-
-    // initialize _csv_scanner, and create _text_converter.
-    virtual Status prepare(RuntimeState* state);
-
-    // Start CSV scan using _csv_scanner.
-    virtual Status open(RuntimeState* state);
-
-    // Fill the next row batch by calling next() on the _csv_scanner,
-    // converting text data in CSV cells to binary data.
-    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
-
-    // Release memory, report 'Counter', and report errors.
-    virtual Status close(RuntimeState* state);
-
-    // No use in csv scan process
-    virtual Status set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges);
-
-    // Write debug string of this into out.
-    virtual void debug_string(int indentation_level, std::stringstream* out) const;
-
-private:
-    bool check_and_write_text_slot(const std::string& column_name, const TColumnType& column_type,
-                                   const char* value, int value_length, const SlotDescriptor* slot,
-                                   RuntimeState* state, std::stringstream* error_msg);
-
-    // split one line into fields, check every field, fill every field into the tuple
-    bool split_check_fill(const std::string& line, RuntimeState* state);
-
-    void fill_fix_length_string(const char* value, int value_length, MemPool* pool,
-                                char** new_value, int new_value_length);
-    bool check_decimal_input(const char* value, int value_length, int precision, int scale,
-                             std::stringstream* error_msg);
-
-    void hll_hash(const char* src, int len, std::string* result);
-
-    bool check_hll_function(TMiniLoadEtlFunction& function);
-
-    // Tuple id resolved in prepare() to set _tuple_desc
-    TupleId _tuple_id;
-
-    std::vector<std::string> _file_paths;
-
-    std::string _column_separator;
-
-    std::map<std::string, TColumnType> _column_type_map;
-    // mapping function
-    std::map<std::string, TMiniLoadEtlFunction> _column_function_map;
-
-    std::vector<std::string> _columns;
-    // '_unspecified_columns' maps one-for-one to '_default_values' in the same order
-    std::vector<std::string> _unspecified_columns;
-    std::vector<std::string> _default_values;
-
-    // Map one-for-one to '_columns' in the same order
-    std::vector<SlotDescriptor*> _column_slot_vec;
-    std::vector<TColumnType> _column_type_vec;
-    // Map one-for-one to '_unspecified_columns' in the same order
-    std::vector<SlotDescriptor*> _unspecified_colomn_slot_vec;
-    std::vector<TColumnType> _unspecified_colomn_type_vec;
-
-    bool _is_init;
-
-    // Descriptor of tuples read from CSV file.
-    const TupleDescriptor* _tuple_desc;
-    // Tuple index in tuple row.
-    int _slot_num;
-
-    // Pool for allocating tuple data, including all varying-length slots.
-    std::unique_ptr<MemPool> _tuple_pool;
-    // Util class for doing the real file reading
-    std::unique_ptr<CsvScanner> _csv_scanner;
-    // Helper class for converting text to other types.
-    std::unique_ptr<TextConverter> _text_converter;
-    // Current tuple.
-    Tuple* _tuple;
-    // Current RuntimeState
-    RuntimeState* _runtime_state;
-
-    int64_t _num_rows_load_total = 0L;
-    int64_t _num_rows_load_filtered = 0L;
-
-    RuntimeProfile::Counter* _split_check_timer;
-    RuntimeProfile::Counter* _split_line_timer;
-    // count of HLL value columns
-    int _hll_column_num;
-    std::map<std::string, SlotDescriptor*> _column_slot_map;
-};
-
-} // end namespace doris
diff --git a/be/src/exec/csv_scanner.cpp b/be/src/exec/csv_scanner.cpp
deleted file mode 100644
index bdba765c83..0000000000
--- a/be/src/exec/csv_scanner.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "exec/csv_scanner.h"
-
-#include
-
-#include "common/logging.h"
-
-namespace doris {
-CsvScanner::CsvScanner(const std::vector<std::string>& csv_file_paths)
-        : _is_open(false),
-          _file_paths(csv_file_paths),
-          _current_file(nullptr),
-          _current_file_idx(0) {
-    // do nothing
-}
-
-CsvScanner::~CsvScanner() {
-    // close file
-    if (_current_file != nullptr) {
-        if (_current_file->is_open()) {
-            _current_file->close();
-        }
-        delete _current_file;
-        _current_file = nullptr;
-    }
-}
-
-Status CsvScanner::open() {
-    VLOG_CRITICAL << "CsvScanner::Connect";
-
-    if (_is_open) {
-        LOG(INFO) << "this scanner already opened";
-        return Status::OK();
-    }
-
-    if (_file_paths.empty()) {
-        return Status::InternalError("no file specified.");
-    }
-
-    _is_open = true;
-    return Status::OK();
-}
-
-// TODO(lingbin): read more than one line at a time to reduce IO consumption
-Status CsvScanner::get_next_row(std::string* line_str, bool* eos) {
-    if (_current_file == nullptr && _current_file_idx == _file_paths.size()) {
-        *eos = true;
-        return Status::OK();
-    }
-
-    if (_current_file == nullptr && _current_file_idx < _file_paths.size()) {
-        std::string& file_path = _file_paths[_current_file_idx];
-        LOG(INFO) << "open csv file: [" << _current_file_idx << "] " << file_path;
-
-        _current_file = new std::ifstream(file_path, std::ifstream::in);
-        if (!_current_file->is_open()) {
-            return Status::InternalError("Fail to read csv file: {}", file_path);
-        }
-        ++_current_file_idx;
-    }
-
-    getline(*_current_file, *line_str);
-    if (_current_file->eof()) {
-        _current_file->close();
-        delete _current_file;
-        _current_file = nullptr;
-
-        if (_current_file_idx == _file_paths.size()) {
-            *eos = true;
-            return Status::OK();
-        }
-    }
-
-    *eos = false;
-    return Status::OK();
-}
-} // end namespace doris
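The CsvScanner deleted above read its input lazily: one getline() per call, advancing to the next file only when the current one is exhausted. A self-contained sketch of the same contract (a hypothetical LineScanner, not the Doris class):

```cpp
// Iterate a list of files lazily, yielding one line per call; returns false
// only after the last line of the last file.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

class LineScanner {
public:
    explicit LineScanner(std::vector<std::string> paths) : _paths(std::move(paths)) {}

    bool next_line(std::string* line) {
        while (true) {
            if (!_in.is_open()) {
                if (_idx == _paths.size()) return false; // all files consumed
                _in.open(_paths[_idx++]);
                if (!_in.is_open()) return false; // the real code returned a Status error
            }
            if (std::getline(_in, *line)) return true;
            _in.close(); // current file exhausted; try the next one
            _in.clear(); // reset eof/fail bits before reopening
        }
    }

private:
    std::vector<std::string> _paths;
    std::ifstream _in;
    size_t _idx = 0;
};

int main() {
    LineScanner scanner({"a.csv", "b.csv"}); // hypothetical input files
    std::string line;
    while (scanner.next_line(&line)) {
        std::cout << line << "\n";
    }
    return 0;
}
```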
diff --git a/be/src/exec/csv_scanner.h b/be/src/exec/csv_scanner.h
deleted file mode 100644
index d55139aaab..0000000000
--- a/be/src/exec/csv_scanner.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <fstream>
-#include <string>
-#include <vector>
-
-#include "common/status.h"
-
-namespace doris {
-
-// Now, CsvScanNode and CsvScanner are only for unit test
-class CsvScanner {
-public:
-    CsvScanner(const std::vector<std::string>& csv_file_paths);
-    ~CsvScanner();
-
-    Status open();
-    Status get_next_row(std::string* line_str, bool* eos);
-
-private:
-    bool _is_open;
-    std::vector<std::string> _file_paths;
-    // the current opened file
-    std::ifstream* _current_file;
-    int32_t _current_file_idx;
-};
-
-} // end namespace doris
diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp
index 8362e89d7b..ecb9329f6b 100644
--- a/be/src/exec/data_sink.cpp
+++ b/be/src/exec/data_sink.cpp
@@ -24,15 +24,8 @@
 #include
 #include
 
-#include "exec/tablet_sink.h"
 #include "gen_cpp/PaloInternalService_types.h"
-#include "runtime/data_stream_sender.h"
-#include "runtime/export_sink.h"
 #include "runtime/memory_scratch_sink.h"
-#include "runtime/mysql_table_sink.h"
-#include "runtime/odbc_table_sink.h"
-#include "runtime/result_file_sink.h"
-#include "runtime/result_sink.h"
 #include "runtime/runtime_state.h"
 #include "vec/sink/vdata_stream_sender.h"
 #include "vec/sink/vjdbc_table_sink.h"
@@ -66,9 +59,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink
                 state, pool, params.sender_id, row_desc, thrift_sink.stream_sink,
                 params.destinations, 16 * 1024, send_query_statistics_with_every_batch);
     } else {
-        tmp_sink = new DataStreamSender(pool, params.sender_id, row_desc,
-                                        thrift_sink.stream_sink, params.destinations, 16 * 1024,
-                                        send_query_statistics_with_every_batch);
+        return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+.");
     }
     // RETURN_IF_ERROR(sender->prepare(state->obj_pool(), thrift_sink.stream_sink));
     sink->reset(tmp_sink);
@@ -84,7 +75,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink
         tmp_sink = new doris::vectorized::VResultSink(row_desc, output_exprs,
                                                       thrift_sink.result_sink, 4096);
     } else {
-        tmp_sink = new ResultSink(row_desc, output_exprs, thrift_sink.result_sink, 1024);
+        return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+.");
    }
     sink->reset(tmp_sink);
     break;
@@ -112,14 +103,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink
                     send_query_statistics_with_every_batch, output_exprs);
         }
     } else {
-        // Result file sink is not the top sink
-        if (params.__isset.destinations && params.destinations.size() > 0) {
-            tmp_sink =
-                    new ResultFileSink(row_desc, output_exprs, thrift_sink.result_file_sink,
-                                       params.destinations, pool, params.sender_id, desc_tbl);
-        } else {
-            tmp_sink = new ResultFileSink(row_desc, output_exprs, thrift_sink.result_file_sink);
-        }
+        return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+.");
Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } sink->reset(tmp_sink); @@ -144,9 +128,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink new doris::vectorized::VMysqlTableSink(pool, row_desc, output_exprs); sink->reset(vmysql_tbl_sink); } else { - // TODO: figure out good buffer size based on size of output row - MysqlTableSink* mysql_tbl_sink = new MysqlTableSink(pool, row_desc, output_exprs); - sink->reset(mysql_tbl_sink); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } break; #else @@ -161,7 +143,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink if (state->enable_vectorized_exec()) { sink->reset(new vectorized::VOdbcTableSink(pool, row_desc, output_exprs)); } else { - sink->reset(new OdbcTableSink(pool, row_desc, output_exprs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } break; } @@ -185,12 +167,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink } case TDataSinkType::EXPORT_SINK: { - if (!thrift_sink.__isset.export_sink) { - return Status::InternalError("Missing export sink sink."); - } - - std::unique_ptr export_sink(new ExportSink(pool, row_desc, output_exprs)); - sink->reset(export_sink.release()); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); break; } case TDataSinkType::OLAP_TABLE_SINK: { @@ -199,7 +176,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink if (state->enable_vectorized_exec()) { sink->reset(new stream_load::VOlapTableSink(pool, row_desc, output_exprs, &status)); } else { - sink->reset(new stream_load::OlapTableSink(pool, row_desc, output_exprs, &status)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } RETURN_IF_ERROR(status); break; diff --git a/be/src/exec/empty_set_node.cpp b/be/src/exec/empty_set_node.cpp deleted file mode 100644 index f4f5847417..0000000000 --- a/be/src/exec/empty_set_node.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
diff --git a/be/src/exec/empty_set_node.cpp b/be/src/exec/empty_set_node.cpp
deleted file mode 100644
index f4f5847417..0000000000
--- a/be/src/exec/empty_set_node.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/empty-set-node.cpp
-// and modified by Doris
-
-#include "exec/empty_set_node.h"
-
-namespace doris {
-
-EmptySetNode::EmptySetNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
-        : ExecNode(pool, tnode, descs) {}
-
-Status EmptySetNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
-    *eos = true;
-    return Status::OK();
-}
-
-} // namespace doris
diff --git a/be/src/exec/empty_set_node.h b/be/src/exec/empty_set_node.h
deleted file mode 100644
index 3739a25a96..0000000000
--- a/be/src/exec/empty_set_node.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/empty-set-node.h
-// and modified by Doris
-
-#pragma once
-
-#include "exec/exec_node.h"
-
-namespace doris {
-
-/// Node that returns an empty result set, i.e., just sets eos_ in GetNext().
-/// Corresponds to EmptySetNode.java in the FE.
-class EmptySetNode : public ExecNode {
-public:
-    EmptySetNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override;
-};
-
-} // namespace doris
- -#include "exec/es_http_scan_node.h" - -#include -#include - -#include "common/object_pool.h" -#include "exec/es/es_predicate.h" -#include "exec/es/es_query_builder.h" -#include "exec/es/es_scan_reader.h" -#include "exec/es/es_scroll_query.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "service/backend_options.h" -#include "util/runtime_profile.h" - -namespace doris { - -EsHttpScanNode::EsHttpScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ScanNode(pool, tnode, descs), - _tuple_id(tnode.es_scan_node.tuple_id), - _runtime_state(nullptr), - _tuple_desc(nullptr), - _num_running_scanners(0), - _scan_finished(false), - _eos(false), - _max_buffered_batches(1024), - _wait_scanner_timer(nullptr) {} - -EsHttpScanNode::~EsHttpScanNode() {} - -Status EsHttpScanNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ScanNode::init(tnode, state)); - - // use TEsScanNode - _properties = tnode.es_scan_node.properties; - - if (tnode.es_scan_node.__isset.docvalue_context) { - _docvalue_context = tnode.es_scan_node.docvalue_context; - } - - if (tnode.es_scan_node.__isset.fields_context) { - _fields_context = tnode.es_scan_node.fields_context; - } - return Status::OK(); -} - -Status EsHttpScanNode::prepare(RuntimeState* state) { - VLOG_QUERY << "EsHttpScanNode prepare"; - RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - _scanner_profile.reset(new RuntimeProfile("EsHttpScanNode")); - runtime_profile()->add_child(_scanner_profile.get(), true, nullptr); - - _runtime_state = state; - _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - if (_tuple_desc == nullptr) { - return Status::InternalError("Failed to get tuple descriptor, _tuple_id={}", _tuple_id); - } - - // set up column name vector for ESScrollQueryBuilder - for (auto slot_desc : _tuple_desc->slots()) { - if (!slot_desc->is_materialized()) { - continue; - } - _column_names.push_back(slot_desc->col_name()); - } - - _wait_scanner_timer = ADD_TIMER(runtime_profile(), "WaitScannerTime"); - - return Status::OK(); -} - -// build predicate -Status EsHttpScanNode::build_conjuncts_list() { - Status status = Status::OK(); - _conjunct_to_predicate.resize(_conjunct_ctxs.size()); - - for (int i = 0; i < _conjunct_ctxs.size(); ++i) { - EsPredicate* predicate = _pool->add(new EsPredicate(_conjunct_ctxs[i], _tuple_desc, _pool)); - predicate->set_field_context(_fields_context); - status = predicate->build_disjuncts_list(); - if (status.ok()) { - _conjunct_to_predicate[i] = _predicate_to_conjunct.size(); - _predicate_to_conjunct.push_back(i); - - _predicates.push_back(predicate); - } else { - _conjunct_to_predicate[i] = -1; - - VLOG_CRITICAL << status; - status = predicate->get_es_query_status(); - if (!status.ok()) { - LOG(WARNING) << status; - return status; - } - } - } - - return Status::OK(); -} - -Status EsHttpScanNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_CANCELLED(state); - - if (_properties.find(ESScanReader::KEY_QUERY_DSL) != _properties.end()) { - RETURN_IF_ERROR(start_scanners()); - return Status::OK(); - } - - // if conjunct is constant, compute direct and set eos = true - for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - if (_conjunct_ctxs[conj_idx]->root()->is_constant()) { - 
-            void* value = _conjunct_ctxs[conj_idx]->get_value(nullptr);
-            if (value == nullptr || *reinterpret_cast<bool*>(value) == false) {
-                _eos = true;
-            }
-        }
-    }
-
-    RETURN_IF_ERROR(build_conjuncts_list());
-
-    // remove those predicates which ES cannot support
-    std::vector<bool> list;
-    BooleanQueryBuilder::validate(_predicates, &list);
-
-    DCHECK(list.size() == _predicate_to_conjunct.size());
-    for (int i = list.size() - 1; i >= 0; i--) {
-        if (!list[i]) {
-            _predicate_to_conjunct.erase(_predicate_to_conjunct.begin() + i);
-            _predicates.erase(_predicates.begin() + i);
-        }
-    }
-
-    // filter the conjuncts and ES will process them later
-    for (int i = _predicate_to_conjunct.size() - 1; i >= 0; i--) {
-        int conjunct_index = _predicate_to_conjunct[i];
-        _conjunct_ctxs[conjunct_index]->close(_runtime_state);
-        _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index);
-    }
-
-    auto checker = [&](int index) {
-        return _conjunct_to_predicate[index] != -1 && list[_conjunct_to_predicate[index]];
-    };
-    _peel_pushed_vconjunct(state, checker);
-
-    RETURN_IF_ERROR(start_scanners());
-
-    return Status::OK();
-}
-
-Status EsHttpScanNode::start_scanners() {
-    {
-        std::unique_lock<std::mutex> l(_batch_queue_lock);
-        _num_running_scanners = _scan_ranges.size();
-    }
-
-    _scanners_status.resize(_scan_ranges.size());
-    for (int i = 0; i < _scan_ranges.size(); i++) {
-        _scanner_threads.emplace_back(&EsHttpScanNode::scanner_worker, this, i, _scan_ranges.size(),
-                                      std::ref(_scanners_status[i]));
-    }
-    return Status::OK();
-}
-
-Status EsHttpScanNode::collect_scanners_status() {
-    // NOTE: if open() was called, but set_range() was NOT called for some reason,
-    // and close() was then called,
-    // there would be a core dump because _scanners_status's iterator was in [0, _scan_ranges) rather than [0, _scanners_status).
-    // it is said that the fragment-call-frame is calling scan-node in this way....
-    // in my opinion, it's better fixed in the fragment-call-frame, e.g. call close() according to the return value of open()
-    for (int i = 0; i < _scanners_status.size(); i++) {
-        std::future<Status> f = _scanners_status[i].get_future();
-        RETURN_IF_ERROR(f.get());
-    }
-    return Status::OK();
-}
-
-Status EsHttpScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    if (state->is_cancelled()) {
-        std::unique_lock<std::mutex> l(_batch_queue_lock);
-        if (update_status(Status::Cancelled("Cancelled"))) {
-            _queue_writer_cond.notify_all();
-        }
-    }
-
-    if (_eos) {
-        *eos = true;
-        return Status::OK();
-    }
-
-    if (_scan_finished.load()) {
-        *eos = true;
-        return Status::OK();
-    }
-
-    std::shared_ptr<RowBatch> scanner_batch;
-    {
-        std::unique_lock<std::mutex> l(_batch_queue_lock);
-        while (_process_status.ok() && !_runtime_state->is_cancelled() &&
-               _num_running_scanners > 0 && _batch_queue.empty()) {
-            SCOPED_TIMER(_wait_scanner_timer);
-            _queue_reader_cond.wait_for(l, std::chrono::seconds(1));
-        }
-        if (!_process_status.ok()) {
-            // Some scanner process failed.
-            return _process_status;
-        }
-        if (_runtime_state->is_cancelled()) {
-            if (update_status(Status::Cancelled("Cancelled"))) {
-                _queue_writer_cond.notify_all();
-            }
-            return _process_status;
-        }
-        if (!_batch_queue.empty()) {
-            scanner_batch = _batch_queue.front();
-            _batch_queue.pop_front();
-        }
-    }
-
-    // All scanners have finished, and all cached batches have been read
-    if (scanner_batch == nullptr) {
-        _scan_finished.store(true);
-        *eos = true;
-        return Status::OK();
-    }
-
-    // notify one scanner
-    _queue_writer_cond.notify_one();
-
-    // get scanner's batch memory
-    row_batch->acquire_state(scanner_batch.get());
-    _num_rows_returned += row_batch->num_rows();
-    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
-
-    // This is the first time we reach the limit.
-    // Only valid when query 'select * from table1 limit 20'
-    if (reached_limit()) {
-        int num_rows_over = _num_rows_returned - _limit;
-        row_batch->set_num_rows(row_batch->num_rows() - num_rows_over);
-        _num_rows_returned -= num_rows_over;
-        COUNTER_SET(_rows_returned_counter, _num_rows_returned);
-
-        _scan_finished.store(true);
-        _queue_writer_cond.notify_all();
-        *eos = true;
-    } else {
-        *eos = false;
-    }
-
-    if (VLOG_ROW_IS_ON) {
-        for (int i = 0; i < row_batch->num_rows(); ++i) {
-            TupleRow* row = row_batch->get_row(i);
-            VLOG_ROW << "EsHttpScanNode output row: "
-                     << Tuple::to_string(row->get_tuple(0), *_tuple_desc);
-        }
-    }
-
-    return Status::OK();
-}
-
-Status EsHttpScanNode::close(RuntimeState* state) {
-    if (is_closed()) {
-        return Status::OK();
-    }
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    _scan_finished.store(true);
-    _queue_writer_cond.notify_all();
-    _queue_reader_cond.notify_all();
-    for (int i = 0; i < _scanner_threads.size(); ++i) {
-        _scanner_threads[i].join();
-    }
-
-    _batch_queue.clear();
-
-    // don't need to hold lock to update_status in close function
-    // collect scanners status
-    update_status(collect_scanners_status());
-
-    // close exec node
-    update_status(ExecNode::close(state));
-
-    return _process_status;
-}
-
-// This function is called after plan node has been prepared.
-Status EsHttpScanNode::set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) {
-    _scan_ranges = scan_ranges;
-    return Status::OK();
-}
-
-void EsHttpScanNode::debug_string(int ident_level, std::stringstream* out) const {
-    (*out) << "EsHttpScanNode";
-}
-
-Status EsHttpScanNode::scanner_scan(std::unique_ptr<EsHttpScanner> scanner,
-                                    const std::vector<ExprContext*>& conjunct_ctxs,
-                                    EsScanCounter* counter) {
-    RETURN_IF_ERROR(scanner->open());
-    bool scanner_eof = false;
-
-    while (!scanner_eof) {
-        // Fill one row batch
-        std::shared_ptr<RowBatch> row_batch(new RowBatch(row_desc(), _runtime_state->batch_size()));
-
-        // create new tuple buffer for row_batch
-        MemPool* tuple_pool = row_batch->tuple_data_pool();
-        int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size();
-        void* tuple_buffer = tuple_pool->allocate(tuple_buffer_size);
-        if (tuple_buffer == nullptr) {
-            return Status::InternalError("Allocate memory for row batch failed.");
-        }
-
-        Tuple* tuple = reinterpret_cast<Tuple*>(tuple_buffer);
-        while (!scanner_eof) {
-            RETURN_IF_CANCELLED(_runtime_state);
-            // If we have finished all work
-            if (_scan_finished.load()) {
-                return Status::OK();
-            }
-
-            // This row batch has been filled up, so break out
-            if (row_batch->is_full()) {
-                break;
-            }
-
-            int row_idx = row_batch->add_row();
-            TupleRow* row = row_batch->get_row(row_idx);
-            // scan node is the first tuple of tuple row
-            row->set_tuple(0, tuple);
-            memset(tuple, 0, _tuple_desc->num_null_bytes());
-
-            // Get from scanner
-            RETURN_IF_ERROR(scanner->get_next(tuple, tuple_pool, &scanner_eof, _docvalue_context));
-            if (scanner_eof) {
-                continue;
-            }
-
-            // eval conjuncts of this row.
-            if (eval_conjuncts(&conjunct_ctxs[0], conjunct_ctxs.size(), row)) {
-                row_batch->commit_last_row();
-                char* new_tuple = reinterpret_cast<char*>(tuple);
-                new_tuple += _tuple_desc->byte_size();
-                tuple = reinterpret_cast<Tuple*>(new_tuple);
-                counter->num_rows_returned++;
-            } else {
-                counter->num_rows_filtered++;
-            }
-        }
-
-        // Row batch has been filled, push it to the queue
-        if (row_batch->num_rows() > 0) {
-            std::unique_lock<std::mutex> l(_batch_queue_lock);
-            while (_process_status.ok() && !_scan_finished.load() &&
-                   !_runtime_state->is_cancelled() &&
-                   _batch_queue.size() >= _max_buffered_batches) {
-                _queue_writer_cond.wait_for(l, std::chrono::seconds(1));
-            }
-            // Process already set failed, so we just return OK
-            if (!_process_status.ok()) {
-                return Status::OK();
-            }
-            // Scan already finished, just return
-            if (_scan_finished.load()) {
-                return Status::OK();
-            }
-            // Runtime state is canceled, just return cancel
-            if (_runtime_state->is_cancelled()) {
-                return Status::Cancelled("Cancelled");
-            }
-            // Queue size must be smaller than _max_buffered_batches
-            _batch_queue.push_back(row_batch);
-
-            // Notify reader to process
-            _queue_reader_cond.notify_one();
-        }
-    }
-
-    return Status::OK();
-}
-
-// Prefer the local host
-static std::string get_host_port(const std::vector<TNetworkAddress>& es_hosts) {
-    std::string host_port;
-    std::string localhost = BackendOptions::get_localhost();
-
-    TNetworkAddress host = es_hosts[0];
-    for (auto& es_host : es_hosts) {
-        if (es_host.hostname == localhost) {
-            host = es_host;
-            break;
-        }
-    }
-
-    host_port = host.hostname;
-    host_port += ":";
-    host_port += std::to_string(host.port);
-    return host_port;
-}
-
-void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise<Status>& p_status) {
-    SCOPED_ATTACH_TASK(_runtime_state);
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh_shared());
-    // Clone expr context
-    std::vector<ExprContext*> scanner_expr_ctxs;
-    DCHECK(start_idx < length);
-    auto status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state, &scanner_expr_ctxs);
-    if (!status.ok()) {
-        LOG(WARNING) << "Clone conjuncts failed.";
-    }
-
-    EsScanCounter counter;
-    const TEsScanRange& es_scan_range = _scan_ranges[start_idx].scan_range.es_scan_range;
-
-    // Collect the information from scan range to properties
-    std::map<std::string, std::string> properties(_properties);
-    properties[ESScanReader::KEY_INDEX] = es_scan_range.index;
-    if (es_scan_range.__isset.type) {
-        properties[ESScanReader::KEY_TYPE] = es_scan_range.type;
-    }
-    properties[ESScanReader::KEY_SHARD] = std::to_string(es_scan_range.shard_id);
-    properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_runtime_state->batch_size());
-    properties[ESScanReader::KEY_HOST_PORT] = get_host_port(es_scan_range.es_hosts);
-    // push down limit to Elasticsearch
-    // if a predicate in _conjunct_ctxs cannot be processed by Elasticsearch, we cannot push down the limit operator to Elasticsearch
-    if (limit() != -1 && limit() <= _runtime_state->batch_size() && _conjunct_ctxs.empty()) {
-        properties[ESScanReader::KEY_TERMINATE_AFTER] = std::to_string(limit());
-    }
-
-    bool doc_value_mode = false;
-    properties[ESScanReader::KEY_QUERY] = ESScrollQueryBuilder::build(
-            properties, _column_names, _predicates, _docvalue_context, &doc_value_mode);
-
-    // start scanner to scan
-    std::unique_ptr<EsHttpScanner> scanner(
-            new EsHttpScanner(_runtime_state, runtime_profile(), _tuple_id, properties,
-                              scanner_expr_ctxs, &counter, doc_value_mode));
-    status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter);
-    if (!status.ok()) {
-        LOG(WARNING) << "Scanner[" << start_idx << "] process failed. status=" << status;
-    }
-
-    // scanner is going to finish
-    {
-        std::lock_guard<std::mutex> l(_batch_queue_lock);
-        if (!status.ok()) {
-            update_status(status);
-        }
-        // This scanner will finish
-        _num_running_scanners--;
-    }
-    _queue_reader_cond.notify_all();
-    // If one scanner failed, the others don't need to scan any more
-    if (!status.ok()) {
-        _queue_writer_cond.notify_all();
-    }
-
-    p_status.set_value(status);
-}
-} // namespace doris
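The scan node deleted above coordinated its scanner threads through a bounded batch queue guarded by _batch_queue_lock and a reader/writer condition-variable pair, with timed waits so both sides could recheck cancellation. A self-contained sketch of that producer/consumer core (simplified; int stands in for RowBatch, and the names are not Doris APIs):

```cpp
// Producers block while the queue is full, the consumer blocks while it is
// empty; wait_for() mirrors the 1-second timed waits in the removed code.
#include <chrono>
#include <condition_variable>
#include <deque>
#include <iostream>
#include <mutex>
#include <thread>

class BoundedQueue {
public:
    explicit BoundedQueue(size_t cap) : _cap(cap) {}

    void push(int batch) {
        std::unique_lock<std::mutex> l(_lock);
        _writer_cv.wait(l, [&] { return _q.size() < _cap || _done; });
        if (_done) return;
        _q.push_back(batch);
        _reader_cv.notify_one();
    }

    bool pop(int* batch) {
        std::unique_lock<std::mutex> l(_lock);
        while (_q.empty() && !_done) {
            // timed wait: the real code rechecked is_cancelled() here
            _reader_cv.wait_for(l, std::chrono::milliseconds(100));
        }
        if (_q.empty()) return false;
        *batch = _q.front();
        _q.pop_front();
        _writer_cv.notify_one();
        return true;
    }

    void finish() {
        std::lock_guard<std::mutex> l(_lock);
        _done = true;
        _reader_cv.notify_all();
        _writer_cv.notify_all();
    }

private:
    std::mutex _lock;
    std::condition_variable _reader_cv;
    std::condition_variable _writer_cv;
    std::deque<int> _q;
    size_t _cap;
    bool _done = false;
};

int main() {
    BoundedQueue q(4);
    std::thread producer([&] {
        for (int i = 0; i < 8; ++i) q.push(i);
        q.finish();
    });
    int b;
    while (q.pop(&b)) std::cout << "batch " << b << "\n";
    producer.join();
    return 0;
}
```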
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "exec/es_http_scanner.h" -#include "exec/scan_node.h" -#include "gen_cpp/PaloInternalService_types.h" - -namespace doris { - -class RuntimeState; -class PartRangeKey; -class PartitionInfo; -class EsHttpScanCounter; -class EsPredicate; - -class EsHttpScanNode : public ScanNode { -public: - EsHttpScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - virtual ~EsHttpScanNode(); - - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; - virtual Status prepare(RuntimeState* state) override; - virtual Status open(RuntimeState* state) override; - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; - virtual Status close(RuntimeState* state) override; - virtual Status set_scan_ranges(const std::vector& scan_ranges) override; - -protected: - // Write debug string of this into out. - virtual void debug_string(int indentation_level, std::stringstream* out) const override; - - // Update process status to one failed status, - // NOTE: Must hold the mutex of this scan node - bool update_status(const Status& new_status) { - if (_process_status.ok()) { - _process_status = new_status; - return true; - } - return false; - } - - // One scanner worker, This scanner will handle 'length' ranges start from start_idx - virtual void scanner_worker(int start_idx, int length, std::promise& p_status); - - TupleId _tuple_id; - RuntimeState* _runtime_state; - TupleDescriptor* _tuple_desc; - - int _num_running_scanners; - std::atomic _scan_finished; - bool _eos; - int _max_buffered_batches; - RuntimeProfile::Counter* _wait_scanner_timer; - - Status _process_status; - - std::map _docvalue_context; - - std::condition_variable _queue_reader_cond; - std::condition_variable _queue_writer_cond; - -private: - // Create scanners to do scan job - Status start_scanners(); - - // Collect all scanners 's status - Status collect_scanners_status(); - - // Scan one range - Status scanner_scan(std::unique_ptr scanner, - const std::vector& conjunct_ctxs, EsScanCounter* counter); - - Status build_conjuncts_list(); - - std::vector _scanner_threads; - std::vector> _scanners_status; - std::map _properties; - std::map _fields_context; - std::vector _scan_ranges; - std::vector _column_names; - - std::mutex _batch_queue_lock; - std::deque> _batch_queue; - std::vector _predicates; - - std::vector _predicate_to_conjunct; - std::vector _conjunct_to_predicate; - - std::unique_ptr _scanner_profile; -}; - -} // namespace doris diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp deleted file mode 100644 index cb9747673f..0000000000 --- a/be/src/exec/es_http_scanner.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/es_http_scanner.h" - -#include -#include - -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/descriptors.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" - -namespace doris { - -EsHttpScanner::EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id, - const std::map& properties, - const std::vector& conjunct_ctxs, EsScanCounter* counter, - bool doc_value_mode) - : _state(state), - _profile(profile), - _tuple_id(tuple_id), - _properties(properties), - _conjunct_ctxs(conjunct_ctxs), - _next_range(0), - _line_eof(false), - _batch_eof(false), - _mem_pool(new MemPool()), - _tuple_desc(nullptr), - _counter(counter), - _es_reader(nullptr), - _es_scroll_parser(nullptr), - _doc_value_mode(doc_value_mode), - _rows_read_counter(nullptr), - _read_timer(nullptr), - _materialize_timer(nullptr) {} - -EsHttpScanner::~EsHttpScanner() { - close(); -} - -Status EsHttpScanner::open() { - _tuple_desc = _state->desc_tbl().get_tuple_descriptor(_tuple_id); - if (_tuple_desc == nullptr) { - return Status::InternalError("Unknown tuple descriptor, tuple_id={}", _tuple_id); - } - - const std::string& host = _properties.at(ESScanReader::KEY_HOST_PORT); - _es_reader.reset(new ESScanReader(host, _properties, _doc_value_mode)); - if (_es_reader == nullptr) { - return Status::InternalError("Es reader construct failed."); - } - - RETURN_IF_ERROR(_es_reader->open()); - - _rows_read_counter = ADD_COUNTER(_profile, "RowsRead", TUnit::UNIT); - _read_timer = ADD_TIMER(_profile, "TotalRawReadTime(*)"); - _materialize_timer = ADD_TIMER(_profile, "MaterializeTupleTime(*)"); - - return Status::OK(); -} - -Status EsHttpScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, - const std::map& docvalue_context) { - SCOPED_TIMER(_read_timer); - if (_line_eof && _batch_eof) { - *eof = true; - return Status::OK(); - } - - while (!_batch_eof) { - if (_line_eof || _es_scroll_parser == nullptr) { - RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, _es_scroll_parser)); - if (_batch_eof) { - *eof = true; - return Status::OK(); - } - } - - COUNTER_UPDATE(_rows_read_counter, 1); - SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(_es_scroll_parser->fill_tuple(_tuple_desc, tuple, tuple_pool, &_line_eof, - docvalue_context)); - if (!_line_eof) { - break; - } - } - - return Status::OK(); -} - -void EsHttpScanner::close() { - if (_es_reader != nullptr) { - _es_reader->close(); - } - - Expr::close(_conjunct_ctxs, _state); -} - -} // namespace doris diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h deleted file mode 100644 index 4e86a2ba36..0000000000 --- a/be/src/exec/es_http_scanner.h +++ /dev/null @@ -1,96 +0,0 @@ - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/global_types.h" -#include "common/status.h" -#include "exec/es/es_scan_reader.h" -#include "runtime/mem_pool.h" -#include "util/runtime_profile.h" - -namespace doris { - -class Tuple; -class SlotDescriptor; -class RuntimeState; -class ExprContext; -class TextConverter; -class TupleDescriptor; -class TupleRow; -class RowDescriptor; -class RuntimeProfile; - -struct EsScanCounter { - EsScanCounter() : num_rows_returned(0), num_rows_filtered(0) {} - - int64_t num_rows_returned; - int64_t num_rows_filtered; -}; - -class EsHttpScanner { -public: - EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id, - const std::map& properties, - const std::vector& conjunct_ctxs, EsScanCounter* counter, - bool doc_value_mode); - ~EsHttpScanner(); - - Status open(); - - Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, - const std::map& docvalue_context); - - void close(); - -protected: - RuntimeState* _state; - RuntimeProfile* _profile; - TupleId _tuple_id; - const std::map& _properties; - const std::vector& _conjunct_ctxs; - - int _next_range; - bool _line_eof; - bool _batch_eof; - - std::vector _slot_descs; - std::unique_ptr _row_desc; - - std::unique_ptr _mem_pool; - - const TupleDescriptor* _tuple_desc; - EsScanCounter* _counter; - std::unique_ptr _es_reader; - std::unique_ptr _es_scroll_parser; - - bool _doc_value_mode; - - // Profile - RuntimeProfile::Counter* _rows_read_counter; - RuntimeProfile::Counter* _read_timer; - RuntimeProfile::Counter* _materialize_timer; -}; - -} // namespace doris diff --git a/be/src/exec/except_node.cpp b/be/src/exec/except_node.cpp deleted file mode 100644 index 4a1adf2f43..0000000000 --- a/be/src/exec/except_node.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/except_node.h" - -#include "exec/hash_table.h" -#include "exprs/expr.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" - -namespace doris { -ExceptNode::ExceptNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : SetOperationNode(pool, tnode, descs, tnode.except_node.tuple_id) {} - -Status ExceptNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(SetOperationNode::init(tnode, state)); - // Create result_expr_ctx_lists_ from thrift exprs. 
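// The EXCEPT evaluation implemented by open()/get_next() below builds a hash
// table from the first child, probes it with every later child to mark
// matches, and finally emits only the rows that were never matched. A minimal
// sketch of the same idea on plain ints (illustrative only, not the Doris API):
#include <unordered_set>
#include <vector>

std::vector<int> except_distinct(const std::vector<std::vector<int>>& children) {
    // Build from the first child; duplicates collapse, like the deduplicated build table.
    std::unordered_set<int> tbl(children[0].begin(), children[0].end());
    // Probe with each later child; erasing a key stands in for "set matched".
    for (size_t i = 1; i < children.size() && !tbl.empty(); ++i) {
        for (int v : children[i]) tbl.erase(v);
    }
    // Whatever was never matched is the EXCEPT result.
    return std::vector<int>(tbl.begin(), tbl.end());
}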
- auto& result_texpr_lists = tnode.except_node.result_expr_lists; - for (auto& texprs : result_texpr_lists) { - std::vector ctxs; - RETURN_IF_ERROR(Expr::create_expr_trees(_pool, texprs, &ctxs)); - _child_expr_lists.push_back(ctxs); - } - return Status::OK(); -} - -Status ExceptNode::open(RuntimeState* state) { - RETURN_IF_ERROR(SetOperationNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // if a table is empty, the result must be empty - if (_hash_tbl->size() == 0) { - _hash_tbl_iterator = _hash_tbl->begin(); - return Status::OK(); - } - bool eos = false; - _valid_element_in_hash_tbl = _hash_tbl->num_filled_buckets(); - - for (int i = 1; i < _children.size(); ++i) { - // rebuild hash table, for first time will rebuild with the no duplicated _hash_tbl, - if (i > 1) { - RETURN_IF_ERROR(refresh_hash_table(i)); - } - - // probe - _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size())); - ScopedTimer probe_timer(_probe_timer); - RETURN_IF_ERROR(child(i)->open(state)); - eos = false; - while (!eos) { - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(child(i)->get_next(state, _probe_batch.get(), &eos)); - for (int j = 0; j < _probe_batch->num_rows(); ++j) { - _hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j)); - if (_hash_tbl_iterator != _hash_tbl->end()) { - if (!_hash_tbl_iterator.matched()) { - _hash_tbl_iterator.set_matched(); - _valid_element_in_hash_tbl--; - } - } - } - _probe_batch->reset(); - } - // if a table is empty, the result must be empty - if (_hash_tbl->size() == 0) { - break; - } - } - _hash_tbl_iterator = _hash_tbl->begin(); - return Status::OK(); -} - -Status ExceptNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) { - RETURN_IF_CANCELLED(state); - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - *eos = true; - if (reached_limit()) { - return Status::OK(); - } - int64_t tuple_buf_size; - uint8_t* tuple_buf; - RETURN_IF_ERROR( - out_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); - memset(tuple_buf, 0, tuple_buf_size); - while (_hash_tbl_iterator.has_next()) { - if (!_hash_tbl_iterator.matched()) { - create_output_row(_hash_tbl_iterator.get_row(), out_batch, tuple_buf); - tuple_buf += _tuple_desc->byte_size(); - ++_num_rows_returned; - } - _hash_tbl_iterator.next(); - *eos = !_hash_tbl_iterator.has_next() || reached_limit(); - if (out_batch->is_full() || out_batch->at_resource_limit() || *eos) { - return Status::OK(); - } - } - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exec/except_node.h b/be/src/exec/except_node.h deleted file mode 100644 index 9ea163289c..0000000000 --- a/be/src/exec/except_node.h +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exec/set_operation_node.h" - -namespace doris { - -class MemPool; -class RowBatch; -class TupleRow; - -// Node that calculate the except results of its children by either materializing their -// evaluated expressions into row batches or passing through (forwarding) the -// batches if the input tuple layout is identical to the output tuple layout -// and expressions don't need to be evaluated. The except node pulls from its -// children sequentially, i.e. -// it exhausts one child completely before moving on to the next one. -class ExceptNode : public SetOperationNode { -public: - ExceptNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); - virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); -}; - -}; // namespace doris diff --git a/be/src/exec/exchange_node.cpp b/be/src/exec/exchange_node.cpp deleted file mode 100644 index 5bde6bc324..0000000000 --- a/be/src/exec/exchange_node.cpp +++ /dev/null @@ -1,257 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/exchange-node.cc -// and modified by Doris - -#include "exec/exchange_node.h" - -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/data_stream_mgr.h" -#include "runtime/data_stream_recvr.h" -#include "runtime/exec_env.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "util/runtime_profile.h" - -namespace doris { -using namespace ErrorCode; - -ExchangeNode::ExchangeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _num_senders(0), - _stream_recvr(nullptr), - _input_row_desc(descs, tnode.exchange_node.input_row_tuples, - std::vector(tnode.nullable_tuples.begin(), - tnode.nullable_tuples.begin() + - tnode.exchange_node.input_row_tuples.size())), - _next_row_idx(0), - _is_merging(tnode.exchange_node.__isset.sort_info), - _offset(tnode.exchange_node.__isset.offset ? 
tnode.exchange_node.offset : 0), - _num_rows_skipped(0) { - DCHECK_GE(_offset, 0); - DCHECK(_is_merging || (_offset == 0)); -} - -Status ExchangeNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - if (!_is_merging) { - return Status::OK(); - } - - RETURN_IF_ERROR(_sort_exec_exprs.init(tnode.exchange_node.sort_info, _pool)); - _is_asc_order = tnode.exchange_node.sort_info.is_asc_order; - _nulls_first = tnode.exchange_node.sort_info.nulls_first; - return Status::OK(); -} - -Status ExchangeNode::prepare(RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - _convert_row_batch_timer = ADD_TIMER(runtime_profile(), "ConvertRowBatchTime"); - // TODO: figure out appropriate buffer size - DCHECK_GT(_num_senders, 0); - _sub_plan_query_statistics_recvr.reset(new QueryStatisticsRecvr()); - _stream_recvr = state->exec_env()->stream_mgr()->create_recvr( - state, _input_row_desc, state->fragment_instance_id(), _id, _num_senders, - config::exchg_node_buffer_size_bytes, _runtime_profile.get(), _is_merging, - _sub_plan_query_statistics_recvr); - if (_is_merging) { - RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, _row_descriptor, _row_descriptor)); - // AddExprCtxsToFree(_sort_exec_exprs); - } - return Status::OK(); -} - -Status ExchangeNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - if (_is_merging) { - RETURN_IF_ERROR(_sort_exec_exprs.open(state)); - TupleRowComparator less_than(_sort_exec_exprs, _is_asc_order, _nulls_first); - // create_merger() will populate its merging heap with batches from the _stream_recvr, - // so it is not necessary to call fill_input_row_batch(). - if (state->enable_exchange_node_parallel_merge()) { - RETURN_IF_ERROR(_stream_recvr->create_parallel_merger(less_than, state->batch_size())); - } else { - RETURN_IF_ERROR(_stream_recvr->create_merger(less_than)); - } - } else { - RETURN_IF_ERROR(fill_input_row_batch(state)); - } - return Status::OK(); -} - -Status ExchangeNode::collect_query_statistics(QueryStatistics* statistics) { - RETURN_IF_ERROR(ExecNode::collect_query_statistics(statistics)); - statistics->merge(_sub_plan_query_statistics_recvr.get()); - return Status::OK(); -} - -Status ExchangeNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - if (_is_merging) { - _sort_exec_exprs.close(state); - } - if (_stream_recvr != nullptr) { - _stream_recvr->close(); - } - // _stream_recvr.reset(); - return ExecNode::close(state); -} - -Status ExchangeNode::fill_input_row_batch(RuntimeState* state) { - DCHECK(!_is_merging); - Status ret_status; - { - // SCOPED_TIMER(state->total_network_receive_timer()); - ret_status = _stream_recvr->get_batch(&_input_batch); - } - VLOG_FILE << "exch: has batch=" << (_input_batch == nullptr ? "false" : "true") - << " #rows=" << (_input_batch != nullptr ? _input_batch->num_rows() : 0) - << " is_cancelled=" << (ret_status.is() ? 
"true" : "false") - << " instance_id=" << state->fragment_instance_id(); - return ret_status; -} - -Status ExchangeNode::get_next(RuntimeState* state, RowBatch* output_batch, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - if (reached_limit()) { - _stream_recvr->transfer_all_resources(output_batch); - *eos = true; - return Status::OK(); - } else { - *eos = false; - } - - if (_is_merging) { - return get_next_merging(state, output_batch, eos); - } - - ExprContext* const* ctxs = &_conjunct_ctxs[0]; - int num_ctxs = _conjunct_ctxs.size(); - - while (true) { - { - SCOPED_TIMER(_convert_row_batch_timer); - RETURN_IF_CANCELLED(state); - // copy rows until we hit the limit/capacity or until we exhaust _input_batch - while (!reached_limit() && !output_batch->at_capacity() && _input_batch != nullptr && - _next_row_idx < _input_batch->num_rows()) { - TupleRow* src = _input_batch->get_row(_next_row_idx); - - if (ExecNode::eval_conjuncts(ctxs, num_ctxs, src)) { - int j = output_batch->add_row(); - TupleRow* dest = output_batch->get_row(j); - // if the input row is shorter than the output row, make sure not to leave - // uninitialized Tuple* around - output_batch->clear_row(dest); - // this works as expected if rows from input_batch form a prefix of - // rows in output_batch - _input_batch->copy_row(src, dest); - output_batch->commit_last_row(); - ++_num_rows_returned; - } - - ++_next_row_idx; - } - - if (VLOG_ROW_IS_ON) { - VLOG_ROW << "ExchangeNode output batch: " << output_batch->to_string(); - } - - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - if (reached_limit()) { - _stream_recvr->transfer_all_resources(output_batch); - *eos = true; - return Status::OK(); - } - - if (output_batch->at_capacity()) { - *eos = false; - return Status::OK(); - } - } - - // we need more rows - if (_input_batch != nullptr) { - _input_batch->transfer_resource_ownership(output_batch); - } - - RETURN_IF_ERROR(fill_input_row_batch(state)); - *eos = (_input_batch == nullptr); - if (*eos) { - return Status::OK(); - } - - _next_row_idx = 0; - DCHECK(_input_batch->row_desc().layout_is_prefix_of(output_batch->row_desc())); - } -} - -Status ExchangeNode::get_next_merging(RuntimeState* state, RowBatch* output_batch, bool* eos) { - DCHECK_EQ(output_batch->num_rows(), 0); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state("Exchange, while merging next.")); - - RETURN_IF_ERROR(_stream_recvr->get_next(output_batch, eos)); - while ((_num_rows_skipped < _offset)) { - _num_rows_skipped += output_batch->num_rows(); - // Throw away rows in the output batch until the offset is skipped. - int rows_to_keep = _num_rows_skipped - _offset; - if (rows_to_keep > 0) { - output_batch->copy_rows(0, output_batch->num_rows() - rows_to_keep, rows_to_keep); - output_batch->set_num_rows(rows_to_keep); - } else { - output_batch->set_num_rows(0); - } - if (rows_to_keep > 0 || *eos || output_batch->at_capacity()) { - break; - } - RETURN_IF_ERROR(_stream_recvr->get_next(output_batch, eos)); - } - - _num_rows_returned += output_batch->num_rows(); - if (reached_limit()) { - output_batch->set_num_rows(output_batch->num_rows() - (_num_rows_returned - _limit)); - *eos = true; - } - - // On eos, transfer all remaining resources from the input batches maintained - // by the merger to the output batch. 
- if (*eos) { - _stream_recvr->transfer_all_resources(output_batch); - } - - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - return Status::OK(); -} - -void ExchangeNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "ExchangeNode(#senders=" << _num_senders; - ExecNode::debug_string(indentation_level, out); - *out << ")"; -} - -} // namespace doris diff --git a/be/src/exec/exchange_node.h b/be/src/exec/exchange_node.h deleted file mode 100644 index 75ecdf1c0f..0000000000 --- a/be/src/exec/exchange_node.h +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/exchange-node.h -// and modified by Doris - -#pragma once - -#include "exec/exec_node.h" -#include "exec/sort_exec_exprs.h" -#include "runtime/data_stream_recvr.h" - -namespace doris { - -class RowBatch; -class RuntimeProfile; - -// Receiver node for data streams. The data stream receiver is created in Prepare() -// and closed in Close(). -// is_merging is set to indicate that rows from different senders must be merged -// according to the sort parameters in _sort_exec_exprs. (It is assumed that the rows -// received from the senders themselves are sorted.) -// If _is_merging is true, the exchange node creates a DataStreamRecvr with the -// _is_merging flag and retrieves rows from the receiver via calls to -// DataStreamRecvr::GetNext(). It also prepares, opens and closes the ordering exprs in -// its SortExecExprs member that are used to compare rows. -// If _is_merging is false, the exchange node directly retrieves batches from the row -// batch queue of the DataStreamRecvr via calls to DataStreamRecvr::GetBatch(). -class ExchangeNode : public ExecNode { -public: - ExchangeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - virtual ~ExchangeNode() {} - - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; - virtual Status prepare(RuntimeState* state) override; - // Blocks until the first batch is available for consumption via GetNext(). 
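// The merging exchange described above is a k-way merge over the senders'
// already-sorted streams. A minimal sketch of the idea using a min-heap
// (illustrative only; the real receiver merges RowBatches with a
// TupleRowComparator rather than plain ints):
#include <functional>
#include <queue>
#include <tuple>
#include <vector>

std::vector<int> merge_sorted_streams(const std::vector<std::vector<int>>& streams) {
    using Item = std::tuple<int, size_t, size_t>; // (value, stream index, offset)
    std::priority_queue<Item, std::vector<Item>, std::greater<Item>> heap;
    for (size_t s = 0; s < streams.size(); ++s) {
        if (!streams[s].empty()) heap.emplace(streams[s][0], s, size_t(0));
    }
    std::vector<int> out;
    while (!heap.empty()) {
        auto [v, s, off] = heap.top(); // smallest head element among all streams
        heap.pop();
        out.push_back(v);
        if (off + 1 < streams[s].size()) heap.emplace(streams[s][off + 1], s, off + 1);
    }
    return out;
}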
- virtual Status open(RuntimeState* state) override; - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; - Status collect_query_statistics(QueryStatistics* statistics) override; - virtual Status close(RuntimeState* state) override; - - // the number of senders needs to be set after the c'tor, because it's not - // recorded in TPlanNode, and before calling prepare() - void set_num_senders(int num_senders) { _num_senders = num_senders; } - -protected: - virtual void debug_string(int indentation_level, std::stringstream* out) const override; - -private: - // Implements GetNext() for the case where _is_merging is true. Delegates the GetNext() - // call to the underlying DataStreamRecvr. - Status get_next_merging(RuntimeState* state, RowBatch* output_batch, bool* eos); - - // Resets _input_batch to the next batch from the from _stream_recvr's queue. - // Only used when _is_merging is false. - Status fill_input_row_batch(RuntimeState* state); - - int _num_senders; // needed for _stream_recvr construction - - // created in prepare() and owned by the RuntimeState - std::shared_ptr _stream_recvr; - - // our input rows are a prefix of the rows we produce - RowDescriptor _input_row_desc; - - // the size of our input batches does not necessarily match the capacity - // of our output batches, which means that we need to buffer the input - // Current batch of rows from the receiver queue being processed by this node. - // Only valid if _is_merging is false. (If _is_merging is true, GetNext() is - // delegated to the receiver). Owned by the stream receiver. - // std::unique_ptr _input_batch; - RowBatch* _input_batch = nullptr; - - // Next row to copy from _input_batch. For non-merging exchanges, _input_batch - // is retrieved directly from the sender queue in the stream recvr, and rows from - // _input_batch must be copied to the output batch in GetNext(). - int _next_row_idx; - - // time spent reconstructing received rows - RuntimeProfile::Counter* _convert_row_batch_timer; - - // True if this is a merging exchange node. If true, GetNext() is delegated to the - // underlying _stream_recvr, and _input_batch is not used/valid. - bool _is_merging; - - // Sort expressions and parameters passed to the merging receiver.. - SortExecExprs _sort_exec_exprs; - std::vector _is_asc_order; - std::vector _nulls_first; - - // Offset specifying number of rows to skip. - int64_t _offset; - - // Number of rows skipped so far. - int64_t _num_rows_skipped; - - // Sub plan query statistics receiver. It is shared with DataStreamRecvr and will be - // called in two different threads. When ExchangeNode is destructed, this may be accessed - // by recvr thread in DataStreamMgr's transmit_data. 
- std::shared_ptr _sub_plan_query_statistics_recvr; -}; - -}; // namespace doris diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 54238b69c4..7d35db7ad9 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -27,30 +27,7 @@ #include "common/object_pool.h" #include "common/status.h" -#include "exec/analytic_eval_node.h" -#include "exec/assert_num_rows_node.h" -#include "exec/broker_scan_node.h" -#include "exec/cross_join_node.h" -#include "exec/empty_set_node.h" -#include "exec/es_http_scan_node.h" -#include "exec/except_node.h" -#include "exec/exchange_node.h" -#include "exec/hash_join_node.h" -#include "exec/intersect_node.h" -#include "exec/merge_node.h" -#include "exec/mysql_scan_node.h" -#include "exec/odbc_scan_node.h" -#include "exec/olap_scan_node.h" -#include "exec/partitioned_aggregation_node.h" -#include "exec/repeat_node.h" -#include "exec/schema_scan_node.h" -#include "exec/select_node.h" -#include "exec/spill_sort_node.h" -#include "exec/table_function_node.h" -#include "exec/topn_node.h" -#include "exec/union_node.h" #include "exprs/expr_context.h" -#include "odbc_scan_node.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/memory/mem_tracker.h" @@ -449,8 +426,9 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN #ifdef DORIS_WITH_MYSQL if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VMysqlScanNode(pool, tnode, descs)); - } else - *node = pool->add(new MysqlScanNode(pool, tnode, descs)); + } else { + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); + } return Status::OK(); #else return Status::InternalError( @@ -460,7 +438,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::NewOdbcScanNode(pool, tnode, descs)); } else { - *node = pool->add(new OdbcScanNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -482,7 +460,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::NewEsScanNode(pool, tnode, descs)); } else { - *node = pool->add(new EsHttpScanNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -490,7 +468,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VSchemaScanNode(pool, tnode, descs)); } else { - *node = pool->add(new SchemaScanNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -498,7 +476,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::NewOlapScanNode(pool, tnode, descs)); } else { - *node = pool->add(new OlapScanNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -506,7 +484,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::AggregationNode(pool, tnode, descs)); } else { - *node = pool->add(new 
PartitionedAggregationNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -521,7 +499,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN } *node = pool->add(new vectorized::HashJoinNode(pool, tnode, descs)); } else { - *node = pool->add(new HashJoinNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -529,7 +507,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VNestedLoopJoinNode(pool, tnode, descs)); } else { - *node = pool->add(new CrossJoinNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -537,7 +515,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VEmptySetNode(pool, tnode, descs)); } else { - *node = pool->add(new EmptySetNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -545,7 +523,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new doris::vectorized::VExchangeNode(pool, tnode, descs)); } else { - *node = pool->add(new ExchangeNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -553,7 +531,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new doris::vectorized::VSelectNode(pool, tnode, descs)); } else { - *node = pool->add(new SelectNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -561,11 +539,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VSortNode(pool, tnode, descs)); } else { - if (tnode.sort_node.use_top_n) { - *node = pool->add(new TopNNode(pool, tnode, descs)); - } else { - *node = pool->add(new SpillSortNode(pool, tnode, descs)); - } + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -573,19 +547,18 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VAnalyticEvalNode(pool, tnode, descs)); } else { - *node = pool->add(new AnalyticEvalNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); case TPlanNodeType::MERGE_NODE: - *node = pool->add(new MergeNode(pool, tnode, descs)); - return Status::OK(); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); case TPlanNodeType::UNION_NODE: if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VUnionNode(pool, tnode, descs)); } else { - *node = pool->add(new UnionNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -593,7 +566,7 @@ Status 
ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VIntersectNode(pool, tnode, descs)); } else { - *node = pool->add(new IntersectNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -601,7 +574,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VExceptNode(pool, tnode, descs)); } else { - *node = pool->add(new ExceptNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -609,7 +582,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VBrokerScanNode(pool, tnode, descs)); } else { - *node = pool->add(new BrokerScanNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -625,7 +598,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VRepeatNode(pool, tnode, descs)); } else { - *node = pool->add(new RepeatNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -633,7 +606,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VAssertNumRowsNode(pool, tnode, descs)); } else { - *node = pool->add(new AssertNumRowsNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -641,7 +614,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN if (state->enable_vectorized_exec()) { *node = pool->add(new vectorized::VTableFunctionNode(pool, tnode, descs)); } else { - *node = pool->add(new TableFunctionNode(pool, tnode, descs)); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } return Status::OK(); @@ -650,8 +623,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN *node = pool->add(new vectorized::VDataGenFunctionScanNode(pool, tnode, descs)); return Status::OK(); } else { - error_msg << "numbers table function only support vectorized execution"; - return Status::InternalError(error_msg.str()); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } default: diff --git a/be/src/exec/hash_join_node.cpp b/be/src/exec/hash_join_node.cpp deleted file mode 100644 index f55ae79be9..0000000000 --- a/be/src/exec/hash_join_node.cpp +++ /dev/null @@ -1,802 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/hash-join-node.cc -// and modified by Doris -#include "exec/hash_join_node.h" - -#include -#include - -#include "common/utils.h" -#include "exec/hash_table.h" -#include "exprs/bloomfilter_predicate.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "exprs/runtime_filter.h" -#include "exprs/runtime_filter_slots.h" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_filter_mgr.h" -#include "runtime/runtime_state.h" -#include "util/defer_op.h" -#include "util/runtime_profile.h" - -namespace doris { - -HashJoinNode::HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _join_op(tnode.hash_join_node.join_op), - _probe_counter(0), - _probe_eos(false), - _process_probe_batch_fn(nullptr), - _anti_join_last_pos(nullptr) { - _match_all_probe = - (_join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN); - _match_one_build = (_join_op == TJoinOp::LEFT_SEMI_JOIN); - _match_all_build = - (_join_op == TJoinOp::RIGHT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN); - _build_unique = _join_op == TJoinOp::LEFT_ANTI_JOIN || _join_op == TJoinOp::LEFT_SEMI_JOIN; - - _runtime_filter_descs = tnode.runtime_filters; -} - -HashJoinNode::~HashJoinNode() { - // _probe_batch must be cleaned up in close() to ensure proper resource freeing. - DCHECK(_probe_batch == nullptr); -} - -Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - DCHECK(tnode.__isset.hash_join_node); - const std::vector& eq_join_conjuncts = tnode.hash_join_node.eq_join_conjuncts; - - for (int i = 0; i < eq_join_conjuncts.size(); ++i) { - ExprContext* ctx = nullptr; - RETURN_IF_ERROR(Expr::create_expr_tree(_pool, eq_join_conjuncts[i].left, &ctx)); - _probe_expr_ctxs.push_back(ctx); - RETURN_IF_ERROR(Expr::create_expr_tree(_pool, eq_join_conjuncts[i].right, &ctx)); - _build_expr_ctxs.push_back(ctx); - if (eq_join_conjuncts[i].__isset.opcode && - eq_join_conjuncts[i].opcode == TExprOpcode::EQ_FOR_NULL) { - _is_null_safe_eq_join.push_back(true); - } else { - _is_null_safe_eq_join.push_back(false); - } - } - - RETURN_IF_ERROR(Expr::create_expr_trees(_pool, tnode.hash_join_node.other_join_conjuncts, - &_other_join_conjunct_ctxs)); - - if (!_other_join_conjunct_ctxs.empty()) { - // If LEFT SEMI JOIN/LEFT ANTI JOIN with not equal predicate, - // build table should not be deduplicated. 
- _build_unique = false; - } - - _runtime_filters.resize(_runtime_filter_descs.size()); - - for (size_t i = 0; i < _runtime_filter_descs.size(); i++) { - RETURN_IF_ERROR(state->runtime_filter_mgr()->register_filter( - RuntimeFilterRole::PRODUCER, _runtime_filter_descs[i], state->query_options())); - RETURN_IF_ERROR(state->runtime_filter_mgr()->get_producer_filter( - _runtime_filter_descs[i].filter_id, &_runtime_filters[i])); - } - - return Status::OK(); -} - -Status HashJoinNode::prepare(RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - _build_pool.reset(new MemPool(mem_tracker_held())); - _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); - _push_down_timer = ADD_TIMER(runtime_profile(), "PushDownTime"); - _push_compute_timer = ADD_TIMER(runtime_profile(), "PushDownComputeTime"); - _probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime"); - _build_rows_counter = ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT); - _build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT); - _probe_rows_counter = ADD_COUNTER(runtime_profile(), "ProbeRows", TUnit::UNIT); - _hash_tbl_load_factor_counter = - ADD_COUNTER(runtime_profile(), "LoadFactor", TUnit::DOUBLE_VALUE); - _hash_table_list_min_size = ADD_COUNTER(runtime_profile(), "HashTableMinList", TUnit::UNIT); - _hash_table_list_max_size = ADD_COUNTER(runtime_profile(), "HashTableMaxList", TUnit::UNIT); - // build and probe exprs are evaluated in the context of the rows produced by our - // right and left children, respectively - RETURN_IF_ERROR(Expr::prepare(_build_expr_ctxs, state, child(1)->row_desc())); - RETURN_IF_ERROR(Expr::prepare(_probe_expr_ctxs, state, child(0)->row_desc())); - - // _other_join_conjuncts are evaluated in the context of the rows produced by this node - RETURN_IF_ERROR(Expr::prepare(_other_join_conjunct_ctxs, state, _row_descriptor)); - - _result_tuple_row_size = _row_descriptor.tuple_descriptors().size() * sizeof(Tuple*); - - int num_left_tuples = child(0)->row_desc().tuple_descriptors().size(); - int num_build_tuples = child(1)->row_desc().tuple_descriptors().size(); - _probe_tuple_row_size = num_left_tuples * sizeof(Tuple*); - _build_tuple_row_size = num_build_tuples * sizeof(Tuple*); - - // pre-compute the tuple index of build tuples in the output row - _build_tuple_size = num_build_tuples; - _build_tuple_idx.reserve(_build_tuple_size); - - for (int i = 0; i < _build_tuple_size; ++i) { - TupleDescriptor* build_tuple_desc = child(1)->row_desc().tuple_descriptors()[i]; - auto tuple_idx = _row_descriptor.get_tuple_idx(build_tuple_desc->id()); - RETURN_IF_INVALID_TUPLE_IDX(build_tuple_desc->id(), tuple_idx); - _build_tuple_idx.push_back(tuple_idx); - } - _probe_tuple_row_size = num_left_tuples * sizeof(Tuple*); - _build_tuple_row_size = num_build_tuples * sizeof(Tuple*); - - // TODO: default buckets - const bool stores_nulls = - _join_op == TJoinOp::RIGHT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN || - _join_op == TJoinOp::RIGHT_ANTI_JOIN || _join_op == TJoinOp::RIGHT_SEMI_JOIN || - (std::find(_is_null_safe_eq_join.begin(), _is_null_safe_eq_join.end(), true) != - _is_null_safe_eq_join.end()); - _hash_tbl.reset(new HashTable(_build_expr_ctxs, _probe_expr_ctxs, _build_tuple_size, - stores_nulls, _is_null_safe_eq_join, id(), - BitUtil::RoundUpToPowerOfTwo(state->batch_size()))); - - _probe_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); - - return Status::OK(); -} - -Status 
HashJoinNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - - // Must reset _probe_batch in close() to release resources - _probe_batch.reset(nullptr); - - if (_hash_tbl.get() != nullptr) { - _hash_tbl->close(); - } - if (_build_pool.get() != nullptr) { - _build_pool->free_all(); - } - - Expr::close(_build_expr_ctxs, state); - Expr::close(_probe_expr_ctxs, state); - Expr::close(_other_join_conjunct_ctxs, state); - - return ExecNode::close(state); -} - -void HashJoinNode::probe_side_open_thread(RuntimeState* state, std::promise* status) { - SCOPED_ATTACH_TASK(state); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh_shared()); - status->set_value(child(0)->open(state)); -} - -Status HashJoinNode::construct_hash_table(RuntimeState* state) { - // Do a full scan of child(1) and store everything in _hash_tbl - // The hash join node needs to keep in memory all build tuples, including the tuple - // row ptrs. The row ptrs are copied into the hash table's internal structure so they - // don't need to be stored in the _build_pool. - RowBatch build_batch(child(1)->row_desc(), state->batch_size()); - RETURN_IF_ERROR(child(1)->open(state)); - - SCOPED_TIMER(_build_timer); - Defer defer {[&] { - COUNTER_SET(_build_rows_counter, _hash_tbl->size()); - COUNTER_SET(_build_buckets_counter, _hash_tbl->num_buckets()); - COUNTER_SET(_hash_tbl_load_factor_counter, _hash_tbl->load_factor()); - auto node = _hash_tbl->minmax_node(); - COUNTER_SET(_hash_table_list_min_size, node.first); - COUNTER_SET(_hash_table_list_max_size, node.second); - }}; - while (true) { - RETURN_IF_CANCELLED(state); - bool eos = true; - RETURN_IF_ERROR(child(1)->get_next(state, &build_batch, &eos)); - RETURN_IF_ERROR(process_build_batch(state, &build_batch)); - VLOG_ROW << _hash_tbl->debug_string(true, &child(1)->row_desc()); - - build_batch.reset(); - - if (eos) { - break; - } - } - - return Status::OK(); -} - -Status HashJoinNode::open(RuntimeState* state) { - for (size_t i = 0; i < _runtime_filter_descs.size(); i++) { - if (auto bf = _runtime_filters[i]->get_bloomfilter()) { - RETURN_IF_ERROR(bf->init_with_fixed_length()); - } - } - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(Expr::open(_build_expr_ctxs, state)); - RETURN_IF_ERROR(Expr::open(_probe_expr_ctxs, state)); - RETURN_IF_ERROR(Expr::open(_other_join_conjunct_ctxs, state)); - - _eos = false; - - // TODO: fix problems with asynchronous cancellation - // Kick-off the construction of the build-side table in a separate - // thread, so that the left child can do any initialisation in parallel. - // Only do this if we can get a thread token. 
Otherwise, do this in the - // main thread - std::promise thread_status; - add_runtime_exec_option("Hash Table Built Asynchronously"); - std::thread(bind(&HashJoinNode::probe_side_open_thread, this, state, &thread_status)).detach(); - - if (!_runtime_filter_descs.empty()) { - RuntimeFilterSlots runtime_filter_slots(_probe_expr_ctxs, _build_expr_ctxs, - _runtime_filter_descs); - Status st; - do { - st = construct_hash_table(state); - if (UNLIKELY(!st.ok())) { - break; - } - st = runtime_filter_slots.init(state, _hash_tbl->size()); - if (UNLIKELY(!st.ok())) { - break; - } - { - SCOPED_TIMER(_push_compute_timer); - auto func = [&](TupleRow* row) { runtime_filter_slots.insert(row); }; - _hash_tbl->for_each_row(func); - } - COUNTER_UPDATE(_build_timer, _push_compute_timer->value()); - { - SCOPED_TIMER(_push_down_timer); - runtime_filter_slots.publish(); - } - } while (false); - VLOG_ROW << "runtime st: " << st; - // Don't exit even if we see an error, we still need to wait for the probe thread - // to finish. - // If this return first, probe thread will use '_await_time_cost' - // which is already destructor and then coredump. - RETURN_IF_ERROR(thread_status.get_future().get()); - if (UNLIKELY(!st.ok())) { - return st; - } - } else { - // Blocks until ConstructHashTable has returned, after which - // the hash table is fully constructed and we can start the probe - // phase. - RETURN_IF_ERROR(thread_status.get_future().get()); - RETURN_IF_ERROR(construct_hash_table(state)); - } - - // seed probe batch and _current_probe_row, etc. - while (true) { - RETURN_IF_ERROR(child(0)->get_next(state, _probe_batch.get(), &_probe_eos)); - COUNTER_UPDATE(_probe_rows_counter, _probe_batch->num_rows()); - _probe_batch_pos = 0; - - if (_probe_batch->num_rows() == 0) { - if (_probe_eos) { - _hash_tbl_iterator = _hash_tbl->begin(); - _eos = true; - break; - } - - _probe_batch->reset(); - continue; - } else { - _current_probe_row = _probe_batch->get_row(_probe_batch_pos++); - VLOG_ROW << "probe row: " << get_probe_row_output_string(_current_probe_row); - _matched_probe = false; - _hash_tbl_iterator = _hash_tbl->find(_current_probe_row); - break; - } - } - - return Status::OK(); -} - -Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) { - RETURN_IF_CANCELLED(state); - // In most cases, no additional memory overhead will be applied for at this stage, - // but if the expression calculation in this node needs to apply for additional memory, - // it may cause the memory to exceed the limit. - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - if (reached_limit()) { - *eos = true; - return Status::OK(); - } - - // These cases are simpler and use a more efficient processing loop - if (!(_match_all_build || _join_op == TJoinOp::RIGHT_SEMI_JOIN || - _join_op == TJoinOp::RIGHT_ANTI_JOIN)) { - if (_eos) { - *eos = true; - return Status::OK(); - } - - return left_join_get_next(state, out_batch, eos); - } - - ExprContext* const* other_conjunct_ctxs = &_other_join_conjunct_ctxs[0]; - int num_other_conjunct_ctxs = _other_join_conjunct_ctxs.size(); - - ExprContext* const* conjunct_ctxs = &_conjunct_ctxs[0]; - int num_conjunct_ctxs = _conjunct_ctxs.size(); - - // Explicitly manage the timer counter to avoid measuring time in the child - // GetNext call. 
- ScopedTimer probe_timer(_probe_timer); - - while (!_eos) { - // create output rows as long as: - // 1) we haven't already created an output row for the probe row and are doing - // a semi-join; - // 2) there are more matching build rows - VLOG_ROW << "probe row: " << get_probe_row_output_string(_current_probe_row); - while (_hash_tbl_iterator.has_next()) { - TupleRow* matched_build_row = _hash_tbl_iterator.get_row(); - VLOG_ROW << "matched_build_row: " << matched_build_row->to_string(child(1)->row_desc()); - - if ((_join_op == TJoinOp::RIGHT_ANTI_JOIN || _join_op == TJoinOp::RIGHT_SEMI_JOIN) && - _hash_tbl_iterator.matched()) { - // We have already matched this build row, continue to next match. - // _hash_tbl_iterator.next(); - _hash_tbl_iterator.next(); - continue; - } - - int row_idx = out_batch->add_row(); - TupleRow* out_row = out_batch->get_row(row_idx); - - // right anti join - // 1. find pos in hash table which meets equi-join - // 2. judge if set matched with other join predicates - // 3. scans hash table to choose row which is't set matched and meets conjuncts - if (_join_op == TJoinOp::RIGHT_ANTI_JOIN) { - create_output_row(out_row, _current_probe_row, matched_build_row); - if (eval_conjuncts(other_conjunct_ctxs, num_other_conjunct_ctxs, out_row)) { - _hash_tbl_iterator.set_matched(); - } - _hash_tbl_iterator.next(); - continue; - } else { - // right semi join - // 1. find pos in hash table which meets equi-join and set_matched - // 2. check if the row meets other join predicates - // 3. check if the row meets conjuncts - // right join and full join - // 1. find pos in hash table which meets equi-join - // 2. check if the row meets other join predicates - // 3. check if the row meets conjuncts - // 4. output left and right meeting other predicates and conjuncts - // 5. if full join, output left meeting and right no meeting other - // join predicates and conjuncts - // 6. output left no meeting and right meeting other join predicate - // and conjuncts - create_output_row(out_row, _current_probe_row, matched_build_row); - } - - if (!eval_conjuncts(other_conjunct_ctxs, num_other_conjunct_ctxs, out_row)) { - _hash_tbl_iterator.next(); - continue; - } - - if (_join_op == TJoinOp::RIGHT_SEMI_JOIN) { - _hash_tbl_iterator.set_matched(); - } - - // we have a match for the purpose of the (outer?) 
join as soon as we - // satisfy the JOIN clause conjuncts - _matched_probe = true; - - if (_match_all_build) { - // remember that we matched this build row - _joined_build_rows.insert(matched_build_row); - VLOG_ROW << "joined build row: " << matched_build_row; - } - - _hash_tbl_iterator.next(); - if (eval_conjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) { - out_batch->commit_last_row(); - VLOG_ROW << "match row: " << out_row->to_string(row_desc()); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - if (out_batch->is_full() || reached_limit()) { - *eos = reached_limit(); - return Status::OK(); - } - } - } - - // check whether we need to output the current probe row before - // getting a new probe batch - if (_match_all_probe && !_matched_probe) { - int row_idx = out_batch->add_row(); - TupleRow* out_row = out_batch->get_row(row_idx); - create_output_row(out_row, _current_probe_row, nullptr); - - if (eval_conjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) { - out_batch->commit_last_row(); - VLOG_ROW << "match row: " << out_row->to_string(row_desc()); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - _matched_probe = true; - - if (out_batch->is_full() || reached_limit()) { - *eos = reached_limit(); - return Status::OK(); - } - } - } - - if (_probe_batch_pos == _probe_batch->num_rows()) { - // pass on resources, out_batch might still need them - _probe_batch->transfer_resource_ownership(out_batch); - _probe_batch_pos = 0; - - if (out_batch->is_full() || out_batch->at_resource_limit()) { - return Status::OK(); - } - - // get new probe batch - if (!_probe_eos) { - while (true) { - probe_timer.stop(); - RETURN_IF_ERROR(child(0)->get_next(state, _probe_batch.get(), &_probe_eos)); - probe_timer.start(); - - if (_probe_batch->num_rows() == 0) { - // Empty batches can still contain IO buffers, which need to be passed up to - // the caller; transferring resources can fill up out_batch. 
- _probe_batch->transfer_resource_ownership(out_batch); - - if (_probe_eos) { - _eos = true; - break; - } - - if (out_batch->is_full() || out_batch->at_resource_limit()) { - return Status::OK(); - } - - continue; - } else { - COUNTER_UPDATE(_probe_rows_counter, _probe_batch->num_rows()); - break; - } - } - } else { - _eos = true; - } - - // finish up right outer join - if (_eos && (_match_all_build || _join_op == TJoinOp::RIGHT_ANTI_JOIN)) { - _hash_tbl_iterator = _hash_tbl->begin(); - } - } - - if (_eos) { - break; - } - - // join remaining rows in probe _batch - _current_probe_row = _probe_batch->get_row(_probe_batch_pos++); - VLOG_ROW << "probe row: " << get_probe_row_output_string(_current_probe_row); - _matched_probe = false; - _hash_tbl_iterator = _hash_tbl->find(_current_probe_row); - } - - *eos = true; - if (_match_all_build || _join_op == TJoinOp::RIGHT_ANTI_JOIN) { - // output remaining unmatched build rows - TupleRow* build_row = nullptr; - if (_join_op == TJoinOp::RIGHT_ANTI_JOIN) { - if (_anti_join_last_pos != nullptr) { - _hash_tbl_iterator = *_anti_join_last_pos; - } else { - _hash_tbl_iterator = _hash_tbl->begin(); - } - } - while (!out_batch->is_full() && _hash_tbl_iterator.has_next()) { - build_row = _hash_tbl_iterator.get_row(); - - if (_match_all_build) { - if (_joined_build_rows.find(build_row) != _joined_build_rows.end()) { - _hash_tbl_iterator.next(); - continue; - } - } else if (_join_op == TJoinOp::RIGHT_ANTI_JOIN) { - if (_hash_tbl_iterator.matched()) { - _hash_tbl_iterator.next(); - continue; - } - } - - int row_idx = out_batch->add_row(); - TupleRow* out_row = out_batch->get_row(row_idx); - create_output_row(out_row, nullptr, build_row); - if (eval_conjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) { - out_batch->commit_last_row(); - VLOG_ROW << "match row: " << out_row->to_string(row_desc()); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - if (reached_limit()) { - *eos = true; - return Status::OK(); - } - } - _hash_tbl_iterator.next(); - } - if (_join_op == TJoinOp::RIGHT_ANTI_JOIN) { - _anti_join_last_pos = &_hash_tbl_iterator; - } - // we're done if there are no more rows left to check - *eos = !_hash_tbl_iterator.has_next(); - } - - return Status::OK(); -} - -Status HashJoinNode::left_join_get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) { - *eos = _eos; - - ScopedTimer probe_timer(_probe_timer); - Defer defer {[&] { COUNTER_SET(_rows_returned_counter, _num_rows_returned); }}; - - while (!_eos) { - // Compute max rows that should be added to out_batch - int64_t max_added_rows = out_batch->capacity() - out_batch->num_rows(); - - if (limit() != -1) { - max_added_rows = std::min(max_added_rows, limit() - rows_returned()); - } - - // Continue processing this row batch - _num_rows_returned += process_probe_batch(out_batch, _probe_batch.get(), max_added_rows); - - if (reached_limit() || out_batch->is_full()) { - *eos = reached_limit(); - break; - } - - // Check to see if we're done processing the current probe batch - if (!_hash_tbl_iterator.has_next() && _probe_batch_pos == _probe_batch->num_rows()) { - _probe_batch->transfer_resource_ownership(out_batch); - _probe_batch_pos = 0; - - if (out_batch->is_full() || out_batch->at_resource_limit()) { - break; - } - - if (_probe_eos) { - *eos = _eos = true; - break; - } else { - probe_timer.stop(); - RETURN_IF_ERROR(child(0)->get_next(state, _probe_batch.get(), &_probe_eos)); - probe_timer.start(); - COUNTER_UPDATE(_probe_rows_counter, _probe_batch->num_rows()); - } 
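// The left-join path above is the row-at-a-time form of a classic left-outer
// hash-join probe. A minimal, self-contained sketch of the same idea on int
// keys (illustrative only, not the Doris API): unmatched probe rows are still
// emitted, paired with an empty build side.
#include <optional>
#include <unordered_map>
#include <utility>
#include <vector>

std::vector<std::pair<int, std::optional<int>>> left_outer_probe(
        const std::vector<std::pair<int, int>>& build, // (key, payload)
        const std::vector<std::pair<int, int>>& probe) {
    std::unordered_multimap<int, int> tbl;
    for (const auto& [k, v] : build) tbl.emplace(k, v);
    std::vector<std::pair<int, std::optional<int>>> out;
    for (const auto& [k, v] : probe) {
        auto [lo, hi] = tbl.equal_range(k);
        if (lo == hi) {
            out.push_back({v, std::nullopt}); // no match: emit probe row with null build side
        } else {
            for (auto it = lo; it != hi; ++it) out.push_back({v, it->second});
        }
    }
    return out;
}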
- } - } - - return Status::OK(); -} - -std::string HashJoinNode::get_probe_row_output_string(TupleRow* probe_row) { - std::stringstream out; - out << "["; - int* _build_tuple_idx_ptr = &_build_tuple_idx[0]; - - for (int i = 0; i < row_desc().tuple_descriptors().size(); ++i) { - if (i != 0) { - out << " "; - } - - int* is_build_tuple = - std::find(_build_tuple_idx_ptr, _build_tuple_idx_ptr + _build_tuple_size, i); - - if (is_build_tuple != _build_tuple_idx_ptr + _build_tuple_size) { - out << Tuple::to_string(nullptr, *row_desc().tuple_descriptors()[i]); - } else { - out << Tuple::to_string(probe_row->get_tuple(i), *row_desc().tuple_descriptors()[i]); - } - } - - out << "]"; - return out.str(); -} - -void HashJoinNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "_hashJoin(eos=" << (_eos ? "true" : "false") << " probe_batch_pos=" << _probe_batch_pos - << " hash_tbl="; - *out << string(indentation_level * 2, ' '); - *out << "HashTbl("; - // << " build_exprs=" << Expr::debug_string(_build_expr_ctxs) - // << " probe_exprs=" << Expr::debug_string(_probe_expr_ctxs); - *out << ")"; - ExecNode::debug_string(indentation_level, out); - *out << ")"; -} - -// This function is replaced by codegen -void HashJoinNode::create_output_row(TupleRow* out, TupleRow* probe, TupleRow* build) { - uint8_t* out_ptr = reinterpret_cast(out); - if (probe == nullptr) { - memset(out_ptr, 0, _probe_tuple_row_size); - } else { - memcpy(out_ptr, probe, _probe_tuple_row_size); - } - - if (build == nullptr) { - memset(out_ptr + _probe_tuple_row_size, 0, _build_tuple_row_size); - } else { - memcpy(out_ptr + _probe_tuple_row_size, build, _build_tuple_row_size); - } -} - -// Wrapper around ExecNode's eval conjuncts with a different function name. -// This lets us distinguish between the join conjuncts vs. non-join conjuncts -// for codegen. -// Note: don't declare this static. LLVM will pick the fastcc calling convention and -// we will not be able to replace the functions with codegen'd versions. -// TODO: explicitly set the calling convention? -// TODO: investigate using fastcc for all codegen internal functions? -bool eval_other_join_conjuncts(ExprContext* const* ctxs, int num_ctxs, TupleRow* row) { - return ExecNode::eval_conjuncts(ctxs, num_ctxs, row); -} - -// CreateOutputRow, EvalOtherJoinConjuncts, and EvalConjuncts are replaced by -// codegen. 
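The deleted process_probe_batch() below is the hot loop of this join: for each probe row it walks all hash-table matches, applies the non-equi-join conjuncts, and emits combined output rows. A minimal, self-contained sketch of that pattern, with std::unordered_multimap and a plain Row struct standing in for Doris's HashTable and TupleRow (both hypothetical simplifications):

#include <functional>
#include <unordered_map>
#include <utility>
#include <vector>

struct Row { int key; int payload; };

// Probe every row against the build-side table; 'other_conjunct' plays the
// role of eval_other_join_conjuncts() on the combined row.
std::vector<std::pair<Row, Row>> probe_all(
        const std::unordered_multimap<int, Row>& build_tbl, const std::vector<Row>& probe_rows,
        const std::function<bool(const Row&, const Row&)>& other_conjunct) {
    std::vector<std::pair<Row, Row>> out;
    for (const Row& p : probe_rows) {
        auto range = build_tbl.equal_range(p.key);
        for (auto it = range.first; it != range.second; ++it) {
            if (other_conjunct(p, it->second)) {
                out.emplace_back(p, it->second); // matched: emit the combined row
            }
        }
    }
    return out;
}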
-int HashJoinNode::process_probe_batch(RowBatch* out_batch, RowBatch* probe_batch,
-                                      int max_added_rows) {
-    // This path does not handle full outer or right outer joins
-    DCHECK(!_match_all_build);
-
-    int row_idx = out_batch->add_rows(max_added_rows);
-    DCHECK(row_idx != RowBatch::INVALID_ROW_INDEX);
-    uint8_t* out_row_mem = reinterpret_cast<uint8_t*>(out_batch->get_row(row_idx));
-    TupleRow* out_row = reinterpret_cast<TupleRow*>(out_row_mem);
-
-    int rows_returned = 0;
-    int probe_rows = probe_batch->num_rows();
-
-    ExprContext* const* other_conjunct_ctxs = &_other_join_conjunct_ctxs[0];
-    int num_other_conjunct_ctxs = _other_join_conjunct_ctxs.size();
-
-    ExprContext* const* conjunct_ctxs = &_conjunct_ctxs[0];
-    int num_conjunct_ctxs = _conjunct_ctxs.size();
-
-    while (true) {
-        // Create output row for each matching build row
-        while (_hash_tbl_iterator.has_next()) {
-            TupleRow* matched_build_row = _hash_tbl_iterator.get_row();
-            _hash_tbl_iterator.next();
-            create_output_row(out_row, _current_probe_row, matched_build_row);
-
-            if (!eval_other_join_conjuncts(other_conjunct_ctxs, num_other_conjunct_ctxs,
-                                           out_row)) {
-                continue;
-            }
-
-            _matched_probe = true;
-
-            // LEFT ANTI JOIN: a probe row with an equal match is not returned
-            if (_join_op == TJoinOp::LEFT_ANTI_JOIN) {
-                _hash_tbl_iterator = _hash_tbl->end();
-                break;
-            }
-
-            if (eval_conjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) {
-                ++rows_returned;
-
-                // Filled up out batch or hit limit
-                if (UNLIKELY(rows_returned == max_added_rows)) {
-                    goto end;
-                }
-
-                // Advance to next out row
-                out_row_mem += out_batch->row_byte_size();
-                out_row = reinterpret_cast<TupleRow*>(out_row_mem);
-            }
-
-            // Handle left semi-join
-            if (_match_one_build) {
-                _hash_tbl_iterator = _hash_tbl->end();
-                break;
-            }
-        }
-
-        // Handle left outer-join and left anti-join
-        if ((!_matched_probe && _match_all_probe) ||
-            (!_matched_probe && _join_op == TJoinOp::LEFT_ANTI_JOIN)) {
-            create_output_row(out_row, _current_probe_row, nullptr);
-            _matched_probe = true;
-
-            if (ExecNode::eval_conjuncts(conjunct_ctxs, num_conjunct_ctxs, out_row)) {
-                ++rows_returned;
-
-                if (UNLIKELY(rows_returned == max_added_rows)) {
-                    goto end;
-                }
-
-                // Advance to next out row
-                out_row_mem += out_batch->row_byte_size();
-                out_row = reinterpret_cast<TupleRow*>(out_row_mem);
-            }
-        }
-
-        if (!_hash_tbl_iterator.has_next()) {
-            // Advance to the next probe row
-            if (UNLIKELY(_probe_batch_pos == probe_rows)) {
-                goto end;
-            }
-            if (++_probe_counter % RELEASE_CONTEXT_COUNTER == 0) {
-                ExprContext::free_local_allocations(_probe_expr_ctxs);
-                ExprContext::free_local_allocations(_build_expr_ctxs);
-            }
-            _current_probe_row = probe_batch->get_row(_probe_batch_pos++);
-            _hash_tbl_iterator = _hash_tbl->find(_current_probe_row);
-            _matched_probe = false;
-        }
-    }
-
-end:
-
-    if (_match_one_build && _matched_probe) {
-        _hash_tbl_iterator = _hash_tbl->end();
-    }
-
-    out_batch->commit_rows(rows_returned);
-    return rows_returned;
-}
-
-// When the build table has many duplicated rows, hash collisions become severe,
-// so in some cases we don't need to store duplicated values in the hash table
-// and can build a unique one instead.
-Status HashJoinNode::process_build_batch(RuntimeState* state, RowBatch* build_batch) {
-    // insert build row into our hash table
-    if (_build_unique) {
-        for (int i = 0; i < build_batch->num_rows(); ++i) {
-            TupleRow* tuple_row = nullptr;
-            if (_hash_tbl->emplace_key(build_batch->get_row(i), &tuple_row)) {
-                build_batch->get_row(i)->deep_copy(tuple_row,
-                                                   child(1)->row_desc().tuple_descriptors(),
-                                                   _build_pool.get(), false);
-            }
-        }
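An aside on the _build_unique branch just above: with duplicate keys dropped at build time, each probe can match at most one build row. A sketch of the same deduplication with standard containers (hypothetical simplified types, not the deleted HashTable API):

#include <unordered_map>
#include <vector>

struct BuildRow { int key; int payload; };

// Mirrors emplace_key(): only the first row seen for a key is stored,
// later duplicates are silently dropped.
std::unordered_map<int, BuildRow> build_unique_table(const std::vector<BuildRow>& rows) {
    std::unordered_map<int, BuildRow> tbl;
    for (const BuildRow& r : rows) {
        tbl.try_emplace(r.key, r);
    }
    return tbl;
}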
- } else { - // take ownership of tuple data of build_batch - _build_pool->acquire_data(build_batch->tuple_data_pool(), false); - RETURN_IF_ERROR(_hash_tbl->resize_buckets_ahead(build_batch->num_rows())); - for (int i = 0; i < build_batch->num_rows(); ++i) { - _hash_tbl->insert_without_check(build_batch->get_row(i)); - } - } - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exec/hash_join_node.h b/be/src/exec/hash_join_node.h deleted file mode 100644 index afb898b991..0000000000 --- a/be/src/exec/hash_join_node.h +++ /dev/null @@ -1,182 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/hash-join-node.h -// and modified by Doris - -#pragma once - -#include -#include -#include -#include - -#include "exec/exec_node.h" -#include "exec/hash_table.h" -#include "gen_cpp/PlanNodes_types.h" - -namespace doris { - -class MemPool; -class RowBatch; -class TupleRow; -class IRuntimeFilter; - -// Node for in-memory hash joins: -// - builds up a hash table with the rows produced by our right input -// (child(1)); build exprs are the rhs exprs of our equi-join predicates -// - for each row from our left input, probes the hash table to retrieve -// matching entries; the probe exprs are the lhs exprs of our equi-join predicates -// -// Row batches: -// - In general, we are not able to pass our output row batch on to our left child (when -// we're fetching the probe rows): if we have a 1xn join, our output will contain -// multiple rows per left input row -// - TODO: fix this, so in the case of 1x1/nx1 joins (for instance, fact to dimension tbl) -// we don't do these extra copies -class HashJoinNode : public ExecNode { -public: - HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - - ~HashJoinNode(); - - // set up _build- and _probe_exprs - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); - virtual Status prepare(RuntimeState* state); - virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - virtual Status close(RuntimeState* state); - -protected: - void debug_string(int indentation_level, std::stringstream* out) const; - -private: - friend class IRuntimeFilter; - - std::unique_ptr _hash_tbl; - HashTable::Iterator _hash_tbl_iterator; - - // for right outer joins, keep track of what's been joined - typedef std::unordered_set BuildTupleRowSet; - BuildTupleRowSet _joined_build_rows; - - TJoinOp::type _join_op; - - // our equi-join predicates " = " are separated into - // _build_exprs (over child(1)) and _probe_exprs (over child(0)) - std::vector _probe_expr_ctxs; - std::vector _build_expr_ctxs; - // true: the operator 
of eq join predicate is null safe equal => '<=>' - // false: the operator of eq join predicate is equal => '=' - std::vector _is_null_safe_eq_join; - std::list _push_down_expr_ctxs; - - // non-equi-join conjuncts from the JOIN clause - std::vector _other_join_conjunct_ctxs; - - // derived from _join_op - bool _match_all_probe; // output all rows coming from the probe input - bool _match_one_build; // match at most one build row to each probe row - bool _match_all_build; // output all rows coming from the build input - bool _build_unique; // build a hash table without duplicated rows - - bool _matched_probe; // if true, we have matched the current probe row - bool _eos; // if true, nothing left to return in get_next() - std::unique_ptr _build_pool; // holds everything referenced in _hash_tbl - - // Size of the TupleRow (just the Tuple ptrs) from the build (right) and probe (left) - // sides. Set to zero if the build/probe tuples are not returned, e.g., for semi joins. - // Cached because it is used in the hot path. - int _probe_tuple_row_size; - int _build_tuple_row_size; - - // _probe_batch must be cleared before calling get_next(). The child node - // does not initialize all tuple ptrs in the row, only the ones that it - // is responsible for. - std::unique_ptr _probe_batch; - int _probe_batch_pos; // current scan pos in _probe_batch - int _probe_counter; - bool _probe_eos; // if true, probe child has no more rows to process - TupleRow* _current_probe_row; - - // _build_tuple_idx[i] is the tuple index of child(1)'s tuple[i] in the output row - std::vector _build_tuple_idx; - int _build_tuple_size; - - // byte size of result tuple row (sum of the tuple ptrs, not the tuple data). - // This should be the same size as the probe tuple row. - int _result_tuple_row_size; - - // HashJoinNode::process_probe_batch() exactly - typedef int (*ProcessProbeBatchFn)(HashJoinNode*, RowBatch*, RowBatch*, int); - // Jitted ProcessProbeBatch function pointer. Null if codegen is disabled. - ProcessProbeBatchFn _process_probe_batch_fn; - - // record anti join pos in get_next() - HashTable::Iterator* _anti_join_last_pos; - - RuntimeProfile::Counter* _build_timer; // time to build hash table - RuntimeProfile::Counter* _push_down_timer; // time to build hash table - RuntimeProfile::Counter* _push_compute_timer; - RuntimeProfile::Counter* _probe_timer; // time to probe - RuntimeProfile::Counter* _build_rows_counter; // num build rows - RuntimeProfile::Counter* _probe_rows_counter; // num probe rows - RuntimeProfile::Counter* _build_buckets_counter; // num buckets in hash table - RuntimeProfile::Counter* _hash_tbl_load_factor_counter; - RuntimeProfile::Counter* _hash_table_list_min_size; - RuntimeProfile::Counter* _hash_table_list_max_size; - - void probe_side_open_thread(RuntimeState* state, std::promise* status); - - // We parallelise building the build-side with Open'ing the - // probe-side. If, for example, the probe-side child is another - // hash-join node, it can start to build its own build-side at the - // same time. - Status construct_hash_table(RuntimeState* state); - - // GetNext helper function for the common join cases: Inner join, left semi and left - // outer - Status left_join_get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - - // Processes a probe batch for the common (non right-outer join) cases. - // out_batch: the batch for resulting tuple rows - // probe_batch: the probe batch to process. 
This function can be called to - // continue processing a batch in the middle - // max_added_rows: maximum rows that can be added to out_batch - // return the number of rows added to out_batch - int process_probe_batch(RowBatch* out_batch, RowBatch* probe_batch, int max_added_rows); - - // Construct the build hash table, adding all the rows in 'build_batch' - Status process_build_batch(RuntimeState* state, RowBatch* build_batch); - - // Write combined row, consisting of probe_row and build_row, to out_row. - // This is replaced by codegen. - void create_output_row(TupleRow* out_row, TupleRow* probe_row, TupleRow* build_row); - - // Returns a debug string for probe_rows. Probe rows have tuple ptrs that are - // uninitialized; the left hand child only populates the tuple ptrs it is responsible - // for. This function outputs just the probe row values and leaves the build - // side values as nullptr. - // This is only used for debugging and outputting the left child rows before - // doing the join. - std::string get_probe_row_output_string(TupleRow* probe_row); - - std::vector _runtime_filter_descs; - std::vector _runtime_filters; -}; - -} // namespace doris diff --git a/be/src/exec/intersect_node.cpp b/be/src/exec/intersect_node.cpp deleted file mode 100644 index 154a639594..0000000000 --- a/be/src/exec/intersect_node.cpp +++ /dev/null @@ -1,112 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/intersect_node.h" - -#include "exec/hash_table.h" -#include "exprs/expr.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" - -namespace doris { -IntersectNode::IntersectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : SetOperationNode(pool, tnode, descs, tnode.intersect_node.tuple_id) {} - -Status IntersectNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(SetOperationNode::init(tnode, state)); - // Create result_expr_ctx_lists_ from thrift exprs. 
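init() above only collects one result-expr list per child; the INTERSECT itself happens in open() below by repeatedly marking and rebuilding a hash table. A minimal sketch of that hash-and-mark strategy, assuming plain int keys in place of tuple rows:

#include <unordered_set>
#include <vector>

std::unordered_set<int> intersect_all(const std::vector<std::vector<int>>& children) {
    if (children.empty()) return {};
    // 1. build a hash set from child(0)
    std::unordered_set<int> result(children[0].begin(), children[0].end());
    // 2. probe with each remaining child; keep only the matched elements
    for (size_t i = 1; i < children.size() && !result.empty(); ++i) {
        std::unordered_set<int> matched;
        for (int v : children[i]) {
            if (result.count(v) > 0) matched.insert(v);
        }
        result.swap(matched); // "rebuild" the table from the matched items
    }
    return result;
}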
- auto& result_texpr_lists = tnode.intersect_node.result_expr_lists; - for (auto& texprs : result_texpr_lists) { - std::vector ctxs; - RETURN_IF_ERROR(Expr::create_expr_trees(_pool, texprs, &ctxs)); - _child_expr_lists.push_back(ctxs); - } - return Status::OK(); -} - -// the actual intersect operation is in this function, -// 1 build a hash table from child(0) -// 2 probe with child(1), then filter the hash table and find the matched item, use them to rebuild a hash table -// repeat [2] this for all the rest child -Status IntersectNode::open(RuntimeState* state) { - RETURN_IF_ERROR(SetOperationNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // if a table is empty, the result must be empty - if (_hash_tbl->size() == 0) { - _hash_tbl_iterator = _hash_tbl->begin(); - return Status::OK(); - } - bool eos = false; - - for (int i = 1; i < _children.size(); ++i) { - if (i > 1) { - RETURN_IF_ERROR(refresh_hash_table(i)); - } - - _valid_element_in_hash_tbl = 0; - // probe - _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size())); - ScopedTimer probe_timer(_probe_timer); - RETURN_IF_ERROR(child(i)->open(state)); - eos = false; - while (!eos) { - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(child(i)->get_next(state, _probe_batch.get(), &eos)); - for (int j = 0; j < _probe_batch->num_rows(); ++j) { - _hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j)); - if (_hash_tbl_iterator != _hash_tbl->end()) { - if (!_hash_tbl_iterator.matched()) { - _valid_element_in_hash_tbl++; - _hash_tbl_iterator.set_matched(); - } - } - } - _probe_batch->reset(); - } - } - _hash_tbl_iterator = _hash_tbl->begin(); - return Status::OK(); -} - -Status IntersectNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eos) { - RETURN_IF_CANCELLED(state); - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - *eos = true; - if (reached_limit()) { - return Status::OK(); - } - int64_t tuple_buf_size; - uint8_t* tuple_buf; - RETURN_IF_ERROR( - out_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); - memset(tuple_buf, 0, tuple_buf_size); - while (_hash_tbl_iterator.has_next()) { - if (_hash_tbl_iterator.matched()) { - create_output_row(_hash_tbl_iterator.get_row(), out_batch, tuple_buf); - tuple_buf += _tuple_desc->byte_size(); - ++_num_rows_returned; - } - _hash_tbl_iterator.next(); - *eos = !_hash_tbl_iterator.has_next() || reached_limit(); - if (out_batch->is_full() || out_batch->at_resource_limit() || *eos) { - return Status::OK(); - } - } - return Status::OK(); -} -} // namespace doris \ No newline at end of file diff --git a/be/src/exec/intersect_node.h b/be/src/exec/intersect_node.h deleted file mode 100644 index 554a9e4e43..0000000000 --- a/be/src/exec/intersect_node.h +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exec/set_operation_node.h" - -namespace doris { - -class MemPool; -class RowBatch; -class TupleRow; - -// Node that calculate the intersect results of its children by either materializing their -// evaluated expressions into row batches or passing through (forwarding) the -// batches if the input tuple layout is identical to the output tuple layout -// and expressions don't need to be evaluated. The children should be ordered -// such that all passthrough children come before the children that need -// materialization. The intersect node pulls from its children sequentially, i.e. -// it exhausts one child completely before moving on to the next one. -class IntersectNode : public SetOperationNode { -public: - IntersectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr); - virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); -}; - -}; // namespace doris diff --git a/be/src/exec/json_scanner.cpp b/be/src/exec/json_scanner.cpp deleted file mode 100644 index 5ee5bac8f2..0000000000 --- a/be/src/exec/json_scanner.cpp +++ /dev/null @@ -1,843 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
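The deleted JsonScanner below supports multi-byte line delimiters (see the line_delimiter_length handling in its constructor). Independent of Doris's LineReader, splitting a buffer on an arbitrary delimiter string looks roughly like this sketch:

#include <string_view>
#include <vector>

std::vector<std::string_view> split_lines(std::string_view buf, std::string_view delim) {
    std::vector<std::string_view> lines;
    if (delim.empty()) return {buf}; // guard: an empty delimiter would never advance
    size_t pos = 0;
    while (pos <= buf.size()) {
        size_t next = buf.find(delim, pos);
        if (next == std::string_view::npos) {
            lines.push_back(buf.substr(pos)); // final (possibly empty) line
            break;
        }
        lines.push_back(buf.substr(pos, next - pos));
        pos = next + delim.size();
    }
    return lines;
}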
- -#include "exec/json_scanner.h" - -#include - -#include - -#include "exec/plain_text_line_reader.h" -#include "exprs/json_functions.h" -#include "io/file_factory.h" -#include "runtime/exec_env.h" -#include "runtime/runtime_state.h" - -namespace doris { -using namespace ErrorCode; - -JsonScanner::JsonScanner(RuntimeState* state, RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, ScannerCounter* counter) - : BaseScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, counter), - _cur_file_reader(nullptr), - _cur_file_reader_s(nullptr), - _real_reader(nullptr), - _cur_line_reader(nullptr), - _cur_json_reader(nullptr), - _cur_reader_eof(false), - _read_json_by_line(false) { - if (params.__isset.line_delimiter_length && params.line_delimiter_length > 1) { - _line_delimiter = params.line_delimiter_str; - _line_delimiter_length = params.line_delimiter_length; - } else { - _line_delimiter.push_back(static_cast(params.line_delimiter)); - _line_delimiter_length = 1; - } -} - -JsonScanner::~JsonScanner() { - close(); -} - -Status JsonScanner::open() { - return BaseScanner::open(); -} - -Status JsonScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) { - SCOPED_TIMER(_read_timer); - // Get one line - while (!_scanner_eof) { - if (!_real_reader || _cur_reader_eof) { - RETURN_IF_ERROR(open_next_reader()); - // If there isn't any more reader, break this - if (_scanner_eof) { - break; - } - } - - if (_read_json_by_line && _skip_next_line) { - size_t size = 0; - const uint8_t* line_ptr = nullptr; - RETURN_IF_ERROR(_cur_line_reader->read_line(&line_ptr, &size, &_cur_reader_eof)); - _skip_next_line = false; - continue; - } - - bool is_empty_row = false; - RETURN_IF_ERROR(_cur_json_reader->read_json_row(_src_tuple, _src_slot_descs, tuple_pool, - &is_empty_row, &_cur_reader_eof)); - - if (is_empty_row) { - // Read empty row, just continue - continue; - } - COUNTER_UPDATE(_rows_read_counter, 1); - SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(fill_dest_tuple(tuple, tuple_pool, fill_tuple)); - break; // break always - } - if (_scanner_eof) { - *eof = true; - } else { - *eof = false; - } - return Status::OK(); -} - -Status JsonScanner::open_next_reader() { - if (_next_range >= _ranges.size()) { - _scanner_eof = true; - return Status::OK(); - } - RETURN_IF_ERROR(open_based_reader()); - RETURN_IF_ERROR(open_json_reader()); - _next_range++; - return Status::OK(); -} - -Status JsonScanner::open_based_reader() { - RETURN_IF_ERROR(open_file_reader()); - if (_read_json_by_line) { - RETURN_IF_ERROR(open_line_reader()); - } - return Status::OK(); -} - -Status JsonScanner::open_file_reader() { - const TBrokerRangeDesc& range = _ranges[_next_range]; - int64_t start_offset = range.start_offset; - if (start_offset != 0) { - start_offset -= 1; - } - if (range.__isset.read_json_by_line) { - _read_json_by_line = range.read_json_by_line; - } - - if (range.file_type == TFileType::FILE_STREAM) { - RETURN_IF_ERROR(FileFactory::create_pipe_reader(range.load_id, _cur_file_reader_s)); - _real_reader = _cur_file_reader_s.get(); - } else { - RETURN_IF_ERROR(FileFactory::create_file_reader( - range.file_type, _state->exec_env(), _profile, _broker_addresses, - _params.properties, range, start_offset, _cur_file_reader)); - _real_reader = _cur_file_reader.get(); - } - _cur_reader_eof = false; - return _real_reader->open(); -} - -Status 
JsonScanner::open_line_reader() { - if (_cur_line_reader != nullptr) { - delete _cur_line_reader; - _cur_line_reader = nullptr; - } - - const TBrokerRangeDesc& range = _ranges[_next_range]; - int64_t size = range.size; - if (range.start_offset != 0) { - size += 1; - _skip_next_line = true; - } else { - _skip_next_line = false; - } - _cur_line_reader = new PlainTextLineReader(_profile, _real_reader, nullptr, size, - _line_delimiter, _line_delimiter_length); - _cur_reader_eof = false; - return Status::OK(); -} - -Status JsonScanner::open_json_reader() { - if (_cur_json_reader != nullptr) { - delete _cur_json_reader; - _cur_json_reader = nullptr; - } - - std::string json_root = ""; - std::string jsonpath = ""; - bool strip_outer_array = false; - bool num_as_string = false; - bool fuzzy_parse = false; - - RETURN_IF_ERROR( - get_range_params(jsonpath, json_root, strip_outer_array, num_as_string, fuzzy_parse)); - if (_read_json_by_line) { - _cur_json_reader = - new JsonReader(_state, _counter, _profile, strip_outer_array, num_as_string, - fuzzy_parse, &_scanner_eof, nullptr, _cur_line_reader); - } else { - _cur_json_reader = new JsonReader(_state, _counter, _profile, strip_outer_array, - num_as_string, fuzzy_parse, &_scanner_eof, _real_reader); - } - - RETURN_IF_ERROR(_cur_json_reader->init(jsonpath, json_root)); - return Status::OK(); -} - -Status JsonScanner::get_range_params(std::string& jsonpath, std::string& json_root, - bool& strip_outer_array, bool& num_as_string, - bool& fuzzy_parse) { - const TBrokerRangeDesc& range = _ranges[_next_range]; - - if (range.__isset.jsonpaths) { - jsonpath = range.jsonpaths; - } - if (range.__isset.json_root) { - json_root = range.json_root; - } - if (range.__isset.strip_outer_array) { - strip_outer_array = range.strip_outer_array; - } - if (range.__isset.num_as_string) { - num_as_string = range.num_as_string; - } - if (range.__isset.fuzzy_parse) { - fuzzy_parse = range.fuzzy_parse; - } - return Status::OK(); -} - -void JsonScanner::close() { - BaseScanner::close(); - if (_cur_json_reader != nullptr) { - delete _cur_json_reader; - _cur_json_reader = nullptr; - } - if (_cur_line_reader != nullptr) { - delete _cur_line_reader; - _cur_line_reader = nullptr; - } -} - -////// class JsonDataInternal -JsonDataInternal::JsonDataInternal(rapidjson::Value* v) : _json_values(v) { - if (v != nullptr) { - _iterator = v->Begin(); - } -} - -rapidjson::Value::ConstValueIterator JsonDataInternal::get_next() { - if (is_null() || _json_values->End() == _iterator) { - return nullptr; - } - return _iterator++; -} - -////// class JsonReader -JsonReader::JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, - bool strip_outer_array, bool num_as_string, bool fuzzy_parse, - bool* scanner_eof, FileReader* file_reader, LineReader* line_reader) - : _handle_json_callback(nullptr), - _next_line(0), - _total_lines(0), - _state(state), - _counter(counter), - _profile(profile), - _file_reader(file_reader), - _line_reader(line_reader), - _closed(false), - _strip_outer_array(strip_outer_array), - _num_as_string(num_as_string), - _fuzzy_parse(fuzzy_parse), - _value_allocator(_value_buffer, sizeof(_value_buffer)), - _parse_allocator(_parse_buffer, sizeof(_parse_buffer)), - _origin_json_doc(&_value_allocator, sizeof(_parse_buffer), &_parse_allocator), - _json_doc(nullptr), - _scanner_eof(scanner_eof) { - _bytes_read_counter = ADD_COUNTER(_profile, "BytesRead", TUnit::BYTES); - _read_timer = ADD_TIMER(_profile, "ReadTime"); - _file_read_timer = ADD_TIMER(_profile, 
"FileReadTime"); -} - -JsonReader::~JsonReader() { - _close(); -} - -Status JsonReader::init(const std::string& jsonpath, const std::string& json_root) { - // generate _parsed_jsonpaths and _parsed_json_root - RETURN_IF_ERROR(_parse_jsonpath_and_json_root(jsonpath, json_root)); - - //improve performance - if (_parsed_jsonpaths.empty()) { // input is a simple json-string - _handle_json_callback = &JsonReader::_handle_simple_json; - } else { // input is a complex json-string and a json-path - if (_strip_outer_array) { - _handle_json_callback = &JsonReader::_handle_flat_array_complex_json; - } else { - _handle_json_callback = &JsonReader::_handle_nested_complex_json; - } - } - return Status::OK(); -} - -Status JsonReader::_parse_jsonpath_and_json_root(const std::string& jsonpath, - const std::string& json_root) { - // parse jsonpath - if (!jsonpath.empty()) { - RETURN_IF_ERROR(_generate_json_paths(jsonpath, &_parsed_jsonpaths)); - } - if (!json_root.empty()) { - JsonFunctions::parse_json_paths(json_root, &_parsed_json_root); - } - return Status::OK(); -} - -Status JsonReader::_generate_json_paths(const std::string& jsonpath, - std::vector>* vect) { - rapidjson::Document jsonpaths_doc; - if (!jsonpaths_doc.Parse(jsonpath.c_str(), jsonpath.length()).HasParseError()) { - if (!jsonpaths_doc.IsArray()) { - return Status::InvalidArgument("Invalid json path: {}", jsonpath); - } else { - for (int i = 0; i < jsonpaths_doc.Size(); i++) { - const rapidjson::Value& path = jsonpaths_doc[i]; - if (!path.IsString()) { - return Status::InvalidArgument("Invalid json path: {}", jsonpath); - } - std::vector parsed_paths; - JsonFunctions::parse_json_paths(path.GetString(), &parsed_paths); - vect->push_back(std::move(parsed_paths)); - } - return Status::OK(); - } - } else { - return Status::InvalidArgument("Invalid json path: {}", jsonpath); - } -} - -void JsonReader::_close() { - if (_closed) { - return; - } - _closed = true; -} - -// read one json string from line reader or file reader and parse it to json doc. -// return Status::DataQualityError() if data has quality error. -// return other error if encounter other problems. -// return Status::OK() if parse succeed or reach EOF. -Status JsonReader::_parse_json_doc(size_t* size, bool* eof) { - // read a whole message - SCOPED_TIMER(_file_read_timer); - const uint8_t* json_str = nullptr; - std::unique_ptr json_str_ptr; - if (_line_reader != nullptr) { - RETURN_IF_ERROR(_line_reader->read_line(&json_str, size, eof)); - } else { - int64_t length = 0; - RETURN_IF_ERROR(_file_reader->read_one_message(&json_str_ptr, &length)); - json_str = json_str_ptr.get(); - *size = length; - if (length == 0) { - *eof = true; - } - } - - _bytes_read_counter += *size; - if (*eof) { - return Status::OK(); - } - - // clear memory here. - _value_allocator.Clear(); - _parse_allocator.Clear(); - bool has_parse_error = false; - // parse jsondata to JsonDoc - - // As the issue: https://github.com/Tencent/rapidjson/issues/1458 - // Now, rapidjson only support uint64_t, So lagreint load cause bug. We use kParseNumbersAsStringsFlag. - if (_num_as_string) { - has_parse_error = - _origin_json_doc - .Parse((char*)json_str, *size) - .HasParseError(); - } else { - has_parse_error = _origin_json_doc.Parse((char*)json_str, *size).HasParseError(); - } - - if (has_parse_error) { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "Parse json data for JsonDoc failed. 
code: {}, error info: {}", - _origin_json_doc.GetParseError(), - rapidjson::GetParseError_En(_origin_json_doc.GetParseError())); - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return std::string((char*)json_str, *size); }, - [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); - _counter->num_rows_filtered++; - if (*_scanner_eof) { - // Case A: if _scanner_eof is set to true in "append_error_msg_to_file", which means - // we meet enough invalid rows and the scanner should be stopped. - // So we set eof to true and return OK, the caller will stop the process as we meet the end of file. - *eof = true; - return Status::OK(); - } - return Status::DataQualityError(fmt::to_string(error_msg)); - } - - // set json root - if (_parsed_json_root.size() != 0) { - _json_doc = JsonFunctions::get_json_object_from_parsed_json( - _parsed_json_root, &_origin_json_doc, _origin_json_doc.GetAllocator()); - if (_json_doc == nullptr) { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", "JSON Root not found."); - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return _print_json_value(_origin_json_doc); }, - [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); - _counter->num_rows_filtered++; - if (*_scanner_eof) { - // Same as Case A - *eof = true; - return Status::OK(); - } - return Status::DataQualityError(fmt::to_string(error_msg)); - } - } else { - _json_doc = &_origin_json_doc; - } - - if (_json_doc->IsArray() && !_strip_outer_array) { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", - "JSON data is array-object, `strip_outer_array` must be TRUE."); - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return _print_json_value(_origin_json_doc); }, - [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); - _counter->num_rows_filtered++; - if (*_scanner_eof) { - // Same as Case A - *eof = true; - return Status::OK(); - } - return Status::DataQualityError(fmt::to_string(error_msg)); - } - - if (!_json_doc->IsArray() && _strip_outer_array) { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, "{}", - "JSON data is not an array-object, `strip_outer_array` must be FALSE."); - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return _print_json_value(_origin_json_doc); }, - [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); - _counter->num_rows_filtered++; - if (*_scanner_eof) { - // Same as Case A - *eof = true; - return Status::OK(); - } - return Status::DataQualityError(fmt::to_string(error_msg)); - } - - return Status::OK(); -} - -std::string JsonReader::_print_json_value(const rapidjson::Value& value) { - rapidjson::StringBuffer buffer; - buffer.Clear(); - rapidjson::Writer writer(buffer); - value.Accept(writer); - return std::string(buffer.GetString()); -} - -std::string JsonReader::_print_jsonpath(const std::vector& path) { - std::stringstream ss; - for (auto& p : path) { - ss << p.to_string() << "."; - } - return ss.str(); -} - -void JsonReader::_fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, - const uint8_t* value, int32_t len) { - tuple->set_not_null(slot_desc->null_indicator_offset()); - void* slot = tuple->get_slot(slot_desc->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = reinterpret_cast(mem_pool->allocate(len)); - memcpy(str_slot->ptr, value, len); - str_slot->len = len; -} - -Status 
JsonReader::_write_data_to_tuple(rapidjson::Value::ConstValueIterator value,
-                                        SlotDescriptor* desc, Tuple* tuple, MemPool* tuple_pool,
-                                        bool* valid) {
-    const char* str_value = nullptr;
-    uint8_t tmp_buf[128] = {0};
-    int32_t wbytes = 0;
-    switch (value->GetType()) {
-    case rapidjson::Type::kStringType:
-        str_value = value->GetString();
-        _fill_slot(tuple, desc, tuple_pool, (uint8_t*)str_value, strlen(str_value));
-        break;
-    case rapidjson::Type::kNumberType:
-        if (value->IsUint()) {
-            wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%u", value->GetUint());
-            _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes);
-        } else if (value->IsInt()) {
-            wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%d", value->GetInt());
-            _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes);
-        } else if (value->IsUint64()) {
-            wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%" PRIu64, value->GetUint64());
-            _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes);
-        } else if (value->IsInt64()) {
-            wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%" PRId64, value->GetInt64());
-            _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes);
-        } else {
-            wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%f", value->GetDouble());
-            _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes);
-        }
-        break;
-    case rapidjson::Type::kFalseType:
-        _fill_slot(tuple, desc, tuple_pool, (uint8_t*)"0", 1);
-        break;
-    case rapidjson::Type::kTrueType:
-        _fill_slot(tuple, desc, tuple_pool, (uint8_t*)"1", 1);
-        break;
-    case rapidjson::Type::kNullType:
-        if (desc->is_nullable()) {
-            tuple->set_null(desc->null_indicator_offset());
-        } else {
-            RETURN_IF_ERROR(_state->append_error_msg_to_file(
-                    [&]() -> std::string { return _print_json_value(*value); },
-                    [&]() -> std::string {
-                        fmt::memory_buffer error_msg;
-                        fmt::format_to(error_msg,
-                                       "Json value is null, but the column `{}` is not nullable.",
-                                       desc->col_name());
-                        return fmt::to_string(error_msg);
-                    },
-                    _scanner_eof));
-            _counter->num_rows_filtered++;
-            *valid = false;
-            return Status::OK();
-        }
-        break;
-    default:
-        // for other types like array or object, we convert the value to a string to save it
-        std::string json_str = _print_json_value(*value);
-        _fill_slot(tuple, desc, tuple_pool, (uint8_t*)json_str.c_str(), json_str.length());
-        break;
-    }
-    *valid = true;
-    return Status::OK();
-}
-
-// For simple-format json:
-// set valid to true and return OK if it succeeds.
-// set valid to false and return OK if we met an invalid row.
-// return another status if other problems are encountered.
-Status JsonReader::_set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple,
-                                    const std::vector<SlotDescriptor*>& slot_descs,
-                                    MemPool* tuple_pool, bool* valid) {
-    if (!objectValue.IsObject()) {
-        // Here we expect the incoming `objectValue` to be a Json Object, such as {"key" : "value"},
-        // not another type of Json format.
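For reference, the scalar-to-string convention of _write_data_to_tuple() above, restated as a free function against RapidJSON's public API. This is a sketch, not the deleted code; formatting details (e.g. double precision) differ:

#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <string>

std::string value_as_string(const rapidjson::Value& v) {
    if (v.IsString()) return v.GetString();
    if (v.IsBool()) return v.GetBool() ? "1" : "0"; // booleans stored as "0"/"1"
    if (v.IsUint64()) return std::to_string(v.GetUint64());
    if (v.IsInt64()) return std::to_string(v.GetInt64()); // covers int/uint as well
    if (v.IsNumber()) return std::to_string(v.GetDouble());
    // arrays, objects and null: serialize back to JSON text
    rapidjson::StringBuffer buf;
    rapidjson::Writer<rapidjson::StringBuffer> writer(buf);
    v.Accept(writer);
    return buf.GetString();
}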
-        RETURN_IF_ERROR(_state->append_error_msg_to_file(
-                [&]() -> std::string { return _print_json_value(objectValue); },
-                [&]() -> std::string { return "Expect json object value"; }, _scanner_eof));
-        _counter->num_rows_filtered++;
-        *valid = false; // current row is invalid
-        return Status::OK();
-    }
-
-    int nullcount = 0;
-    for (auto v : slot_descs) {
-        rapidjson::Value::ConstMemberIterator it = objectValue.MemberEnd();
-        if (_fuzzy_parse) {
-            auto idx_it = _name_map.find(v->col_name());
-            if (idx_it != _name_map.end() && idx_it->second < objectValue.MemberCount()) {
-                it = objectValue.MemberBegin() + idx_it->second;
-            }
-        } else {
-            it = objectValue.FindMember(
-                    rapidjson::Value(v->col_name().c_str(), v->col_name().size()));
-        }
-        if (it != objectValue.MemberEnd()) {
-            const rapidjson::Value& value = it->value;
-            RETURN_IF_ERROR(_write_data_to_tuple(&value, v, tuple, tuple_pool, valid));
-            if (!(*valid)) {
-                return Status::OK();
-            }
-        } else { // not found
-            if (v->is_nullable()) {
-                tuple->set_null(v->null_indicator_offset());
-                nullcount++;
-            } else {
-                RETURN_IF_ERROR(_state->append_error_msg_to_file(
-                        [&]() -> std::string { return _print_json_value(objectValue); },
-                        [&]() -> std::string {
-                            fmt::memory_buffer error_msg;
-                            fmt::format_to(error_msg,
-                                           "The column `{}` is not nullable, but it's not found in "
-                                           "jsondata.",
-                                           v->col_name());
-                            return fmt::to_string(error_msg);
-                        },
-                        _scanner_eof));
-                _counter->num_rows_filtered++;
-                *valid = false; // current row is invalid
-                break;
-            }
-        }
-    }
-
-    if (nullcount == slot_descs.size()) {
-        RETURN_IF_ERROR(_state->append_error_msg_to_file(
-                [&]() -> std::string { return _print_json_value(objectValue); },
-                [&]() -> std::string { return "All fields is null, this is a invalid row."; },
-                _scanner_eof));
-        _counter->num_rows_filtered++;
-        *valid = false;
-        return Status::OK();
-    }
-    *valid = true;
-    return Status::OK();
-}
-
-/**
- * Handle a simple json input.
- * A json is simple only when the user does not specify a json path.
- * For example:
- * case 1. [{"column1":"value1", "column2":10}, {"column1":"value2", "column2":30}]
- * case 2.
{"column1":"value1", "column2":10} - */ -Status JsonReader::_handle_simple_json(Tuple* tuple, const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof) { - do { - bool valid = false; - if (_next_line >= _total_lines) { // parse json and generic document - size_t size = 0; - Status st = _parse_json_doc(&size, eof); - if (st.is()) { - continue; // continue to read next - } - RETURN_IF_ERROR(st); // terminate if encounter other errors - if (size == 0 || *eof) { // read all data, then return - *is_empty_row = true; - return Status::OK(); - } - _name_map.clear(); - rapidjson::Value* objectValue = nullptr; - if (_json_doc->IsArray()) { - _total_lines = _json_doc->Size(); - if (_total_lines == 0) { - // may be passing an empty json, such as "[]" - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return _print_json_value(*_json_doc); }, - [&]() -> std::string { return "Empty json line"; }, _scanner_eof)); - _counter->num_rows_filtered++; - if (*_scanner_eof) { - *is_empty_row = true; - return Status::OK(); - } - continue; - } - objectValue = &(*_json_doc)[0]; - } else { - _total_lines = 1; // only one row - objectValue = _json_doc; - } - _next_line = 0; - if (_fuzzy_parse) { - for (auto v : slot_descs) { - for (int i = 0; i < objectValue->MemberCount(); ++i) { - auto it = objectValue->MemberBegin() + i; - if (v->col_name() == it->name.GetString()) { - _name_map[v->col_name()] = i; - break; - } - } - } - } - } - - if (_json_doc->IsArray()) { // handle case 1 - rapidjson::Value& objectValue = (*_json_doc)[_next_line]; // json object - RETURN_IF_ERROR(_set_tuple_value(objectValue, tuple, slot_descs, tuple_pool, &valid)); - } else { // handle case 2 - RETURN_IF_ERROR(_set_tuple_value(*_json_doc, tuple, slot_descs, tuple_pool, &valid)); - } - _next_line++; - if (!valid) { - if (*_scanner_eof) { - // When _scanner_eof is true and valid is false, it means that we have encountered - // unqualified data and decided to stop the scan. - *is_empty_row = true; - return Status::OK(); - } - continue; - } - *is_empty_row = false; - break; // get a valid row, then break - } while (_next_line <= _total_lines); - return Status::OK(); -} - -Status JsonReader::_write_values_by_jsonpath(rapidjson::Value& objectValue, MemPool* tuple_pool, - Tuple* tuple, - const std::vector& slot_descs, - bool* valid) { - int nullcount = 0; - size_t column_num = slot_descs.size(); - for (size_t i = 0; i < column_num; i++) { - rapidjson::Value* json_values = nullptr; - bool wrap_explicitly = false; - if (LIKELY(i < _parsed_jsonpaths.size())) { - json_values = JsonFunctions::get_json_array_from_parsed_json( - _parsed_jsonpaths[i], &objectValue, _origin_json_doc.GetAllocator(), - &wrap_explicitly); - } - - if (json_values == nullptr) { - // not match in jsondata. 
- if (slot_descs[i]->is_nullable()) { - tuple->set_null(slot_descs[i]->null_indicator_offset()); - nullcount++; - } else { - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return _print_json_value(objectValue); }, - [&]() -> std::string { - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, - "The column `{}` is not nullable, but it's not found in " - "jsondata.", - slot_descs[i]->col_name()); - return fmt::to_string(error_msg); - }, - _scanner_eof)); - _counter->num_rows_filtered++; - *valid = false; // current row is invalid - break; - } - } else { - CHECK(json_values->IsArray()); - if (json_values->Size() == 1 && wrap_explicitly) { - // NOTICE1: JsonFunctions::get_json_array_from_parsed_json() will wrap the single json object with an array. - // so here we unwrap the array to get the real element. - // if json_values' size > 1, it means we just match an array, not a wrapped one, so no need to unwrap. - json_values = &((*json_values)[0]); - } - RETURN_IF_ERROR( - _write_data_to_tuple(json_values, slot_descs[i], tuple, tuple_pool, valid)); - if (!(*valid)) { - break; - } - } - } - if (nullcount == column_num) { - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return _print_json_value(objectValue); }, - [&]() -> std::string { - return "All fields is null or not matched, this is a invalid row."; - }, - _scanner_eof)); - _counter->num_rows_filtered++; - *valid = false; - } - return Status::OK(); -} - -/** - * for example: - * { - * "data": {"a":"a1", "b":"b1", "c":"c1"} - * } - * In this scene, generate only one row - */ -Status JsonReader::_handle_nested_complex_json(Tuple* tuple, - const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof) { - while (true) { - size_t size = 0; - Status st = _parse_json_doc(&size, eof); - if (st.is()) { - continue; // continue to read next - } - RETURN_IF_ERROR(st); - if (size == 0 || *eof) { - *is_empty_row = true; - return Status::OK(); // read over,then return - } - *is_empty_row = false; - break; // read a valid row - } - bool valid = true; - RETURN_IF_ERROR(_write_values_by_jsonpath(*_json_doc, tuple_pool, tuple, slot_descs, &valid)); - if (!valid) { - // there is only one line in this case, so if it return false, just set is_empty_row true - // so that the caller will continue reading next line. - *is_empty_row = true; - } - return Status::OK(); -} - -/** - * flat array for json. _json_doc should be an array - * For example: - * [{"column1":"value1", "column2":10}, {"column1":"value2", "column2":30}] - * Result: - * column1 column2 - * ------------------ - * value1 10 - * value2 30 - */ -Status JsonReader::_handle_flat_array_complex_json(Tuple* tuple, - const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, - bool* eof) { - do { - if (_next_line >= _total_lines) { - size_t size = 0; - Status st = _parse_json_doc(&size, eof); - if (st.is()) { - continue; // continue to read next - } - RETURN_IF_ERROR(st); // terminate if encounter other errors - if (size == 0 || *eof) { // read all data, then return - *is_empty_row = true; - return Status::OK(); - } - _total_lines = _json_doc->Size(); - _next_line = 0; - - if (_total_lines == 0) { - // meet an empty json array. 
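The flat-array handler around this point yields one row per array element, while _parse_json_doc() has already enforced that the document shape agrees with strip_outer_array. That contract, stated as a tiny helper (a sketch, not the deleted code):

#include <rapidjson/document.h>

// Returns how many rows a parsed document should yield, or -1 when the
// document shape and strip_outer_array disagree.
int expected_rows(const rapidjson::Document& doc, bool strip_outer_array) {
    if (doc.IsArray()) {
        return strip_outer_array ? static_cast<int>(doc.Size()) : -1;
    }
    return strip_outer_array ? -1 : 1;
}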
- *is_empty_row = true; - continue; - } - } - rapidjson::Value& objectValue = (*_json_doc)[_next_line++]; - bool valid = true; - RETURN_IF_ERROR( - _write_values_by_jsonpath(objectValue, tuple_pool, tuple, slot_descs, &valid)); - if (!valid) { - continue; // process next line - } - *is_empty_row = false; - break; // get a valid row, then break - } while (_next_line <= _total_lines); - return Status::OK(); -} - -Status JsonReader::read_json_row(Tuple* tuple, const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof) { - return (this->*_handle_json_callback)(tuple, slot_descs, tuple_pool, is_empty_row, eof); -} - -} // namespace doris diff --git a/be/src/exec/json_scanner.h b/be/src/exec/json_scanner.h deleted file mode 100644 index 2d0165f33d..0000000000 --- a/be/src/exec/json_scanner.h +++ /dev/null @@ -1,202 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "exec/base_scanner.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "runtime/stream_load/stream_load_pipe.h" -#include "runtime/tuple.h" -#include "util/runtime_profile.h" - -namespace doris { -class Tuple; -class SlotDescriptor; -class RuntimeState; -class TupleDescriptor; -class JsonReader; -class LineReader; -class FileReader; - -class JsonScanner : public BaseScanner { -public: - JsonScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, ScannerCounter* counter); - ~JsonScanner(); - - // Open this scanner, will initialize information needed - Status open() override; - - // Get next tuple - Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override; - - Status get_next(vectorized::Block* block, bool* eof) override { - return Status::NotSupported("Not Implemented get block"); - } - - // Close this scanner - void close() override; - -protected: - Status open_file_reader(); - Status open_line_reader(); - Status open_json_reader(); - Status open_next_reader(); - - Status open_based_reader(); - Status get_range_params(std::string& jsonpath, std::string& json_root, bool& strip_outer_array, - bool& num_as_string, bool& fuzzy_parse); - -protected: - std::string _jsonpath; - std::string _jsonpath_file; - - std::string _line_delimiter; - int _line_delimiter_length; - - // Reader - // _cur_file_reader_s is for stream load pipe reader, - // and _cur_file_reader is for other file reader. 
- // TODO: refactor this to use only shared_ptr or unique_ptr - std::unique_ptr _cur_file_reader; - std::shared_ptr _cur_file_reader_s; - FileReader* _real_reader; - LineReader* _cur_line_reader; - JsonReader* _cur_json_reader; - bool _cur_reader_eof; - bool _read_json_by_line; - - // When we fetch range doesn't start from 0, - // we will read to one ahead, and skip the first line - bool _skip_next_line; -}; - -class JsonDataInternal { -public: - JsonDataInternal(rapidjson::Value* v); - ~JsonDataInternal() {} - rapidjson::Value::ConstValueIterator get_next(); - bool is_null() const { return _json_values == nullptr; } - -private: - rapidjson::Value* _json_values; - rapidjson::Value::ConstValueIterator _iterator; -}; - -struct JsonPath; -// Reader to parse the json. -// For most of its methods which return type is Status, -// return Status::OK() if process succeed or encounter data quality error. -// return other error Status if encounter other errors. -class JsonReader { -public: - JsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, - bool strip_outer_array, bool num_as_string, bool fuzzy_parse, bool* scanner_eof, - FileReader* file_reader = nullptr, LineReader* line_reader = nullptr); - - ~JsonReader(); - - Status init(const std::string& jsonpath, const std::string& json_root); // must call before use - - Status read_json_row(Tuple* tuple, const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof); - -protected: - Status (JsonReader::*_handle_json_callback)(Tuple* tuple, - const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof); - Status _handle_simple_json(Tuple* tuple, const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof); - Status _handle_flat_array_complex_json(Tuple* tuple, - const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empy_row, bool* eof); - Status _handle_nested_complex_json(Tuple* tuple, const std::vector& slot_descs, - MemPool* tuple_pool, bool* is_empty_row, bool* eof); - - void _fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, - const uint8_t* value, int32_t len); - Status _parse_json_doc(size_t* size, bool* eof); - Status _set_tuple_value(rapidjson::Value& objectValue, Tuple* tuple, - const std::vector& slot_descs, MemPool* tuple_pool, - bool* valid); - Status _write_data_to_tuple(rapidjson::Value::ConstValueIterator value, SlotDescriptor* desc, - Tuple* tuple, MemPool* tuple_pool, bool* valid); - Status _write_values_by_jsonpath(rapidjson::Value& objectValue, MemPool* tuple_pool, - Tuple* tuple, const std::vector& slot_descs, - bool* valid); - std::string _print_json_value(const rapidjson::Value& value); - std::string _print_jsonpath(const std::vector& path); - - void _close(); - Status _generate_json_paths(const std::string& jsonpath, - std::vector>* vect); - Status _parse_jsonpath_and_json_root(const std::string& jsonpath, const std::string& json_root); - -protected: - int _next_line; - int _total_lines; - RuntimeState* _state; - ScannerCounter* _counter; - RuntimeProfile* _profile; - FileReader* _file_reader; - LineReader* _line_reader; - bool _closed; - bool _strip_outer_array; - bool _num_as_string; - bool _fuzzy_parse; - RuntimeProfile::Counter* _bytes_read_counter; - RuntimeProfile::Counter* _read_timer; - RuntimeProfile::Counter* _file_read_timer; - - std::vector> _parsed_jsonpaths; - std::vector _parsed_json_root; - - char _value_buffer[4 * 1024 * 1024]; - char _parse_buffer[512 * 1024]; - - typedef 
rapidjson::GenericDocument, rapidjson::MemoryPoolAllocator<>, - rapidjson::MemoryPoolAllocator<>> - Document; - rapidjson::MemoryPoolAllocator<> _value_allocator; - rapidjson::MemoryPoolAllocator<> _parse_allocator; - Document _origin_json_doc; // origin json document object from parsed json string - rapidjson::Value* _json_doc; // _json_doc equals _final_json_doc iff not set `json_root` - std::unordered_map _name_map; - - // point to the _scanner_eof of JsonScanner - bool* _scanner_eof; -}; - -} // namespace doris diff --git a/be/src/exec/merge_node.cpp b/be/src/exec/merge_node.cpp deleted file mode 100644 index f0f57950b7..0000000000 --- a/be/src/exec/merge_node.cpp +++ /dev/null @@ -1,260 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/cloudera/Impala/blob/v0.7refresh/be/src/exec/merge-node.cc -// and modified by Doris - -#include "exec/merge_node.h" - -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/raw_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" - -using std::vector; - -namespace doris { - -MergeNode::MergeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _tuple_id(tnode.merge_node.tuple_id), - _const_result_expr_idx(0), - _child_idx(INVALID_CHILD_IDX), - _child_row_batch(nullptr), - _child_eos(false), - _child_row_idx(0) {} - -Status MergeNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - DCHECK(tnode.__isset.merge_node); - // Create _const_expr_lists from thrift exprs. - const std::vector>& const_texpr_lists = tnode.merge_node.const_expr_lists; - for (int i = 0; i < const_texpr_lists.size(); ++i) { - std::vector ctxs; - RETURN_IF_ERROR(Expr::create_expr_trees(_pool, const_texpr_lists[i], &ctxs)); - _const_result_expr_ctx_lists.push_back(ctxs); - } - // Create _result_expr__ctx_lists from thrift exprs. - const std::vector>& result_texpr_lists = tnode.merge_node.result_expr_lists; - for (int i = 0; i < result_texpr_lists.size(); ++i) { - std::vector ctxs; - RETURN_IF_ERROR(Expr::create_expr_trees(_pool, result_texpr_lists[i], &ctxs)); - _result_expr_ctx_lists.push_back(ctxs); - } - - return Status::OK(); -} - -Status MergeNode::prepare(RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - DCHECK(_tuple_desc != nullptr); - - // Prepare const expr lists. 
- for (int i = 0; i < _const_result_expr_ctx_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare(_const_result_expr_ctx_lists[i], state, row_desc())); - DCHECK_EQ(_const_result_expr_ctx_lists[i].size(), _tuple_desc->slots().size()); - } - - // prepare materialized_slots_ - for (int i = 0; i < _tuple_desc->slots().size(); ++i) { - SlotDescriptor* desc = _tuple_desc->slots()[i]; - if (desc->is_materialized()) { - _materialized_slots.push_back(desc); - } - } - - // Prepare result expr lists. - for (int i = 0; i < _result_expr_ctx_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare(_result_expr_ctx_lists[i], state, child(i)->row_desc())); - // DCHECK_EQ(_result_expr_ctx_lists[i].size(), _tuple_desc->slots().size()); - DCHECK_EQ(_result_expr_ctx_lists[i].size(), _materialized_slots.size()); - } - - return Status::OK(); -} - -Status MergeNode::open(RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // Prepare const expr lists. - for (int i = 0; i < _const_result_expr_ctx_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::open(_const_result_expr_ctx_lists[i], state)); - } - - // Prepare result expr lists. - for (int i = 0; i < _result_expr_ctx_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::open(_result_expr_ctx_lists[i], state)); - } - - return Status::OK(); -} - -Status MergeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - RETURN_IF_CANCELLED(state); - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // Create new tuple buffer for row_batch. - int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size(); - void* tuple_buffer = row_batch->tuple_data_pool()->allocate(tuple_buffer_size); - bzero(tuple_buffer, tuple_buffer_size); - Tuple* tuple = reinterpret_cast(tuple_buffer); - - // Evaluate and materialize the const expr lists exactly once. - while (_const_result_expr_idx < _const_result_expr_ctx_lists.size()) { - // Materialize expr results into row_batch. - eval_and_materialize_exprs(_const_result_expr_ctx_lists[_const_result_expr_idx], true, - &tuple, row_batch); - ++_const_result_expr_idx; - *eos = reached_limit(); - - if (*eos || row_batch->is_full()) { - return Status::OK(); - } - } - - if (_child_idx == INVALID_CHILD_IDX) { - _child_idx = 0; - } - - // Fetch from children, evaluate corresponding exprs and materialize. - while (_child_idx < _children.size()) { - // Row batch was either never set or we're moving on to a different child. - if (_child_row_batch.get() == nullptr) { - RETURN_IF_CANCELLED(state); - _child_row_batch.reset( - new RowBatch(child(_child_idx)->row_desc(), state->batch_size())); - // Open child and fetch the first row batch. - RETURN_IF_ERROR(child(_child_idx)->open(state)); - RETURN_IF_ERROR( - child(_child_idx)->get_next(state, _child_row_batch.get(), &_child_eos)); - _child_row_idx = 0; - } - - // Start (or continue) consuming row batches from current child. - while (true) { - // Continue materializing exprs on _child_row_batch into row batch. - if (eval_and_materialize_exprs(_result_expr_ctx_lists[_child_idx], false, &tuple, - row_batch)) { - *eos = reached_limit(); - - if (*eos) { - _child_idx = INVALID_CHILD_IDX; - } - - return Status::OK(); - } - - // Fetch new batch if one is available, otherwise move on to next child. 
- if (_child_eos) { - break; - } - - RETURN_IF_CANCELLED(state); - _child_row_batch->reset(); - RETURN_IF_ERROR( - child(_child_idx)->get_next(state, _child_row_batch.get(), &_child_eos)); - _child_row_idx = 0; - } - - // Close current child and move on to next one. - ++_child_idx; - _child_row_batch.reset(nullptr); - } - - _child_idx = INVALID_CHILD_IDX; - *eos = true; - return Status::OK(); -} - -Status MergeNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - // don't call ExecNode::close(), it always closes all children - _child_row_batch.reset(nullptr); - for (int i = 0; i < _const_result_expr_ctx_lists.size(); ++i) { - Expr::close(_const_result_expr_ctx_lists[i], state); - } - for (int i = 0; i < _result_expr_ctx_lists.size(); ++i) { - Expr::close(_result_expr_ctx_lists[i], state); - } - - return ExecNode::close(state); -} - -bool MergeNode::eval_and_materialize_exprs(const std::vector& ctxs, bool const_exprs, - Tuple** tuple, RowBatch* row_batch) { - // Make sure there are rows left in the batch. - if (!const_exprs && _child_row_idx >= _child_row_batch->num_rows()) { - return false; - } - - // Execute the body at least once. - bool done = true; - ExprContext* const* conjunct_ctxs = &_conjunct_ctxs[0]; - int num_conjunct_ctxs = _conjunct_ctxs.size(); - - do { - TupleRow* child_row = nullptr; - - if (!const_exprs) { - DCHECK(_child_row_batch != nullptr); - // Non-const expr list. Fetch next row from batch. - child_row = _child_row_batch->get_row(_child_row_idx); - ++_child_row_idx; - done = _child_row_idx >= _child_row_batch->num_rows(); - } - - // Add a new row to the batch. - int row_idx = row_batch->add_row(); - DCHECK(row_idx != RowBatch::INVALID_ROW_INDEX); - TupleRow* row = row_batch->get_row(row_idx); - row->set_tuple(0, *tuple); - - // Materialize expr results into tuple. - for (int i = 0; i < ctxs.size(); ++i) { - SlotDescriptor* slot_desc = _tuple_desc->slots()[i]; - RawValue::write(ctxs[i]->get_value(child_row), *tuple, slot_desc, - row_batch->tuple_data_pool()); - } - - if (ExecNode::eval_conjuncts(conjunct_ctxs, num_conjunct_ctxs, row)) { - row_batch->commit_last_row(); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - char* new_tuple = reinterpret_cast(*tuple); - new_tuple += _tuple_desc->byte_size(); - *tuple = reinterpret_cast(new_tuple); - } else { - // Make sure to reset null indicators since we're overwriting - // the tuple assembled for the previous row. - (*tuple)->init(_tuple_desc->byte_size()); - } - - if (row_batch->is_full() || row_batch->at_resource_limit() || reached_limit()) { - return true; - } - } while (!done); - - return false; -} - -} // namespace doris diff --git a/be/src/exec/merge_node.h b/be/src/exec/merge_node.h deleted file mode 100644 index a25ea19867..0000000000 --- a/be/src/exec/merge_node.h +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/cloudera/Impala/blob/v0.7refresh/be/src/exec/merge-node.h
-// and modified by Doris
-
-#pragma once
-
-#include "exec/exec_node.h"
-
-namespace doris {
-
-class Tuple;
-class TupleRow;
-
-// Node that merges the results of its children by materializing their
-// evaluated expressions into row batches. The MergeNode pulls row batches
-// from its children sequentially, i.e., it exhausts one child completely before moving
-// on to the next one.
-class MergeNode : public ExecNode {
-public:
-    MergeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-    virtual ~MergeNode() {}
-
-    // Create const exprs, child exprs and conjuncts from corresponding thrift exprs.
-    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
-    virtual Status prepare(RuntimeState* state);
-    virtual Status open(RuntimeState* state);
-    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
-    virtual Status close(RuntimeState* state);
-
-private:
-    const static int INVALID_CHILD_IDX = -1;
-
-    // Tuple id resolved in prepare() to set _tuple_desc.
-    int _tuple_id;
-
-    // Descriptor for tuples this merge node constructs.
-    const TupleDescriptor* _tuple_desc;
-
-    // Those _tuple_desc->slots() which are materialized, in the same order.
-    std::vector<SlotDescriptor*> _materialized_slots;
-
-    // Const exprs materialized by this node. These exprs don't refer to any children.
-    std::vector<std::vector<ExprContext*>> _const_result_expr_ctx_lists;
-
-    // Exprs materialized by this node. The i-th result expr list refers to the i-th child.
-    std::vector<std::vector<ExprContext*>> _result_expr_ctx_lists;
-
-    // Index of current const result expr list.
-    int _const_result_expr_idx;
-
-    // Index of current child.
-    int _child_idx;
-
-    // Current row batch of current child. We reset the pointer to a new RowBatch
-    // when switching to a different child.
-    std::unique_ptr<RowBatch> _child_row_batch;
-
-    // Saved from the last call to get_next() on the current child.
-    bool _child_eos;
-
-    // Index of current row in _child_row_batch.
-    int _child_row_idx;
-
-    // Evaluates exprs on all rows in _child_row_batch starting from _child_row_idx,
-    // and materializes their results into *tuple.
-    // Adds *tuple into row_batch, and increments *tuple.
-    // If const_exprs is true, then the exprs are evaluated exactly once without
-    // fetching rows from _child_row_batch.
-    // Only commits tuples to row_batch if they are not filtered by conjuncts.
-    // Returns true if row_batch should be returned to caller or limit has been
-    // reached, false otherwise.
-    bool eval_and_materialize_exprs(const std::vector<ExprContext*>& exprs, bool const_exprs,
-                                    Tuple** tuple, RowBatch* row_batch);
-};
-
-} // namespace doris
diff --git a/be/src/exec/mysql_scan_node.cpp b/be/src/exec/mysql_scan_node.cpp
deleted file mode 100644
index 734ebeeba0..0000000000
--- a/be/src/exec/mysql_scan_node.cpp
+++ /dev/null
@@ -1,263 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "mysql_scan_node.h" - -#include - -#include "exec/text_converter.hpp" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" -#include "util/runtime_profile.h" - -namespace doris { - -MysqlScanNode::MysqlScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ScanNode(pool, tnode, descs), - _is_init(false), - _table_name(tnode.mysql_scan_node.table_name), - _tuple_id(tnode.mysql_scan_node.tuple_id), - _columns(tnode.mysql_scan_node.columns), - _filters(tnode.mysql_scan_node.filters), - _tuple_desc(nullptr), - _slot_num(0) {} - -MysqlScanNode::~MysqlScanNode() {} - -Status MysqlScanNode::prepare(RuntimeState* state) { - VLOG_CRITICAL << "MysqlScanNode::Prepare"; - - if (_is_init) { - return Status::OK(); - } - - if (nullptr == state) { - return Status::InternalError("input pointer is nullptr."); - } - - RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // get tuple desc - _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - - if (nullptr == _tuple_desc) { - return Status::InternalError("Failed to get tuple descriptor."); - } - - _slot_num = _tuple_desc->slots().size(); - // get mysql info - const MySQLTableDescriptor* mysql_table = - static_cast(_tuple_desc->table_desc()); - - if (nullptr == mysql_table) { - return Status::InternalError("mysql table pointer is nullptr."); - } - - _my_param.host = mysql_table->host(); - _my_param.port = mysql_table->port(); - _my_param.user = mysql_table->user(); - _my_param.passwd = mysql_table->passwd(); - _my_param.db = mysql_table->mysql_db(); - _my_param.charset = mysql_table->charset(); - // new one scanner - _mysql_scanner.reset(new (std::nothrow) MysqlScanner(_my_param)); - - if (_mysql_scanner.get() == nullptr) { - return Status::InternalError("new a mysql scanner failed."); - } - - _tuple_pool.reset(new (std::nothrow) MemPool()); - - if (_tuple_pool.get() == nullptr) { - return Status::InternalError("new a mem pool failed."); - } - - _text_converter.reset(new (std::nothrow) TextConverter('\\')); - - if (_text_converter.get() == nullptr) { - return Status::InternalError("new a text convertor failed."); - } - - _is_init = true; - - return Status::OK(); -} - -Status MysqlScanNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - VLOG_CRITICAL << "MysqlScanNode::Open"; - - if (nullptr == state) { - return Status::InternalError("input pointer is nullptr."); - } - - if (!_is_init) { - return Status::InternalError("used before initialize."); - } - - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(_mysql_scanner->open()); - 
RETURN_IF_ERROR(_mysql_scanner->query(_table_name, _columns, _filters, _limit));
-
-    // check materialized slot num
-    int materialize_num = 0;
-
-    for (int i = 0; i < _tuple_desc->slots().size(); ++i) {
-        if (_tuple_desc->slots()[i]->is_materialized()) {
-            materialize_num++;
-        }
-    }
-
-    if (_mysql_scanner->field_num() != materialize_num) {
-        return Status::InternalError("input and output not equal.");
-    }
-
-    return Status::OK();
-}
-
-Status MysqlScanNode::write_text_slot(char* value, int value_length, SlotDescriptor* slot,
-                                      RuntimeState* state) {
-    if (!_text_converter->write_slot(slot, _tuple, value, value_length, true, false,
-                                     _tuple_pool.get())) {
-        return Status::InternalError("Fail to convert mysql value:'{}' to {} on column:`{}`", value,
-                                     slot->type().debug_string(), slot->col_name());
-    }
-
-    return Status::OK();
-}
-
-Status MysqlScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
-    VLOG_CRITICAL << "MysqlScanNode::GetNext";
-
-    if (nullptr == state || nullptr == row_batch || nullptr == eos) {
-        return Status::InternalError("input is nullptr pointer");
-    }
-
-    if (!_is_init) {
-        return Status::InternalError("used before initialize.");
-    }
-
-    RETURN_IF_CANCELLED(state);
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-
-    // create new tuple buffer for row_batch
-    int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size();
-    void* tuple_buffer = _tuple_pool->allocate(tuple_buffer_size);
-
-    if (nullptr == tuple_buffer) {
-        return Status::InternalError("Allocate memory failed.");
-    }
-
-    _tuple = reinterpret_cast<Tuple*>(tuple_buffer);
-    // Indicates whether there are more rows to process. Set in _mysql_scanner->get_next_row().
-    bool mysql_eos = false;
-
-    while (true) {
-        RETURN_IF_CANCELLED(state);
-
-        if (row_batch->is_full()) {
-            // hang on to last allocated chunk in pool, we'll keep writing into it in the
-            // next get_next() call
-            row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), !reached_limit());
-            return Status::OK();
-        }
-
-        // read mysql
-        char** data = nullptr;
-        unsigned long* length = nullptr;
-        RETURN_IF_ERROR(_mysql_scanner->get_next_row(&data, &length, &mysql_eos));
-
-        if (mysql_eos) {
-            row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false);
-            *eos = true;
-            return Status::OK();
-        }
-
-        int row_idx = row_batch->add_row();
-        TupleRow* row = row_batch->get_row(row_idx);
-        // scan node is the first tuple of tuple row
-        row->set_tuple(0, _tuple);
-        memset(_tuple, 0, _tuple_desc->num_null_bytes());
-        int j = 0;
-
-        for (int i = 0; i < _slot_num; ++i) {
-            auto slot_desc = _tuple_desc->slots()[i];
-            // because the fe planner filters out the non-materialized columns
-            if (!slot_desc->is_materialized()) {
-                continue;
-            }
-
-            if (data[j] == nullptr) {
-                if (slot_desc->is_nullable()) {
-                    _tuple->set_null(slot_desc->null_indicator_offset());
-                } else {
-                    return Status::InternalError(
-                            "nonnull column contains nullptr. table={}, column={}", _table_name,
-                            slot_desc->col_name());
-                }
-            } else {
-                RETURN_IF_ERROR(write_text_slot(data[j], length[j], slot_desc, state));
-            }
-
-            j++;
-        }
-
-        // MySQL has already filtered the rows, no need to check the conjuncts again.
- { - row_batch->commit_last_row(); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - char* new_tuple = reinterpret_cast(_tuple); - new_tuple += _tuple_desc->byte_size(); - _tuple = reinterpret_cast(new_tuple); - } - } - - return Status::OK(); -} - -Status MysqlScanNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - SCOPED_TIMER(_runtime_profile->total_time_counter()); - - _tuple_pool.reset(); - - return ExecNode::close(state); -} - -void MysqlScanNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "MysqlScanNode(tupleid=" << _tuple_id << " table=" << _table_name; - *out << ")" << std::endl; - - for (int i = 0; i < _children.size(); ++i) { - _children[i]->debug_string(indentation_level + 1, out); - } -} - -Status MysqlScanNode::set_scan_ranges(const std::vector& scan_ranges) { - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exec/mysql_scan_node.h b/be/src/exec/mysql_scan_node.h deleted file mode 100644 index 484cbef200..0000000000 --- a/be/src/exec/mysql_scan_node.h +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "exec/mysql_scanner.h" -#include "exec/scan_node.h" -#include "runtime/descriptors.h" - -namespace doris { - -class TextConverter; -class Tuple; -class TupleDescriptor; -class RuntimeState; -class MemPool; -class Status; - -class MysqlScanNode : public ScanNode { -public: - MysqlScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - ~MysqlScanNode(); - - // initialize _mysql_scanner, and create _text_converter. - virtual Status prepare(RuntimeState* state); - - // Start MySQL scan using _mysql_scanner. - virtual Status open(RuntimeState* state); - - // Fill the next row batch by calling next() on the _mysql_scanner, - // converting text data in MySQL cells to binary data. - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - - // Close the _mysql_scanner, and report errors. - virtual Status close(RuntimeState* state); - - // No use - virtual Status set_scan_ranges(const std::vector& scan_ranges); - -protected: - // Write debug string of this into out. - virtual void debug_string(int indentation_level, std::stringstream* out) const; - -private: - // Writes a slot in _tuple from an MySQL value containing text data. - // The Mysql value is converted into the appropriate target type. 
-    Status write_text_slot(char* value, int value_length, SlotDescriptor* slot,
-                           RuntimeState* state);
-
-protected:
-    bool _is_init;
-    MysqlScannerParam _my_param;
-    // Name of Mysql table
-    std::string _table_name;
-
-    // Tuple id resolved in prepare() to set _tuple_desc.
-    TupleId _tuple_id;
-
-    // select columns
-    std::vector<std::string> _columns;
-    // where clause
-    std::vector<std::string> _filters;
-
-    // Descriptor of tuples read from MySQL table.
-    const TupleDescriptor* _tuple_desc;
-    // Number of slots in the tuple descriptor.
-    int _slot_num;
-    // Pool for allocating tuple data, including all varying-length slots.
-    std::unique_ptr<MemPool> _tuple_pool;
-    // Scanner for reading from the MySQL table.
-    std::unique_ptr<MysqlScanner> _mysql_scanner;
-    // Helper class for converting text to other types.
-    std::unique_ptr<TextConverter> _text_converter;
-    // Current tuple.
-    Tuple* _tuple = nullptr;
-};
-
-} // namespace doris
diff --git a/be/src/exec/odbc_scan_node.cpp b/be/src/exec/odbc_scan_node.cpp
deleted file mode 100644
index 23b9ec51b1..0000000000
--- a/be/src/exec/odbc_scan_node.cpp
+++ /dev/null
@@ -1,255 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
- -#include "odbc_scan_node.h" - -#include - -#include "exec/text_converter.hpp" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" -#include "util/runtime_profile.h" - -namespace doris { - -OdbcScanNode::OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs, - std::string scan_node_type) - : ScanNode(pool, tnode, descs), - _is_init(false), - _scan_node_type(std::move(scan_node_type)), - _table_name(tnode.odbc_scan_node.table_name), - _connect_string(std::move(tnode.odbc_scan_node.connect_string)), - _query_string(std::move(tnode.odbc_scan_node.query_string)), - _tuple_id(tnode.odbc_scan_node.tuple_id), - _tuple_desc(nullptr), - _slot_num(0) {} - -OdbcScanNode::~OdbcScanNode() {} - -Status OdbcScanNode::prepare(RuntimeState* state) { - VLOG_CRITICAL << _scan_node_type << "::Prepare"; - - if (_is_init) { - return Status::OK(); - } - - if (nullptr == state) { - return Status::InternalError("input pointer is null."); - } - - RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // get tuple desc - _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - - if (nullptr == _tuple_desc) { - return Status::InternalError("Failed to get tuple descriptor."); - } - - _slot_num = _tuple_desc->slots().size(); - - _odbc_param.connect_string = std::move(_connect_string); - _odbc_param.query_string = std::move(_query_string); - _odbc_param.tuple_desc = _tuple_desc; - - _odbc_scanner.reset(new (std::nothrow) ODBCConnector(_odbc_param)); - - if (_odbc_scanner.get() == nullptr) { - return Status::InternalError("new a odbc scanner failed."); - } - - _tuple_pool.reset(new (std::nothrow) MemPool()); - - if (_tuple_pool.get() == nullptr) { - return Status::InternalError("new a mem pool failed."); - } - - _text_converter.reset(new (std::nothrow) TextConverter('\\')); - - if (_text_converter.get() == nullptr) { - return Status::InternalError("new a text convertor failed."); - } - - _is_init = true; - - return Status::OK(); -} - -Status OdbcScanNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - VLOG_CRITICAL << _scan_node_type << "::Open"; - - if (nullptr == state) { - return Status::InternalError("input pointer is null."); - } - - if (!_is_init) { - return Status::InternalError("used before initialize."); - } - - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(_odbc_scanner->open(state)); - RETURN_IF_ERROR(_odbc_scanner->query()); - // check materialize slot num - - return Status::OK(); -} - -Status OdbcScanNode::write_text_slot(char* value, int value_length, SlotDescriptor* slot, - RuntimeState* state) { - if (!_text_converter->write_slot(slot, _tuple, value, value_length, true, false, - _tuple_pool.get())) { - std::stringstream ss; - ss << "Fail to convert odbc value:'" << value << "' to " << slot->type() << " on column:`" - << slot->col_name() + "`"; - return Status::InternalError(ss.str()); - } - - return Status::OK(); -} - -Status OdbcScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - VLOG_CRITICAL << _scan_node_type << "::GetNext"; - - if (nullptr == state || nullptr == row_batch || nullptr == eos) { - return Status::InternalError("input is nullptr pointer"); - } - - if (!_is_init) { - return Status::InternalError("used before initialize."); - } - - RETURN_IF_CANCELLED(state); - 
SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - if (reached_limit()) { - *eos = true; - return Status::OK(); - } - - // create new tuple buffer for row_batch - int tuple_buffer_size = row_batch->capacity() * _tuple_desc->byte_size(); - void* tuple_buffer = _tuple_pool->allocate(tuple_buffer_size); - - if (nullptr == tuple_buffer) { - return Status::InternalError("Allocate memory failed."); - } - - _tuple = reinterpret_cast(tuple_buffer); - // Indicates whether there are more rows to process. Set in _odbc_scanner.next(). - bool odbc_eos = false; - - while (true) { - RETURN_IF_CANCELLED(state); - - if (reached_limit() || row_batch->is_full()) { - // hang on to last allocated chunk in pool, we'll keep writing into it in the - // next get_next() call - row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), !reached_limit()); - *eos = reached_limit(); - return Status::OK(); - } - - RETURN_IF_ERROR(_odbc_scanner->get_next_row(&odbc_eos)); - - if (odbc_eos) { - row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false); - *eos = true; - return Status::OK(); - } - - int row_idx = row_batch->add_row(); - TupleRow* row = row_batch->get_row(row_idx); - // scan node is the first tuple of tuple row - row->set_tuple(0, _tuple); - memset(_tuple, 0, _tuple_desc->num_null_bytes()); - int j = 0; - - for (int i = 0; i < _slot_num; ++i) { - auto slot_desc = _tuple_desc->slots()[i]; - // because the fe planner filter the non_materialize column - if (!slot_desc->is_materialized()) { - continue; - } - - const auto& column_data = _odbc_scanner->get_column_data(j); - if (column_data.strlen_or_ind == SQL_NULL_DATA) { - if (slot_desc->is_nullable()) { - _tuple->set_null(slot_desc->null_indicator_offset()); - } else { - return Status::InternalError( - "nonnull column contains nullptr. table={}, column={}", _table_name, - slot_desc->col_name()); - } - } else if (column_data.strlen_or_ind > column_data.buffer_length) { - return Status::InternalError( - "column value length longer than buffer length. " - "table={}, column={}, buffer_length", - _table_name, slot_desc->col_name(), column_data.buffer_length); - } else { - RETURN_IF_ERROR(write_text_slot(static_cast(column_data.target_value_ptr), - column_data.strlen_or_ind, slot_desc, state)); - } - j++; - } - - ExprContext* const* ctxs = &_conjunct_ctxs[0]; - int num_ctxs = _conjunct_ctxs.size(); - - // ODBC scanner can not filter conjunct with function, need check conjunct again. 
- if (ExecNode::eval_conjuncts(ctxs, num_ctxs, row)) { - row_batch->commit_last_row(); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - char* new_tuple = reinterpret_cast(_tuple); - new_tuple += _tuple_desc->byte_size(); - _tuple = reinterpret_cast(new_tuple); - } - } -} - -Status OdbcScanNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - SCOPED_TIMER(_runtime_profile->total_time_counter()); - - _tuple_pool.reset(); - - return ExecNode::close(state); -} - -void OdbcScanNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << _scan_node_type << "(tupleid=" << _tuple_id << " table=" << _table_name; - *out << ")" << std::endl; - - for (int i = 0; i < _children.size(); ++i) { - _children[i]->debug_string(indentation_level + 1, out); - } -} - -Status OdbcScanNode::set_scan_ranges(const std::vector& scan_ranges) { - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exec/odbc_scan_node.h b/be/src/exec/odbc_scan_node.h deleted file mode 100644 index 884e172607..0000000000 --- a/be/src/exec/odbc_scan_node.h +++ /dev/null @@ -1,101 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "exec/odbc_connector.h" -#include "exec/scan_node.h" -#include "runtime/descriptors.h" - -namespace doris { - -class TextConverter; -class Tuple; -class TupleDescriptor; -class RuntimeState; -class MemPool; -class Status; - -class OdbcScanNode : public ScanNode { -public: - OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs, - std::string scan_node_type = "OdbcScanNode"); - ~OdbcScanNode(); - - // initialize _odbc_scanner, and create _text_converter. - virtual Status prepare(RuntimeState* state); - - // Start ODBC scan using _odbc_scanner. - virtual Status open(RuntimeState* state); - - // Fill the next row batch by calling next() on the _odbc_scanner, - // converting text data in ODBC cells to binary data. - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - - // Close the _odbc_scanner, and report errors. - virtual Status close(RuntimeState* state); - - // No use - virtual Status set_scan_ranges(const std::vector& scan_ranges); - const TupleDescriptor* get_tuple_desc() { return _tuple_desc; } - TextConverter* get_text_converter() { return _text_converter.get(); } - ODBCConnector* get_odbc_scanner() { return _odbc_scanner.get(); } - const std::string& get_scan_node_type() { return _scan_node_type; } - - bool is_init() { return _is_init; } - -protected: - // Write debug string of this into out. 
- virtual void debug_string(int indentation_level, std::stringstream* out) const; - -private: - // Writes a slot in _tuple from an MySQL value containing text data. - // The Odbc value is converted into the appropriate target type. - Status write_text_slot(char* value, int value_length, SlotDescriptor* slot, - RuntimeState* state); - - bool _is_init; - - std::string _scan_node_type; - - // Name of Odbc table - std::string _table_name; - - std::string _connect_string; - - std::string _query_string; - // Tuple id resolved in prepare() to set _tuple_desc; - TupleId _tuple_id; - - // Descriptor of tuples read from ODBC table. - const TupleDescriptor* _tuple_desc; - // Tuple index in tuple row. - int _slot_num; - // Pool for allocating tuple data, including all varying-length slots. - std::unique_ptr _tuple_pool; - - // Scanner of ODBC. - std::unique_ptr _odbc_scanner; - ODBCConnectorParam _odbc_param; - // Helper class for converting text to other types; - std::unique_ptr _text_converter; - // Current tuple. - Tuple* _tuple = nullptr; -}; -} // namespace doris diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp deleted file mode 100644 index 8abca00c84..0000000000 --- a/be/src/exec/olap_scan_node.cpp +++ /dev/null @@ -1,1853 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/olap_scan_node.h" - -#include -#include -#include -#include - -#include "agent/cgroups_mgr.h" -#include "common/logging.h" -#include "common/resource_tls.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "exprs/runtime_filter.h" -#include "gen_cpp/PlanNodes_types.h" -#include "olap/storage_engine.h" -#include "runtime/exec_env.h" -#include "runtime/large_int_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_filter_mgr.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/tuple_row.h" -#include "util/priority_thread_pool.hpp" -#include "util/runtime_profile.h" -#include "util/thread.h" -#include "util/to_string.h" - -namespace doris { - -#define DS_SUCCESS(x) ((x) >= 0) - -OlapScanNode::OlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ScanNode(pool, tnode, descs), - _tuple_id(tnode.olap_scan_node.tuple_id), - _olap_scan_node(tnode.olap_scan_node), - _tuple_desc(nullptr), - _tuple_idx(0), - _eos(false), - _max_materialized_row_batches(config::doris_scanner_queue_size), - _start(false), - _scanner_done(false), - _transfer_done(false), - _status(Status::OK()), - _resource_info(nullptr), - _buffered_bytes(0), - _eval_conjuncts_fn(nullptr), - _runtime_filter_descs(tnode.runtime_filters) {} - -Status OlapScanNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - _direct_conjunct_size = _conjunct_ctxs.size(); - - const TQueryOptions& query_options = state->query_options(); - if (query_options.__isset.max_scan_key_num) { - _max_scan_key_num = query_options.max_scan_key_num; - } else { - _max_scan_key_num = config::doris_max_scan_key_num; - } - - if (query_options.__isset.max_pushdown_conditions_per_column) { - _max_pushdown_conditions_per_column = query_options.max_pushdown_conditions_per_column; - } else { - _max_pushdown_conditions_per_column = config::max_pushdown_conditions_per_column; - } - - _max_scanner_queue_size_bytes = query_options.mem_limit / 20; //TODO: session variable percent - - /// TODO: could one filter used in the different scan_node ? - int filter_size = _runtime_filter_descs.size(); - _runtime_filter_ctxs.resize(filter_size); - for (int i = 0; i < filter_size; ++i) { - IRuntimeFilter* runtime_filter = nullptr; - const auto& filter_desc = _runtime_filter_descs[i]; - RETURN_IF_ERROR(state->runtime_filter_mgr()->register_filter( - RuntimeFilterRole::CONSUMER, filter_desc, state->query_options(), id())); - RETURN_IF_ERROR(state->runtime_filter_mgr()->get_consume_filter(filter_desc.filter_id, - &runtime_filter)); - - _runtime_filter_ctxs[i].runtimefilter = runtime_filter; - } - _batch_size = _limit == -1 ? 
state->batch_size() - : std::min(static_cast(state->batch_size()), _limit); - return Status::OK(); -} - -void OlapScanNode::init_scan_profile() { - std::string scanner_profile_name = "OlapScanner"; - if (_olap_scan_node.__isset.table_name) { - scanner_profile_name = fmt::format("OlapScanner({0})", _olap_scan_node.table_name); - } - _scanner_profile.reset(new RuntimeProfile(scanner_profile_name)); - runtime_profile()->add_child(_scanner_profile.get(), true, nullptr); - - _segment_profile.reset(new RuntimeProfile("SegmentIterator")); - _scanner_profile->add_child(_segment_profile.get(), true, nullptr); -} - -void OlapScanNode::_init_counter(RuntimeState* state) { - ADD_TIMER(_scanner_profile, "ShowHintsTime_V1"); - - _reader_init_timer = ADD_TIMER(_scanner_profile, "ReaderInitTime"); - _read_compressed_counter = ADD_COUNTER(_segment_profile, "CompressedBytesRead", TUnit::BYTES); - _read_uncompressed_counter = - ADD_COUNTER(_segment_profile, "UncompressedBytesRead", TUnit::BYTES); - _block_load_timer = ADD_TIMER(_segment_profile, "BlockLoadTime"); - _block_load_counter = ADD_COUNTER(_segment_profile, "BlocksLoad", TUnit::UNIT); - _block_fetch_timer = ADD_TIMER(_scanner_profile, "BlockFetchTime"); - _raw_rows_counter = ADD_COUNTER(_segment_profile, "RawRowsRead", TUnit::UNIT); - _block_convert_timer = ADD_TIMER(_scanner_profile, "BlockConvertTime"); - _block_seek_timer = ADD_TIMER(_segment_profile, "BlockSeekTime"); - _block_seek_counter = ADD_COUNTER(_segment_profile, "BlockSeekCount", TUnit::UNIT); - - _rows_vec_cond_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT); - _vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime"); - _short_cond_timer = ADD_TIMER(_segment_profile, "ShortPredEvalTime"); - _first_read_timer = ADD_TIMER(_segment_profile, "FirstReadTime"); - _lazy_read_timer = ADD_TIMER(_segment_profile, "LazyReadTime"); - _output_col_timer = ADD_TIMER(_segment_profile, "OutputColumnTime"); - - _stats_filtered_counter = ADD_COUNTER(_segment_profile, "RowsStatsFiltered", TUnit::UNIT); - _bf_filtered_counter = ADD_COUNTER(_segment_profile, "RowsBloomFilterFiltered", TUnit::UNIT); - _del_filtered_counter = ADD_COUNTER(_scanner_profile, "RowsDelFiltered", TUnit::UNIT); - _conditions_filtered_counter = - ADD_COUNTER(_segment_profile, "RowsConditionsFiltered", TUnit::UNIT); - _key_range_filtered_counter = - ADD_COUNTER(_segment_profile, "RowsKeyRangeFiltered", TUnit::UNIT); - - _io_timer = ADD_TIMER(_segment_profile, "IOTimer"); - _decompressor_timer = ADD_TIMER(_segment_profile, "DecompressorTimer"); - _index_load_timer = ADD_TIMER(_segment_profile, "IndexLoadTime_V1"); - - _scan_timer = ADD_TIMER(_scanner_profile, "ScanTime"); - _scan_cpu_timer = ADD_TIMER(_scanner_profile, "ScanCpuTime"); - - _total_pages_num_counter = ADD_COUNTER(_segment_profile, "TotalPagesNum", TUnit::UNIT); - _cached_pages_num_counter = ADD_COUNTER(_segment_profile, "CachedPagesNum", TUnit::UNIT); - - _bitmap_index_filter_counter = - ADD_COUNTER(_segment_profile, "RowsBitmapIndexFiltered", TUnit::UNIT); - _bitmap_index_filter_timer = ADD_TIMER(_segment_profile, "BitmapIndexFilterTimer"); - - _num_scanners = ADD_COUNTER(_runtime_profile, "NumScanners", TUnit::UNIT); - - _filtered_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentFiltered", TUnit::UNIT); - _total_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentTotal", TUnit::UNIT); - - // time of transfer thread to wait for row batch from scan thread - _scanner_wait_batch_timer = ADD_TIMER(_runtime_profile, 
"ScannerBatchWaitTime"); - // time of scan thread to wait for worker thread of the thread pool - _scanner_wait_worker_timer = ADD_TIMER(_runtime_profile, "ScannerWorkerWaitTime"); - - // time of node to wait for batch/block queue - _olap_wait_batch_queue_timer = ADD_TIMER(_runtime_profile, "BatchQueueWaitTime"); - - // for the purpose of debugging or profiling - for (int i = 0; i < GENERAL_DEBUG_COUNT; ++i) { - char name[64]; - snprintf(name, sizeof(name), "GeneralDebugTimer%d", i); - _general_debug_timer[i] = ADD_TIMER(_segment_profile, name); - } -} - -Status OlapScanNode::prepare(RuntimeState* state) { - init_scan_profile(); - RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // create scanner profile - // create timer - _tablet_counter = ADD_COUNTER(runtime_profile(), "TabletCount ", TUnit::UNIT); - _scanner_sched_counter = ADD_COUNTER(runtime_profile(), "ScannerSchedCount ", TUnit::UNIT); - - _rows_pushed_cond_filtered_counter = - ADD_COUNTER(_scanner_profile, "RowsPushedCondFiltered", TUnit::UNIT); - _init_counter(state); - _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - - _scanner_mem_tracker = std::make_shared("Scanners"); - - if (_tuple_desc == nullptr) { - // TODO: make sure we print all available diagnostic output to our error log - return Status::InternalError("Failed to get tuple descriptor."); - } - - _runtime_profile->add_info_string("Table", _tuple_desc->table_desc()->name()); - - const std::vector& slots = _tuple_desc->slots(); - - for (int i = 0; i < slots.size(); ++i) { - if (!slots[i]->is_materialized()) { - continue; - } - - if (slots[i]->type().is_collection_type()) { - _collection_slots.push_back(slots[i]); - } - - if (slots[i]->type().is_string_type()) { - _string_slots.push_back(slots[i]); - } - } - - _runtime_state = state; - for (size_t i = 0; i < _runtime_filter_descs.size(); ++i) { - IRuntimeFilter* runtime_filter = nullptr; - state->runtime_filter_mgr()->get_consume_filter(_runtime_filter_descs[i].filter_id, - &runtime_filter); - DCHECK(runtime_filter != nullptr); - runtime_filter->init_profile(_runtime_profile.get()); - } - return Status::OK(); -} - -Status OlapScanNode::open(RuntimeState* state) { - VLOG_CRITICAL << "OlapScanNode::Open"; - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - _resource_info = ResourceTls::get_resource_tls(); - - // acquire runtime filter - _runtime_filter_ctxs.resize(_runtime_filter_descs.size()); - - for (size_t i = 0; i < _runtime_filter_descs.size(); ++i) { - auto& filter_desc = _runtime_filter_descs[i]; - IRuntimeFilter* runtime_filter = nullptr; - state->runtime_filter_mgr()->get_consume_filter(filter_desc.filter_id, &runtime_filter); - DCHECK(runtime_filter != nullptr); - if (auto bf = runtime_filter->get_bloomfilter()) { - RETURN_IF_ERROR(bf->init_with_fixed_length()); - } - if (runtime_filter == nullptr) { - continue; - } - bool ready = runtime_filter->is_ready(); - if (!ready) { - ready = runtime_filter->await(); - } - if (ready) { - std::list expr_context; - RETURN_IF_ERROR(runtime_filter->get_push_expr_ctxs(&expr_context)); - _runtime_filter_ctxs[i].apply_mark = true; - _runtime_filter_ctxs[i].runtimefilter = runtime_filter; - - for (auto ctx : expr_context) { - ctx->prepare(state, row_desc()); - ctx->open(state); - int index = _conjunct_ctxs.size(); - _conjunct_ctxs.push_back(ctx); - // it's safe to store address from a 
fix-resized vector - _conjunctid_to_runtime_filter_ctxs[index] = &_runtime_filter_ctxs[i]; - } - } - } - - return Status::OK(); -} - -Status OlapScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // check if Canceled. - if (state->is_cancelled()) { - std::unique_lock l(_row_batches_lock); - _transfer_done = true; - std::lock_guard guard(_status_mutex); - if (LIKELY(_status.ok())) { - _status = Status::Cancelled("Cancelled"); - } - return _status; - } - - // check if started. - if (!_start) { - Status status = start_scan(state); - - if (!status.ok()) { - LOG(ERROR) << "StartScan Failed cause " << status; - *eos = true; - return status; - } - - _start = true; - } - - // some conjuncts will be disposed in start_scan function, so - // we should check _eos after call start_scan - if (_eos) { - *eos = true; - return Status::OK(); - } - - // wait for batch from queue - RowBatch* materialized_batch = nullptr; - { - std::unique_lock l(_row_batches_lock); - SCOPED_TIMER(_olap_wait_batch_queue_timer); - while (_materialized_row_batches.empty() && !_transfer_done) { - if (state->is_cancelled()) { - _transfer_done = true; - } - - // use wait_for, not wait, in case to capture the state->is_cancelled() - _row_batch_added_cv.wait_for(l, std::chrono::seconds(1)); - } - - if (!_materialized_row_batches.empty()) { - materialized_batch = _materialized_row_batches.front(); - DCHECK(materialized_batch != nullptr); - _materialized_row_batches.pop_front(); - _materialized_row_batches_bytes -= - materialized_batch->tuple_data_pool()->total_reserved_bytes(); - } - } - - // return batch - if (nullptr != materialized_batch) { - // notify scanner - _row_batch_consumed_cv.notify_one(); - // get scanner's batch memory - row_batch->acquire_state(materialized_batch); - _num_rows_returned += row_batch->num_rows(); - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - // reach scan node limit - if (reached_limit()) { - int num_rows_over = _num_rows_returned - _limit; - row_batch->set_num_rows(row_batch->num_rows() - num_rows_over); - _num_rows_returned -= num_rows_over; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - { - std::unique_lock l(_row_batches_lock); - _transfer_done = true; - } - - _row_batch_consumed_cv.notify_all(); - *eos = true; - VLOG_QUERY << "OlapScanNode ReachedLimit. 
fragment id=" - << print_id(_runtime_state->fragment_instance_id()); - } else { - *eos = false; - } - - if (VLOG_ROW_IS_ON) { - for (int i = 0; i < row_batch->num_rows(); ++i) { - TupleRow* row = row_batch->get_row(i); - VLOG_ROW << "OlapScanNode output row: " - << Tuple::to_string(row->get_tuple(0), *_tuple_desc); - } - } - - delete materialized_batch; - return Status::OK(); - } - - // all scanner done, change *eos to true - *eos = true; - std::lock_guard guard(_status_mutex); - return _status; -} - -Status OlapScanNode::collect_query_statistics(QueryStatistics* statistics) { - RETURN_IF_ERROR(ExecNode::collect_query_statistics(statistics)); - statistics->add_scan_bytes(_read_compressed_counter->value()); - statistics->add_scan_rows(_raw_rows_counter->value()); - statistics->add_cpu_ms(_scan_cpu_timer->value() / NANOS_PER_MILLIS); - return Status::OK(); -} - -Status OlapScanNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - // change done status - { - std::unique_lock l(_row_batches_lock); - _transfer_done = true; - } - // notify all scanner thread - _row_batch_consumed_cv.notify_all(); - _row_batch_added_cv.notify_all(); - _scan_batch_added_cv.notify_all(); - - // _transfer_thread - // _transfer_thread may not be initialized. So need to check it - if (_transfer_thread != nullptr) { - _transfer_thread->join(); - } - - // clear some row batch in queue - for (auto row_batch : _materialized_row_batches) { - delete row_batch; - } - - _materialized_row_batches.clear(); - _materialized_row_batches_bytes = 0; - - for (auto row_batch : _scan_row_batches) { - delete row_batch; - } - - _scan_row_batches.clear(); - _scan_row_batches_bytes = 0; - - // OlapScanNode terminate by exception - // so that initiative close the Scanner - for (auto scanner : _olap_scanners) { - scanner->close(state); - } - - for (auto& filter_desc : _runtime_filter_descs) { - IRuntimeFilter* runtime_filter = nullptr; - state->runtime_filter_mgr()->get_consume_filter(filter_desc.filter_id, &runtime_filter); - DCHECK(runtime_filter != nullptr); - runtime_filter->consumer_close(); - } - - VLOG_CRITICAL << "OlapScanNode::close()"; - // pushed functions close - Expr::close(_pushed_func_conjunct_ctxs, state); - - return ScanNode::close(state); -} - -// PlanFragmentExecutor will call this method to set scan range -// Doris scan range is defined in thrift file like this -// struct TPaloScanRange { -// 1: required list hosts -// 2: required string schema_hash -// 3: required string version -// 5: required Types.TTabletId tablet_id -// 6: required string db_name -// 7: optional list partition_column_ranges -// 8: optional string index_name -// 9: optional string table_name -//} -// every doris_scan_range is related with one tablet so that one olap scan node contains multiple tablet -Status OlapScanNode::set_scan_ranges(const std::vector& scan_ranges) { - for (auto& scan_range : scan_ranges) { - DCHECK(scan_range.scan_range.__isset.palo_scan_range); - _scan_ranges.emplace_back(new TPaloScanRange(scan_range.scan_range.palo_scan_range)); - COUNTER_UPDATE(_tablet_counter, 1); - } - - return Status::OK(); -} - -Status OlapScanNode::start_scan(RuntimeState* state) { - RETURN_IF_CANCELLED(state); - - VLOG_CRITICAL << "Eval Const Conjuncts"; - // 1. Eval const conjuncts to find whether eos = true - eval_const_conjuncts(); - - VLOG_CRITICAL << "NormalizeConjuncts"; - // 2. 
Convert conjuncts to a ColumnValueRange on each column; some conjuncts may
-    // set eos = true
-    RETURN_IF_ERROR(normalize_conjuncts());
-
-    // if steps 1 and 2 found conjuncts that set eos = true, return directly
-    if (_eos) {
-        return Status::OK();
-    }
-
-    VLOG_CRITICAL << "BuildKeyRangesAndFilters";
-    // 3.1 Use ColumnValueRange to build StorageEngine filters
-    RETURN_IF_ERROR(build_key_ranges_and_filters());
-    // 3.2 Function pushdown
-    if (state->enable_function_pushdown()) RETURN_IF_ERROR(build_function_filters());
-
-    VLOG_CRITICAL << "Filter idle conjuncts";
-    // 4. Filter out idle conjuncts which have already been transformed to olap filters.
-    // This must be after build_scan_key, as it will free the StringValue memory.
-    remove_pushed_conjuncts(state);
-
-    VLOG_CRITICAL << "StartScanThread";
-    // 5. Start multiple threads to read the `Sub Sub ScanRange`s
-    RETURN_IF_ERROR(start_scan_thread(state));
-
-    return Status::OK();
-}
-
-bool OlapScanNode::is_key_column(const std::string& key_name) {
-    // all columns in a dup_keys table are treated as key columns
-    // by the olap scan node
-    if (_olap_scan_node.keyType == TKeysType::DUP_KEYS) {
-        return true;
-    }
-
-    auto res = std::find(_olap_scan_node.key_column_name.begin(),
-                         _olap_scan_node.key_column_name.end(), key_name);
-    return res != _olap_scan_node.key_column_name.end();
-}
-
-void OlapScanNode::remove_pushed_conjuncts(RuntimeState* state) {
-    if (_pushed_conjuncts_index.empty() && _pushed_func_conjuncts_index.empty()) {
-        return;
-    }
-
-    // dispose direct conjuncts first
-    std::vector<ExprContext*> new_conjunct_ctxs;
-    for (int i = 0; i < _direct_conjunct_size; ++i) {
-        if (!_pushed_conjuncts_index.empty() && _pushed_conjuncts_index.count(i)) {
-            _conjunct_ctxs[i]->close(state); // pushed condition, just close
-        } else if (!_pushed_func_conjuncts_index.empty() && _pushed_func_conjuncts_index.count(i)) {
-            _pushed_func_conjunct_ctxs.emplace_back(
-                    _conjunct_ctxs[i]); // pushed functions, need to keep ctxs
-        } else {
-            new_conjunct_ctxs.emplace_back(_conjunct_ctxs[i]);
-        }
-    }
-
-    auto new_direct_conjunct_size = new_conjunct_ctxs.size();
-
-    // dispose hash join push down conjuncts second
-    for (int i = _direct_conjunct_size; i < _conjunct_ctxs.size(); ++i) {
-        if (!_pushed_conjuncts_index.empty() && _pushed_conjuncts_index.count(i)) {
-            _conjunct_ctxs[i]->close(state); // pushed condition, just close
-        } else if (!_pushed_func_conjuncts_index.empty() && _pushed_func_conjuncts_index.count(i)) {
-            _pushed_func_conjunct_ctxs.emplace_back(
-                    _conjunct_ctxs[i]); // pushed functions, need to keep ctxs
-        } else {
-            new_conjunct_ctxs.emplace_back(_conjunct_ctxs[i]);
-        }
-    }
-
-    _conjunct_ctxs = std::move(new_conjunct_ctxs);
-    _direct_conjunct_size = new_direct_conjunct_size;
-
-    // TODO: support vbloom_filter_predicate/vbinary_predicate and merge unpushed predicate to _vconjunct_ctx
-    for (auto push_down_ctx : _pushed_conjuncts_index) {
-        auto iter = _conjunctid_to_runtime_filter_ctxs.find(push_down_ctx);
-        if (iter != _conjunctid_to_runtime_filter_ctxs.end()) {
-            iter->second->runtimefilter->set_push_down_profile();
-        }
-    }
-
-    // set _vconjunct_ctx_ptr to empty if all conjuncts have been pushed down
-    if (_direct_conjunct_size == 0) {
-        if (_vconjunct_ctx_ptr != nullptr) {
-            (*_vconjunct_ctx_ptr)->close(state);
-            _vconjunct_ctx_ptr = nullptr;
-        }
-    }
-
-    // filter idle conjuncts in vexpr_contexts
-    auto checker = [&](int index) { return _pushed_conjuncts_index.count(index); };
-    _peel_pushed_vconjunct(state, checker);
-}
-
-void OlapScanNode::eval_const_conjuncts() {
-    for (int conj_idx = 0; conj_idx <
_conjunct_ctxs.size(); ++conj_idx) { - // if conjunct is constant, compute direct and set eos = true - if (_conjunct_ctxs[conj_idx]->root()->is_constant()) { - void* value = _conjunct_ctxs[conj_idx]->get_value(nullptr); - if (value == nullptr || *reinterpret_cast(value) == false) { - _eos = true; - break; - } - } - } -} - -Status OlapScanNode::normalize_conjuncts() { - std::vector slots = _tuple_desc->slots(); - - for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) { - switch (slots[slot_idx]->type().type) { - case TYPE_TINYINT: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_SMALLINT: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_INT: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_BIGINT: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_LARGEINT: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_CHAR: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - case TYPE_VARCHAR: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - case TYPE_HLL: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - case TYPE_STRING: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DATE: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - case TYPE_DATETIME: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DATEV2: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_DECIMALV2: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - case TYPE_BOOLEAN: { - ColumnValueRange range(slots[slot_idx]->col_name()); - normalize_predicate(range, slots[slot_idx]); - break; - } - - default: { - VLOG_CRITICAL << "Unsupported Normalize Slot [ColName=" << slots[slot_idx]->col_name() - << "]"; - break; - } - } - } - - return Status::OK(); -} - -static std::string olap_filter_to_string(const doris::TCondition& condition) { - auto op_name = condition.condition_op; - if (condition.condition_op == "*=") { - op_name = "IN"; - } else if (condition.condition_op == "!*=") { - op_name = "NOT IN"; - } - return fmt::format("{{{} {} {}}}", condition.column_name, op_name, - to_string(condition.condition_values)); -} - -static std::string olap_filters_to_string(const std::vector& filters) { - // std::vector filters_string; - std::string filters_string; - filters_string += "["; - for (auto it = filters.cbegin(); it != filters.cend(); it++) { - if (it != filters.cbegin()) { - filters_string += ","; - } - filters_string += olap_filter_to_string(*it); - } - filters_string += "]"; - return filters_string; -} - -Status OlapScanNode::build_function_filters() { - for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - ExprContext* ex_ctx = 
_conjunct_ctxs[conj_idx]; - Expr* fn_expr = ex_ctx->root(); - bool opposite = false; - - if (TExprNodeType::COMPOUND_PRED == fn_expr->node_type() && - TExprOpcode::COMPOUND_NOT == fn_expr->op()) { - fn_expr = fn_expr->get_child(0); - opposite = true; - } - - // currently only support like / not like - if (TExprNodeType::FUNCTION_CALL == fn_expr->node_type() && - "like" == fn_expr->fn().name.function_name) { - doris_udf::FunctionContext* func_cxt = - ex_ctx->fn_context(fn_expr->get_fn_context_index()); - - if (!func_cxt) { - continue; - } - if (fn_expr->children().size() != 2) { - continue; - } - SlotRef* slot_ref = nullptr; - Expr* literal_expr = nullptr; - - if (TExprNodeType::SLOT_REF == fn_expr->get_child(0)->node_type()) { - literal_expr = fn_expr->get_child(1); - slot_ref = (SlotRef*)(fn_expr->get_child(0)); - } else if (TExprNodeType::SLOT_REF == fn_expr->get_child(1)->node_type()) { - literal_expr = fn_expr->get_child(0); - slot_ref = (SlotRef*)(fn_expr->get_child(1)); - } else { - continue; - } - - if (TExprNodeType::STRING_LITERAL != literal_expr->node_type()) continue; - - const SlotDescriptor* slot_desc = nullptr; - std::vector slot_ids; - slot_ref->get_slot_ids(&slot_ids); - for (SlotDescriptor* slot : _tuple_desc->slots()) { - if (slot->id() == slot_ids[0]) { - slot_desc = slot; - break; - } - } - - if (!slot_desc) { - continue; - } - std::string col = slot_desc->col_name(); - StringVal val = literal_expr->get_string_val(ex_ctx, nullptr); - _push_down_functions.emplace_back(opposite, col, func_cxt, val); - _pushed_func_conjuncts_index.insert(conj_idx); - } - } - return Status::OK(); -} - -Status OlapScanNode::build_key_ranges_and_filters() { - const std::vector& column_names = _olap_scan_node.key_column_name; - const std::vector& column_types = _olap_scan_node.key_column_type; - DCHECK(column_types.size() == column_names.size()); - - // 1. construct scan key except last olap engine short key - _scan_keys.set_is_convertible(limit() == -1); - - // we use `exact_range` to identify a key range is an exact range or not when we convert - // it to `_scan_keys`. If `exact_range` is true, we can just discard it from `_olap_filter`. 
- bool exact_range = true; - bool eos = false; - for (int column_index = 0; - column_index < column_names.size() && !_scan_keys.has_range_value() && !eos; - ++column_index) { - auto iter = _column_value_ranges.find(column_names[column_index]); - if (_column_value_ranges.end() == iter) { - break; - } - - RETURN_IF_ERROR(std::visit( - [&](auto&& range) { - RETURN_IF_ERROR(_scan_keys.extend_scan_key(range, _max_scan_key_num, - &exact_range, &eos)); - if (exact_range) { - _column_value_ranges.erase(iter->first); - } - return Status::OK(); - }, - iter->second)); - } - _eos |= eos; - - for (auto& iter : _column_value_ranges) { - std::vector filters; - std::visit([&](auto&& range) { range.to_olap_filter(filters); }, iter.second); - - for (auto& filter : filters) { - _olap_filter.push_back(std::move(filter)); - } - } - - _runtime_profile->add_info_string("PushdownPredicate", olap_filters_to_string(_olap_filter)); - - _runtime_profile->add_info_string("KeyRanges", _scan_keys.debug_string()); - - VLOG_CRITICAL << _scan_keys.debug_string(); - - return Status::OK(); -} - -Status OlapScanNode::get_hints(TabletSharedPtr table, const TPaloScanRange& scan_range, - int block_row_count, bool is_begin_include, bool is_end_include, - const std::vector>& scan_key_range, - std::vector>* sub_scan_range, - RuntimeProfile* profile) { - RuntimeProfile::Counter* show_hints_timer = profile->get_counter("ShowHintsTime_V1"); - std::vector> ranges; - bool have_valid_range = false; - for (auto& key_range : scan_key_range) { - if (key_range->begin_scan_range.size() == 1 && - key_range->begin_scan_range.get_value(0) == NEGATIVE_INFINITY) { - continue; - } - SCOPED_TIMER(show_hints_timer); - - Status res = Status::OK(); - std::vector range; - res = table->split_range(key_range->begin_scan_range, key_range->end_scan_range, - block_row_count, &range); - if (!res.ok()) { - return Status::InternalError("fail to show hints"); - } - ranges.emplace_back(std::move(range)); - have_valid_range = true; - } - - if (!have_valid_range) { - std::vector range; - auto res = table->split_range({}, {}, block_row_count, &range); - if (!res.ok()) { - return Status::InternalError("fail to show hints"); - } - ranges.emplace_back(std::move(range)); - } - - for (int i = 0; i < ranges.size(); ++i) { - for (int j = 0; j < ranges[i].size(); j += 2) { - std::unique_ptr range(new OlapScanRange); - range->begin_scan_range.reset(); - range->begin_scan_range = ranges[i][j]; - range->end_scan_range.reset(); - range->end_scan_range = ranges[i][j + 1]; - - if (0 == j) { - range->begin_include = is_begin_include; - } else { - range->begin_include = true; - } - - if (j + 2 == ranges[i].size()) { - range->end_include = is_end_include; - } else { - range->end_include = false; - } - - sub_scan_range->emplace_back(std::move(range)); - } - } - - return Status::OK(); -} - -Status OlapScanNode::start_scan_thread(RuntimeState* state) { - if (_scan_ranges.empty()) { - _transfer_done = true; - return Status::OK(); - } - - // ranges constructed from scan keys - std::vector> cond_ranges; - RETURN_IF_ERROR(_scan_keys.get_key_range(&cond_ranges)); - // if we can't get ranges from conditions, we give it a total range - if (cond_ranges.empty()) { - cond_ranges.emplace_back(new OlapScanRange()); - } - - bool need_split = true; - // If we have ranges more than 64, there is no need to call - // ShowHint to split ranges - if (limit() != -1 || cond_ranges.size() > 64) { - need_split = false; - } - - int scanners_per_tablet = std::max(1, 64 / (int)_scan_ranges.size()); - 
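The sub-range assembly in get_hints() above is worth checking in isolation. A minimal sketch, assuming plain ints in place of OlapTuple key values: split_range() hands back a flat list [b0, e0, b1, e1, ...], and only the outermost bounds inherit the caller's inclusiveness.

#include <iostream>
#include <vector>

struct SubRange {
    int begin, end;
    bool begin_include, end_include;
};

int main() {
    std::vector<int> flat = {0, 10, 10, 20, 20, 30};  // pretend split points
    bool is_begin_include = true, is_end_include = false;
    std::vector<SubRange> out;
    for (size_t j = 0; j + 1 < flat.size(); j += 2) {
        out.push_back(SubRange{flat[j], flat[j + 1],
                               /*begin_include=*/j == 0 ? is_begin_include : true,
                               /*end_include=*/j + 2 == flat.size() ? is_end_include : false});
    }
    for (const auto& r : out)
        std::cout << (r.begin_include ? '[' : '(') << r.begin << ", " << r.end
                  << (r.end_include ? ']' : ')') << '\n';
    // prints: [0, 10)  [10, 20)  [20, 30)
}

Interior cut points are begin-inclusive and end-exclusive, so adjacent sub-scan-ranges cover the key space without overlap.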
std::unordered_set disk_set; - for (auto& scan_range : _scan_ranges) { - auto tablet_id = scan_range->tablet_id; - int32_t schema_hash = strtoul(scan_range->schema_hash.c_str(), nullptr, 10); - std::string err; - TabletSharedPtr tablet = - StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, true, &err); - if (tablet == nullptr) { - std::stringstream ss; - ss << "failed to get tablet: " << tablet_id << " with schema hash: " << schema_hash - << ", reason: " << err; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - std::vector>* ranges = &cond_ranges; - std::vector> split_ranges; - if (need_split && !tablet->all_beta()) { - auto st = get_hints(tablet, *scan_range, config::doris_scan_range_row_count, - _scan_keys.begin_include(), _scan_keys.end_include(), cond_ranges, - &split_ranges, _runtime_profile.get()); - if (st.ok()) { - ranges = &split_ranges; - } - } - // In order to avoid the problem of too many scanners caused by small tablets, - // in addition to scanRange, we also need to consider the size of the tablet when - // creating the scanner. One scanner is used for every 1Gb, and the final scanner_per_tablet - // takes the minimum value calculated by scanrange and size. - int size_based_scanners_per_tablet = 1; - if (config::doris_scan_range_max_mb > 0) { - size_based_scanners_per_tablet = std::max( - 1, (int)(tablet->tablet_footprint() / (config::doris_scan_range_max_mb << 20))); - } - int ranges_per_scanner = - std::max(1, (int)ranges->size() / - std::min(scanners_per_tablet, size_based_scanners_per_tablet)); - int num_ranges = ranges->size(); - for (int i = 0; i < num_ranges;) { - std::vector scanner_ranges; - scanner_ranges.push_back((*ranges)[i].get()); - ++i; - for (int j = 1; i < num_ranges && j < ranges_per_scanner && - (*ranges)[i]->end_include == (*ranges)[i - 1]->end_include; - ++j, ++i) { - scanner_ranges.push_back((*ranges)[i].get()); - } - OlapScanner* scanner = - new OlapScanner(state, this, _olap_scan_node.is_preaggregation, - _need_agg_finalize, *scan_range, _scanner_mem_tracker); - scanner->set_batch_size(_batch_size); - // add scanner to pool before doing prepare. - // so that scanner can be automatically deconstructed if prepare failed. - _scanner_pool.add(scanner); - RETURN_IF_ERROR(scanner->prepare(*scan_range, scanner_ranges, _olap_filter, - _bloom_filters_push_down, _push_down_functions)); - - _olap_scanners.push_back(scanner); - disk_set.insert(scanner->scan_disk()); - } - } - COUNTER_SET(_num_disks_accessed_counter, static_cast(disk_set.size())); - COUNTER_SET(_num_scanners, static_cast(_olap_scanners.size())); - - // PAIN_LOG(_olap_scanners.size()); - // init progress - std::stringstream ss; - ss << "ScanThread complete (node=" << id() << "):"; - _progress = ProgressUpdater(ss.str(), _olap_scanners.size(), 1); - - _transfer_thread = std::make_shared(&OlapScanNode::transfer_thread, this, state); - - return Status::OK(); -} - -template -Status OlapScanNode::normalize_predicate(ColumnValueRange& range, SlotDescriptor* slot) { - // 1. Normalize InPredicate, add to ColumnValueRange - RETURN_IF_ERROR(normalize_in_and_eq_predicate(slot, &range)); - - // 2. Normalize NotInPredicate, add to ColumnValueRange - RETURN_IF_ERROR(normalize_not_in_and_not_eq_predicate(slot, &range)); - - // 3. Normalize BinaryPredicate , add to ColumnValueRange - RETURN_IF_ERROR(normalize_noneq_binary_predicate(slot, &range)); - - // 3. 
Normalize BloomFilterPredicate, push down by hash join node - RETURN_IF_ERROR(normalize_bloom_filter_predicate(slot)); - - // 4. Check whether range is empty, set _eos - if (range.is_empty_value_range()) _eos = true; - - // 5. Add range to Column->ColumnValueRange map - _column_value_ranges[slot->col_name()] = range; - - return Status::OK(); -} - -static bool ignore_cast(SlotDescriptor* slot, Expr* expr) { - if (slot->type().is_date_type() && expr->type().is_date_type()) { - return true; - } - if (slot->type().is_string_type() && expr->type().is_string_type()) { - return true; - } - return false; -} - -bool OlapScanNode::should_push_down_in_predicate(doris::SlotDescriptor* slot, - doris::InPredicate* pred) { - if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { - // not a slot ref(column) - return false; - } - - std::vector slot_ids; - if (pred->get_child(0)->get_slot_ids(&slot_ids) != 1) { - // not a single column predicate - return false; - } - - if (slot_ids[0] != slot->id()) { - // predicate not related to current column - return false; - } - - if (pred->get_child(0)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(0))) { - // the type of predicate not match the slot's type - return false; - } - } - - VLOG_CRITICAL << slot->col_name() << " fixed_values add num: " << pred->hybrid_set()->size(); - - // if there are too many elements in InPredicate, exceed the limit, - // we will not push any condition of this column to storage engine. - // because too many conditions pushed down to storage engine may even - // slow down the query process. - // ATTN: This is just an experience value. You may need to try - // different thresholds to improve performance. - if (pred->hybrid_set()->size() > _max_pushdown_conditions_per_column) { - VLOG_NOTICE << "Predicate value num " << pred->hybrid_set()->size() << " exceed limit " - << _max_pushdown_conditions_per_column; - return false; - } - - return true; -} - -std::pair OlapScanNode::should_push_down_eq_predicate(doris::SlotDescriptor* slot, - doris::Expr* pred, int conj_idx, - int child_idx) { - auto result_pair = std::make_pair(false, nullptr); - - // Do not get slot_ref of column, should not push_down to Storage Engine - if (Expr::type_without_cast(pred->get_child(child_idx)) != TExprNodeType::SLOT_REF) { - return result_pair; - } - - std::vector slot_ids; - if (pred->get_child(child_idx)->get_slot_ids(&slot_ids) != 1) { - // not a single column predicate - return result_pair; - } - - if (slot_ids[0] != slot->id()) { - // predicate not related to current column - return result_pair; - } - - if (pred->get_child(child_idx)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(child_idx))) { - // the type of predicate not match the slot's type - return result_pair; - } - } - - Expr* expr = pred->get_child(1 - child_idx); - if (!expr->is_constant()) { - // only handle constant value - return result_pair; - } - - // get value in result pair - result_pair = std::make_pair(true, _conjunct_ctxs[conj_idx]->get_value(expr, nullptr)); - - return result_pair; -} - -template -Status OlapScanNode::change_fixed_value_range(ColumnValueRange& temp_range, - void* value, const ChangeFixedValueRangeFunc& func) { - switch (primitive_type) { - case TYPE_DATE: { - DateTimeValue date_value = *reinterpret_cast(value); - // There is must return empty data in olap_scan_node, - // Because data value loss accuracy - if (!date_value.check_loss_accuracy_cast_to_date()) { - func(temp_range, - 
reinterpret_cast::CppType*>( - &date_value)); - } - break; - } - case TYPE_DECIMALV2: - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_DATETIME: - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_LARGEINT: - case TYPE_STRING: { - func(temp_range, - reinterpret_cast::CppType*>(value)); - break; - } - case TYPE_BOOLEAN: { - bool v = *reinterpret_cast(value); - func(temp_range, - reinterpret_cast::CppType*>(&v)); - break; - } - default: { - LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type=" - << primitive_type << "]"; - return Status::InternalError("Normalize filter fail, Unsupported Primitive type"); - } - } - return Status::OK(); -} - -// Construct the ColumnValueRange for one specified column -// It will only handle the InPredicate and eq BinaryPredicate in _conjunct_ctxs. -// It will try to push down conditions of that column as much as possible, -// But if the number of conditions exceeds the limit, none of conditions will be pushed down. -template -Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot, - ColumnValueRange* range) { - std::vector filter_conjuncts_index; - for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - // create empty range as temp range, temp range should do intersection on range - auto temp_range = ColumnValueRange::create_empty_column_value_range(); - - // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)' - if (TExprOpcode::FILTER_IN == _conjunct_ctxs[conj_idx]->root()->op()) { - InPredicate* pred = static_cast(_conjunct_ctxs[conj_idx]->root()); - if (!should_push_down_in_predicate(slot, pred)) { - continue; - } - - // begin to push InPredicate value into ColumnValueRange - HybridSetBase::IteratorBase* iter = pred->hybrid_set()->begin(); - while (iter->has_next()) { - // column in (nullptr) is always false so continue to - // dispose next item - if (nullptr == iter->get_value()) { - iter->next(); - continue; - } - auto value = const_cast(iter->get_value()); - RETURN_IF_ERROR(change_fixed_value_range( - temp_range, value, ColumnValueRange::add_fixed_value_range)); - iter->next(); - } - - if (is_key_column(slot->col_name())) { - filter_conjuncts_index.emplace_back(conj_idx); - } - range->intersection(temp_range); - } // end of handle in predicate - // 2. 
Normalize eq conjuncts like 'where col = value' - else if (TExprNodeType::BINARY_PRED == _conjunct_ctxs[conj_idx]->root()->node_type() && - FILTER_IN == to_olap_filter_type(_conjunct_ctxs[conj_idx]->root()->op(), false)) { - Expr* pred = _conjunct_ctxs[conj_idx]->root(); - DCHECK(pred->get_num_children() == 2); - - for (int child_idx = 0; child_idx < 2; ++child_idx) { - // TODO: should use C++17 structured bindlings to refactor this code in the future: - // 'auto [should_push_down, value] = should_push_down_eq_predicate(slot, pred, conj_idx, child_idx);' - // make code tidier and readabler - auto result_pair = should_push_down_eq_predicate(slot, pred, conj_idx, child_idx); - if (!result_pair.first) { - continue; - } - - auto value = result_pair.second; - // where A = nullptr should return empty result set - if (value != nullptr) { - RETURN_IF_ERROR(change_fixed_value_range( - temp_range, value, ColumnValueRange::add_fixed_value_range)); - } - - if (is_key_column(slot->col_name())) { - filter_conjuncts_index.emplace_back(conj_idx); - } - range->intersection(temp_range); - } // end for each binary predicate child - } // end of handling eq binary predicate - } - - // exceed limit, no conditions will be pushed down to storage engine. - if (range->get_fixed_value_size() > _max_pushdown_conditions_per_column) { - range->set_whole_value_range(); - } else { - std::copy(filter_conjuncts_index.cbegin(), filter_conjuncts_index.cend(), - std::inserter(_pushed_conjuncts_index, _pushed_conjuncts_index.begin())); - } - return Status::OK(); -} - -// Construct the ColumnValueRange for one specified column -// It will only handle the NotInPredicate and not eq BinaryPredicate in _conjunct_ctxs. -// It will try to push down conditions of that column as much as possible, -// But if the number of conditions exceeds the limit, none of conditions will be pushed down. -template -Status OlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot, - ColumnValueRange* range) { - // If the conjunct of slot is fixed value, will change the fixed value set of column value range - // else add value to not in range and push down predicate directly - bool is_fixed_range = range->is_fixed_value_range(); - auto not_in_range = ColumnValueRange::create_empty_column_value_range(range->column_name()); - - std::vector filter_conjuncts_index; - for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - // 1. Normalize in conjuncts like 'where col not in (v1, v2, v3)' - if (TExprOpcode::FILTER_NOT_IN == _conjunct_ctxs[conj_idx]->root()->op()) { - InPredicate* pred = static_cast(_conjunct_ctxs[conj_idx]->root()); - if (!should_push_down_in_predicate(slot, pred)) { - continue; - } - - // begin to push InPredicate value into ColumnValueRange - auto iter = pred->hybrid_set()->begin(); - while (iter->has_next()) { - // column not in (nullptr) is always true - if (nullptr == iter->get_value()) { - continue; - } - auto value = const_cast(iter->get_value()); - if (is_fixed_range) { - RETURN_IF_ERROR(change_fixed_value_range( - *range, value, ColumnValueRange::remove_fixed_value_range)); - } else { - RETURN_IF_ERROR(change_fixed_value_range( - not_in_range, value, ColumnValueRange::add_fixed_value_range)); - } - iter->next(); - } - - // only where a in ('a', 'b', nullptr) contain nullptr will - // clear temp_range to whole range, no need do intersection - if (is_key_column(slot->col_name())) { - filter_conjuncts_index.emplace_back(conj_idx); - } - } // end of handle not in predicate - - // 2. 
Normalize eq conjuncts like 'where col != value' - if (TExprNodeType::BINARY_PRED == _conjunct_ctxs[conj_idx]->root()->node_type() && - FILTER_NOT_IN == to_olap_filter_type(_conjunct_ctxs[conj_idx]->root()->op(), false)) { - Expr* pred = _conjunct_ctxs[conj_idx]->root(); - DCHECK(pred->get_num_children() == 2); - - for (int child_idx = 0; child_idx < 2; ++child_idx) { - // TODO: should use C++17 structured bindlings to refactor this code in the future: - // 'auto [should_push_down, value] = should_push_down_eq_predicate(slot, pred, conj_idx, child_idx);' - // make code tidier and readabler - auto result_pair = should_push_down_eq_predicate(slot, pred, conj_idx, child_idx); - if (!result_pair.first) { - continue; - } - auto value = result_pair.second; - - if (is_fixed_range) { - RETURN_IF_ERROR(change_fixed_value_range( - *range, value, ColumnValueRange::remove_fixed_value_range)); - } else { - RETURN_IF_ERROR(change_fixed_value_range( - not_in_range, value, ColumnValueRange::add_fixed_value_range)); - } - - if (is_key_column(slot->col_name())) { - filter_conjuncts_index.emplace_back(conj_idx); - } - } // end for each binary predicate child - } // end of handling eq binary predicate - } - - // exceed limit, no conditions will be pushed down to storage engine. - if (is_fixed_range || - not_in_range.get_fixed_value_size() <= _max_pushdown_conditions_per_column) { - if (!is_fixed_range) { - // push down not in condition to storage engine - not_in_range.to_in_condition(_olap_filter, false); - } - std::copy(filter_conjuncts_index.cbegin(), filter_conjuncts_index.cend(), - std::inserter(_pushed_conjuncts_index, _pushed_conjuncts_index.begin())); - } - return Status::OK(); -} - -template -bool OlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor* slot, - const std::string& is_null_str, - ColumnValueRange* range) { - if (expr->node_type() != TExprNodeType::SLOT_REF) { - return false; - } - - std::vector slot_ids; - if (1 != expr->get_slot_ids(&slot_ids)) { - return false; - } - - if (slot_ids[0] != slot->id()) { - return false; - } - - auto temp_range = ColumnValueRange::create_empty_column_value_range(); - temp_range.set_contain_null(is_null_str == "null"); - range->intersection(temp_range); - - return true; -} - -template -Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, - ColumnValueRange* range) { - std::vector filter_conjuncts_index; - - for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - Expr* root_expr = _conjunct_ctxs[conj_idx]->root(); - if (TExprNodeType::BINARY_PRED != root_expr->node_type() || - FILTER_IN == to_olap_filter_type(root_expr->op(), false) || - FILTER_NOT_IN == to_olap_filter_type(root_expr->op(), false)) { - if (TExprNodeType::FUNCTION_CALL == root_expr->node_type()) { - std::string is_null_str; - // 1. dispose the where pred "A is null" and "A is not null" - if (root_expr->is_null_scalar_function(is_null_str) && - normalize_is_null_predicate(root_expr->get_child(0), slot, is_null_str, - range)) { - // if column is key column should push down conjunct storage engine - if (is_key_column(slot->col_name())) { - filter_conjuncts_index.emplace_back(conj_idx); - } - } - } - continue; - } - - // 2. 
dispose the where pred "A <,<=" and "A >,>=" - Expr* pred = _conjunct_ctxs[conj_idx]->root(); - DCHECK(pred->get_num_children() == 2); - - for (int child_idx = 0; child_idx < 2; ++child_idx) { - if (Expr::type_without_cast(pred->get_child(child_idx)) != TExprNodeType::SLOT_REF) { - continue; - } - if (pred->get_child(child_idx)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(child_idx))) { - continue; - } - } - - std::vector slot_ids; - - if (1 == pred->get_child(child_idx)->get_slot_ids(&slot_ids)) { - if (slot_ids[0] != slot->id()) { - continue; - } - - Expr* expr = pred->get_child(1 - child_idx); - - // for case: where col_a > col_b - if (!expr->is_constant()) { - continue; - } - - void* value = _conjunct_ctxs[conj_idx]->get_value(expr, nullptr); - // for case: where col > null - if (value == nullptr) { - continue; - } - - switch (slot->type().type) { - case TYPE_DATE: { - DateTimeValue date_value = *reinterpret_cast(value); - // NOTE: Datetime may be truncated to a date column, so we call ++operator for date_value - // for example: '2010-01-01 00:00:01' will be truncate to '2010-01-01' - if (date_value.check_loss_accuracy_cast_to_date()) { - if (pred->op() == TExprOpcode::LT || pred->op() == TExprOpcode::GE) { - ++date_value; - } - } - range->add_range(to_olap_filter_type(pred->op(), child_idx), - *reinterpret_cast::CppType*>( - &date_value)); - break; - } - case TYPE_TINYINT: - case TYPE_DECIMALV2: - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_DATETIME: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_LARGEINT: - case TYPE_BOOLEAN: - case TYPE_STRING: { - range->add_range( - to_olap_filter_type(pred->op(), child_idx), - *reinterpret_cast::CppType*>(value)); - break; - } - - default: { - LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. 
[type=" - << expr->type() << "]"; - return Status::InternalError( - "Normalize filter fail, Unsupported Primitive type"); - } - } - - if (is_key_column(slot->col_name())) { - filter_conjuncts_index.emplace_back(conj_idx); - } - - VLOG_CRITICAL << slot->col_name() << " op: " - << static_cast(to_olap_filter_type(pred->op(), child_idx)) - << " value: " - << *reinterpret_cast::CppType*>( - value); - } - } - } - - std::copy(filter_conjuncts_index.cbegin(), filter_conjuncts_index.cend(), - std::inserter(_pushed_conjuncts_index, _pushed_conjuncts_index.begin())); - - return Status::OK(); -} - -Status OlapScanNode::normalize_bloom_filter_predicate(SlotDescriptor* slot) { - std::vector filter_conjuncts_index; - - for (int conj_idx = _direct_conjunct_size; conj_idx < _conjunct_ctxs.size(); ++conj_idx) { - Expr* root_expr = _conjunct_ctxs[conj_idx]->root(); - if (TExprNodeType::BLOOM_PRED != root_expr->node_type()) continue; - - Expr* pred = _conjunct_ctxs[conj_idx]->root(); - DCHECK(pred->get_num_children() == 1); - - if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) { - continue; - } - if (pred->get_child(0)->type().type != slot->type().type) { - if (!ignore_cast(slot, pred->get_child(0))) { - continue; - } - } - - std::vector slot_ids; - - if (1 == pred->get_child(0)->get_slot_ids(&slot_ids)) { - if (slot_ids[0] != slot->id()) { - continue; - } - // only key column of bloom filter will push down to storage engine - if (is_key_column(slot->col_name())) { - filter_conjuncts_index.emplace_back(conj_idx); - _bloom_filters_push_down.emplace_back( - slot->col_name(), - (reinterpret_cast(pred))->get_bloom_filter_func()); - } - } - } - - std::copy(filter_conjuncts_index.cbegin(), filter_conjuncts_index.cend(), - std::inserter(_pushed_conjuncts_index, _pushed_conjuncts_index.begin())); - - return Status::OK(); -} - -void OlapScanNode::transfer_thread(RuntimeState* state) { - // scanner open pushdown to scanThread - SCOPED_ATTACH_TASK(state); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh_shared()); - Status status = Status::OK(); - for (auto scanner : _olap_scanners) { - status = Expr::clone_if_not_exists(_conjunct_ctxs, state, scanner->conjunct_ctxs()); - if (!status.ok()) { - std::lock_guard guard(_status_mutex); - _status = status; - break; - } - } - - /********************************* - * The basic strategy of priority scheduling: - * 1. Determine the initial nice value by querying the number of split ranges - * The more the number of Ranges, the more likely it is to be recognized as a large query, and the smaller the nice value - * 2. Adjust the nice value by querying the accumulated data volume - * The more data read, the more likely it is to be regarded as a large query, and the smaller the nice value - * 3. Judge the priority of the query by the nice value - * The larger the nice value, the more preferentially obtained query resources - * 4. 
Regularly increase the priority of the remaining tasks in the queue to avoid starvation for large queries - *********************************/ - ThreadPoolToken* thread_token = state->get_query_fragments_ctx()->get_token(); - PriorityThreadPool* thread_pool = state->exec_env()->scan_thread_pool(); - PriorityThreadPool* remote_thread_pool = state->exec_env()->remote_scan_thread_pool(); - _total_assign_num = 0; - _nice = 18 + std::max(0, 2 - (int)_olap_scanners.size() / 5); - std::list olap_scanners; - - int64_t mem_consume = _scanner_mem_tracker->consumption(); - int max_thread = _max_materialized_row_batches; - if (config::doris_scanner_row_num > state->batch_size()) { - max_thread /= config::doris_scanner_row_num / state->batch_size(); - if (max_thread <= 0) max_thread = 1; - } - // read from scanner - while (LIKELY(status.ok())) { - // When query cancel, _transfer_done is set to true at OlapScanNode::close, - // and the loop is exited at this time, and the current thread exits after - // waiting for _running_thread to decrease to 0. - if (UNLIKELY(_transfer_done)) { - LOG(INFO) << "Transfer thread cancelled, wait for the end of scan thread."; - break; - } - int assigned_thread_num = 0; - // copy to local - { - std::unique_lock l(_scan_batches_lock); - assigned_thread_num = _running_thread; - // How many thread can apply to this query - size_t thread_slot_num = 0; - mem_consume = _scanner_mem_tracker->consumption(); - // check limit for total memory and _scan_row_batches memory - if (mem_consume < (state->query_mem_tracker()->limit() * 6) / 10 && - _scan_row_batches_bytes < _max_scanner_queue_size_bytes / 2) { - thread_slot_num = max_thread - assigned_thread_num; - } else { - // Memory already exceed - if (_scan_row_batches.empty()) { - // NOTE(zc): here need to lock row_batches_lock_ - // be worried about dead lock, so don't check here - // if (materialized_row_batches_.empty()) { - // LOG(FATAL) << "Scan_row_batches_ and materialized_row_batches_" - // " are empty when memory exceed"; - // } - // Just for notify if scan_row_batches_ is empty and no running thread - if (assigned_thread_num == 0) { - thread_slot_num = 1; - // NOTE: if olap_scanners_ is empty, scanner_done_ should be true - } - } - } - thread_slot_num = std::min(thread_slot_num, _olap_scanners.size()); - for (int i = 0; i < thread_slot_num; ++i) { - olap_scanners.push_back(_olap_scanners.front()); - _olap_scanners.pop_front(); - _running_thread++; - assigned_thread_num++; - } - } - - auto iter = olap_scanners.begin(); - if (thread_token != nullptr) { - while (iter != olap_scanners.end()) { - auto s = thread_token->submit_func( - std::bind(&OlapScanNode::scanner_thread, this, *iter)); - if (s.ok()) { - (*iter)->start_wait_worker_timer(); - COUNTER_UPDATE(_scanner_sched_counter, 1); - olap_scanners.erase(iter++); - } else { - LOG(FATAL) << "Failed to assign scanner task to thread pool! 
" << s; - } - ++_total_assign_num; - } - } else { - while (iter != olap_scanners.end()) { - PriorityThreadPool::Task task; - task.work_function = std::bind(&OlapScanNode::scanner_thread, this, *iter); - task.priority = _nice; - task.queue_id = state->exec_env()->store_path_to_index((*iter)->scan_disk()); - (*iter)->start_wait_worker_timer(); - - TabletStorageType type = (*iter)->get_storage_type(); - bool ret = false; - COUNTER_UPDATE(_scanner_sched_counter, 1); - if (type == TabletStorageType::STORAGE_TYPE_LOCAL) { - ret = thread_pool->offer(task); - } else { - ret = remote_thread_pool->offer(task); - } - - if (ret) { - olap_scanners.erase(iter++); - } else { - LOG(FATAL) << "Failed to assign scanner task to thread pool!"; - } - ++_total_assign_num; - } - } - - RowBatch* scan_batch = nullptr; - { - // 1 scanner idle task not empty, assign new scanner task - std::unique_lock l(_scan_batches_lock); - - // scanner_row_num = 16k - // 16k * 10 * 12 * 8 = 15M(>2s) --> nice=10 - // 16k * 20 * 22 * 8 = 55M(>6s) --> nice=0 - while (_nice > 0 && _total_assign_num > (22 - _nice) * (20 - _nice) * 6) { - --_nice; - } - - // 2 wait when all scanner are running & no result in queue - while (UNLIKELY(_running_thread == assigned_thread_num && _scan_row_batches.empty() && - !_scanner_done)) { - SCOPED_TIMER(_scanner_wait_batch_timer); - _scan_batch_added_cv.wait(l); - } - - // 3 transfer result row batch when queue is not empty - if (LIKELY(!_scan_row_batches.empty())) { - scan_batch = _scan_row_batches.front(); - _scan_row_batches.pop_front(); - _scan_row_batches_bytes -= scan_batch->tuple_data_pool()->total_reserved_bytes(); - - // delete scan_batch if transfer thread should be stopped - // because scan_batch wouldn't be useful anymore - if (UNLIKELY(_transfer_done)) { - delete scan_batch; - scan_batch = nullptr; - } - } else { - if (_scanner_done) { - break; - } - } - } - - if (nullptr != scan_batch) { - add_one_batch(scan_batch); - } - } // end of transfer while - - VLOG_CRITICAL << "TransferThread finish."; - { - std::unique_lock l(_row_batches_lock); - _transfer_done = true; - _row_batch_added_cv.notify_all(); - } - - std::unique_lock l(_scan_batches_lock); - _scan_thread_exit_cv.wait(l, [this] { return _running_thread == 0; }); - VLOG_CRITICAL << "Scanner threads have been exited. TransferThread exit."; -} - -void OlapScanNode::scanner_thread(OlapScanner* scanner) { - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh_shared()); - Thread::set_self_name("olap_scanner"); - if (UNLIKELY(_transfer_done)) { - _scanner_done = true; - std::unique_lock l(_scan_batches_lock); - _running_thread--; - // We need to make sure the scanner is closed because the query has been closed or cancelled. - scanner->close(scanner->runtime_state()); - _scan_batch_added_cv.notify_one(); - _scan_thread_exit_cv.notify_one(); - LOG(INFO) << "Scan thread cancelled, cause query done, scan thread started to exit"; - return; - } - int64_t wait_time = scanner->update_wait_worker_timer(); - // Do not use ScopedTimer. There is no guarantee that, the counter - // (_scan_cpu_timer, the class member) is not destroyed after `_running_thread==0`. 
- ThreadCpuStopWatch cpu_watch; - cpu_watch.start(); - Status status = Status::OK(); - bool eos = false; - RuntimeState* state = scanner->runtime_state(); - DCHECK(nullptr != state); - if (!scanner->is_open()) { - status = scanner->open(); - if (!status.ok()) { - std::lock_guard guard(_status_mutex); - _status = status; - eos = true; - } - scanner->set_opened(); - } - - std::vector contexts; - auto& scanner_filter_apply_marks = *scanner->mutable_runtime_filter_marks(); - DCHECK(scanner_filter_apply_marks.size() == _runtime_filter_descs.size()); - for (size_t i = 0; i < scanner_filter_apply_marks.size(); i++) { - if (!scanner_filter_apply_marks[i] && !_runtime_filter_ctxs[i].apply_mark) { - IRuntimeFilter* runtime_filter = nullptr; - state->runtime_filter_mgr()->get_consume_filter(_runtime_filter_descs[i].filter_id, - &runtime_filter); - DCHECK(runtime_filter != nullptr); - bool ready = runtime_filter->is_ready(); - if (ready) { - runtime_filter->get_prepared_context(&contexts, row_desc()); - scanner_filter_apply_marks[i] = true; - } - } - } - - if (!contexts.empty()) { - std::vector new_contexts; - auto& scanner_conjunct_ctxs = *scanner->conjunct_ctxs(); - Expr::clone_if_not_exists(contexts, state, &new_contexts); - scanner_conjunct_ctxs.insert(scanner_conjunct_ctxs.end(), new_contexts.begin(), - new_contexts.end()); - scanner->set_use_pushdown_conjuncts(true); - } - - // apply to cgroup - if (_resource_info != nullptr) { - CgroupsMgr::apply_cgroup(_resource_info->user, _resource_info->group); - } - - std::vector row_batchs; - - // Because we use thread pool to scan data from storage. One scanner can't - // use this thread too long, this can starve other query's scanner. So, we - // need yield this thread when we do enough work. However, OlapStorage read - // data in pre-aggregate mode, then we can't use storage returned data to - // judge if we need to yield. So we record all raw data read in this round - // scan, if this exceed row number or bytes threshold, we yield this thread. - int64_t raw_rows_read = scanner->raw_rows_read(); - int64_t raw_rows_threshold = raw_rows_read + config::doris_scanner_row_num; - int64_t raw_bytes_read = 0; - int64_t raw_bytes_threshold = config::doris_scanner_row_bytes; - while (!eos && raw_rows_read < raw_rows_threshold && raw_bytes_read < raw_bytes_threshold) { - if (UNLIKELY(_transfer_done)) { - eos = true; - status = Status::Cancelled("Cancelled"); - VLOG_QUERY << "Scan thread cancelled, cause query done, maybe reach limit." - << ", fragment id=" << print_id(_runtime_state->fragment_instance_id()); - break; - } - RowBatch* row_batch = new RowBatch(this->row_desc(), _batch_size); - row_batch->set_scanner_id(scanner->id()); - status = scanner->get_batch(_runtime_state, row_batch, &eos); - if (!status.ok()) { - LOG(WARNING) << "Scan thread read OlapScanner failed: " << status; - eos = true; - break; - } - // 4. if status not ok, change status_. - if (UNLIKELY(row_batch->num_rows() == 0)) { - // may be failed, push already, scan node delete this batch. - delete row_batch; - row_batch = nullptr; - } else { - row_batchs.push_back(row_batch); - raw_bytes_read += row_batch->tuple_data_pool()->total_reserved_bytes(); - } - raw_rows_read = scanner->raw_rows_read(); - if (limit() != -1 && raw_rows_read >= limit()) { - eos = true; - break; - } - } - - { - std::unique_lock l(_scan_batches_lock); - // if we failed, check status. 
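The read loop above implements a cooperative time slice: because the reader runs in pre-aggregation mode, output rows are a poor progress measure, so raw rows and raw bytes drive the yield decision. A minimal sketch, with hypothetical budget constants standing in for config::doris_scanner_row_num and config::doris_scanner_row_bytes:

#include <cstdint>
#include <iostream>

int main() {
    const int64_t kRowBudget = 16 * 1024;  // rows per time slice (assumed value)
    const int64_t kByteBudget = 10 << 20;  // bytes per time slice (assumed value)
    int64_t rows_read = 0, bytes_read = 0;
    bool eos = false;
    int batches = 0;
    while (!eos && rows_read < kRowBudget && bytes_read < kByteBudget) {
        // pretend get_batch() produced 4096 rows of ~100 bytes each
        rows_read += 4096;
        bytes_read += 4096 * 100;
        ++batches;
        eos = (batches == 64);  // or the reader runs out of data
    }
    std::cout << "yield after " << batches << " batches, " << rows_read
              << " rows, " << bytes_read << " bytes\n";
}

When the budget is hit without eos, the scanner is simply pushed back onto the node's queue (as the next block does with _olap_scanners.push_front) and another worker thread picks it up later.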
- if (UNLIKELY(!status.ok())) { - _transfer_done = true; - std::lock_guard guard(_status_mutex); - if (LIKELY(_status.ok())) { - _status = status; - } - } - - bool global_status_ok = false; - { - std::lock_guard guard(_status_mutex); - global_status_ok = _status.ok(); - } - - if (UNLIKELY(!global_status_ok)) { - eos = true; - for (auto rb : row_batchs) { - delete rb; - } - } else { - for (auto rb : row_batchs) { - _scan_row_batches.push_back(rb); - _scan_row_batches_bytes += rb->tuple_data_pool()->total_reserved_bytes(); - } - } - // If eos is true, we will process out of this lock block. - if (!eos) { - _olap_scanners.push_front(scanner); - } - } - if (eos) { - // close out of batches lock. we do this before _progress update - // that can assure this object can keep live before we finish. - scanner->close(_runtime_state); - - std::unique_lock l(_scan_batches_lock); - _progress.update(1); - if (_progress.done()) { - // this is the right out - _scanner_done = true; - } - } - - _scan_cpu_timer->update(cpu_watch.elapsed_time()); - _scanner_wait_worker_timer->update(wait_time); - - // The transfer thead will wait for `_running_thread==0`, to make sure all scanner threads won't access class members. - // Do not access class members after this code. - std::unique_lock l(_scan_batches_lock); - _running_thread--; - // Both cv of _scan_batch_added_cv and _scan_thread_exit_cv should be notify after - // change the value of _running_thread, because transfer thread lock will check the value - // of _running_thread after be notify. Otherwise there could be dead lock between scanner_thread - // and transfer thread - _scan_batch_added_cv.notify_one(); - _scan_thread_exit_cv.notify_one(); -} - -Status OlapScanNode::add_one_batch(RowBatch* row_batch) { - { - std::unique_lock l(_row_batches_lock); - - // check queue limit for both both batch size and bytes - while (UNLIKELY((_materialized_row_batches.size() >= _max_materialized_row_batches || - _materialized_row_batches_bytes >= _max_scanner_queue_size_bytes / 2) && - !_transfer_done)) { - _row_batch_consumed_cv.wait(l); - } - - VLOG_CRITICAL << "Push row_batch to materialized_row_batches"; - _materialized_row_batches.push_back(row_batch); - _materialized_row_batches_bytes += row_batch->tuple_data_pool()->total_reserved_bytes(); - } - // remove one batch, notify main thread - _row_batch_added_cv.notify_one(); - return Status::OK(); -} -} // namespace doris diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h deleted file mode 100644 index ed018e2d93..0000000000 --- a/be/src/exec/olap_scan_node.h +++ /dev/null @@ -1,350 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
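Before the header file, one note on the shutdown handshake that ended scanner_thread above: the decrement of _running_thread and the notifies happen under _scan_batches_lock, so the transfer thread's predicate re-check can never miss the final exit. A stripped-down model using only std:: primitives:

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

std::mutex m;
std::condition_variable exit_cv;
int running = 0;  // stand-in for _running_thread

int main() {
    {
        std::lock_guard<std::mutex> g(m);
        running = 4;
    }
    std::vector<std::thread> workers;
    for (int i = 0; i < 4; ++i) {
        workers.emplace_back([] {
            // ... scan work would happen here ...
            std::lock_guard<std::mutex> g(m);
            --running;             // state change first,
            exit_cv.notify_one();  // then notify, still holding the lock
        });
    }
    std::unique_lock<std::mutex> l(m);
    exit_cv.wait(l, [] { return running == 0; });
    std::cout << "all scanner threads exited\n";
    for (auto& t : workers) t.join();
}

If a worker notified before decrementing, or decremented where the waiter could not observe it before sleeping, the transfer thread could wait forever, which is the deadlock the source comments warn about.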
- -#pragma once - -#include -#include -#include - -#include "exec/olap_common.h" -#include "exec/olap_scanner.h" -#include "exec/scan_node.h" -#include "exprs/bloomfilter_predicate.h" -#include "exprs/function_filter.h" -#include "exprs/in_predicate.h" -#include "runtime/descriptors.h" -#include "util/progress_updater.h" -#include "util/spinlock.h" - -namespace doris { -class IRuntimeFilter; - -enum TransferStatus { - READ_ROWBATCH = 1, - INIT_HEAP = 2, - BUILD_ROWBATCH = 3, - MERGE = 4, - FINISH = 5, - ADD_ROWBATCH = 6, - ERROR = 7 -}; - -class OlapScanNode : public ScanNode { -public: - OlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; - Status prepare(RuntimeState* state) override; - Status open(RuntimeState* state) override; - Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; - Status collect_query_statistics(QueryStatistics* statistics) override; - Status close(RuntimeState* state) override; - Status set_scan_ranges(const std::vector& scan_ranges) override; - Status get_hints(TabletSharedPtr table, const TPaloScanRange& scan_range, int block_row_count, - bool is_begin_include, bool is_end_include, - const std::vector>& scan_key_range, - std::vector>* sub_scan_range, - RuntimeProfile* profile); - -protected: - struct HeapType { - Tuple* tuple; - int id; - }; - - class MergeComparison { - public: - MergeComparison(CompareLargeFunc compute_fn, int offset) { - _compute_fn = compute_fn; - _offset = offset; - } - bool operator()(const HeapType& lhs, const HeapType& rhs) const { - return (*_compute_fn)(lhs.tuple->get_slot(_offset), rhs.tuple->get_slot(_offset)); - } - - private: - CompareLargeFunc _compute_fn; - int _offset; - }; - - typedef std::priority_queue, MergeComparison> Heap; - - void display_heap(const Heap& heap) const { - Heap h = heap; - std::stringstream s; - s << "Heap: ["; - - while (!h.empty()) { - HeapType v = h.top(); - s << "\nID: " << v.id << " Value:" << Tuple::to_string(v.tuple, *_tuple_desc); - h.pop(); - } - - VLOG_CRITICAL << s.str() << "\n]"; - } - - // In order to ensure the accuracy of the query result - // only key column conjuncts will be remove as idle conjunct - bool is_key_column(const std::string& key_name); - void remove_pushed_conjuncts(RuntimeState* state); - - Status start_scan(RuntimeState* state); - - void eval_const_conjuncts(); - Status normalize_conjuncts(); - Status build_key_ranges_and_filters(); - Status build_function_filters(); - - Status start_scan_thread(RuntimeState* state); - - template - Status normalize_predicate(ColumnValueRange& range, SlotDescriptor* slot); - - template - Status normalize_in_and_eq_predicate(SlotDescriptor* slot, ColumnValueRange* range); - - template - Status normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot, ColumnValueRange* range); - - template - Status normalize_noneq_binary_predicate(SlotDescriptor* slot, ColumnValueRange* range); - - Status normalize_bloom_filter_predicate(SlotDescriptor* slot); - - template - static bool normalize_is_null_predicate(Expr* expr, SlotDescriptor* slot, - const std::string& is_null_str, - ColumnValueRange* range); - - void transfer_thread(RuntimeState* state); - void scanner_thread(OlapScanner* scanner); - - Status add_one_batch(RowBatch* row_batch); - - // Write debug string of this into out. 
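An aside on the HeapType/MergeComparison/Heap trio above: it is a k-way merge heap over tuples. A self-contained analogue with ints standing in for tuple slots (CompareLargeFunc and Tuple::get_slot are not modelled):

#include <iostream>
#include <queue>
#include <vector>

struct HeapEntry {
    int value;    // stand-in for tuple->get_slot(_offset)
    int run_id;   // which sorted run this entry came from
    size_t pos;   // position inside that run
};

struct Greater {
    bool operator()(const HeapEntry& l, const HeapEntry& r) const {
        return l.value > r.value;  // min-heap over the sort key
    }
};

int main() {
    std::vector<std::vector<int>> runs = {{1, 4, 9}, {2, 3, 8}, {5, 6, 7}};
    std::priority_queue<HeapEntry, std::vector<HeapEntry>, Greater> heap;
    for (int i = 0; i < (int)runs.size(); ++i) heap.push({runs[i][0], i, 0});
    while (!heap.empty()) {
        HeapEntry top = heap.top();
        heap.pop();
        std::cout << top.value << ' ';
        if (top.pos + 1 < runs[top.run_id].size())
            heap.push({runs[top.run_id][top.pos + 1], top.run_id, top.pos + 1});
    }
    std::cout << '\n';  // prints 1..9 in order
}

display_heap() above is the debugging counterpart: it copies the heap and drains the copy, since std::priority_queue offers no iteration.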
- void debug_string(int indentation_level, std::stringstream* out) const override {} - - const std::vector& runtime_filter_descs() const { - return _runtime_filter_descs; - } - - void _init_counter(RuntimeState* state); - // OLAP_SCAN_NODE profile layering: OLAP_SCAN_NODE, OlapScanner, and SegmentIterator - // according to the calling relationship - void init_scan_profile(); - - bool should_push_down_in_predicate(SlotDescriptor* slot, InPredicate* in_pred); - - template - static Status change_fixed_value_range(ColumnValueRange& range, void* value, - const ChangeFixedValueRangeFunc& func); - - std::pair should_push_down_eq_predicate(SlotDescriptor* slot, Expr* pred, - int conj_idx, int child_idx); - - friend class OlapScanner; - - // Tuple id resolved in prepare() to set _tuple_desc; - TupleId _tuple_id; - // doris scan node used to scan doris - TOlapScanNode _olap_scan_node; - // tuple descriptors - const TupleDescriptor* _tuple_desc; - // tuple index - int _tuple_idx; - // string slots - std::vector _string_slots; - // conjunct's index which already be push down storage engine - // should be remove in olap_scan_node, no need check this conjunct again - std::set _pushed_conjuncts_index; - // collection slots - std::vector _collection_slots; - - bool _eos; - - // column -> ColumnValueRange map - std::map _column_value_ranges; - - OlapScanKeys _scan_keys; - - std::vector> _scan_ranges; - - std::vector _olap_filter; - // push down bloom filters to storage engine. - // 1. std::pair.first :: column name - // 2. std::pair.second :: shared_ptr of BloomFilterFuncBase - std::vector>> - _bloom_filters_push_down; - - // push down functions to storage engine - // only support scalar functions, now just support like / not like - std::vector _push_down_functions; - // functions conjunct's index which already be push down storage engine - std::set _pushed_func_conjuncts_index; - // need keep these conjunct to the end of scan node, - // since some memory referenced by pushed function filters - std::vector _pushed_func_conjunct_ctxs; - - // Pool for storing allocated scanner objects. We don't want to use the - // runtime pool to ensure that the scanner objects are deleted before this - // object is. - ObjectPool _scanner_pool; - - size_t _batch_size = 0; - - std::shared_ptr _transfer_thread; - - // Keeps track of total splits and the number finished. - ProgressUpdater _progress; - - // Lock and condition variables protecting _materialized_row_batches. Row batches are - // produced asynchronously by the scanner threads and consumed by the main thread in - // GetNext. Row batches must be processed by the main thread in the order they are - // queued to avoid freeing attached resources prematurely (row batches will never depend - // on resources attached to earlier batches in the queue). - // This lock cannot be taken together with any other locks except _lock. 
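The constraint in the comment above (this lock cannot be taken together with any other locks except _lock) shapes the whole pipeline: batches move between two queues, each guarded by its own mutex, and the mover holds at most one lock at a time. A minimal sketch with stand-in names:

#include <deque>
#include <iostream>
#include <mutex>

std::mutex scan_lock, row_lock;        // stand-ins for the two lock members
std::deque<int> scan_batches;          // filled by scanner threads
std::deque<int> materialized_batches;  // consumed by get_next()

void transfer_one() {
    int batch = -1;
    bool have = false;
    {
        std::lock_guard<std::mutex> g(scan_lock);  // lock #1 only
        if (!scan_batches.empty()) {
            batch = scan_batches.front();
            scan_batches.pop_front();
            have = true;
        }
    }                                              // released before...
    if (have) {
        std::lock_guard<std::mutex> g(row_lock);   // ...taking lock #2
        materialized_batches.push_back(batch);
    }
}

int main() {
    {
        std::lock_guard<std::mutex> g(scan_lock);
        scan_batches = {1, 2, 3};
    }
    for (int i = 0; i < 3; ++i) transfer_one();
    std::lock_guard<std::mutex> g(row_lock);
    std::cout << materialized_batches.size() << " batches materialized\n";
}

Taking the locks one at a time keeps the lock graph acyclic, so the scanner, transfer, and get_next threads cannot deadlock on these two queues.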
- std::mutex _row_batches_lock; - std::condition_variable _row_batch_added_cv; - std::condition_variable _row_batch_consumed_cv; - - std::list _materialized_row_batches; - // to limit _materialized_row_batches_bytes < _max_scanner_queue_size_bytes / 2 - std::atomic_size_t _materialized_row_batches_bytes = 0; - - std::mutex _scan_batches_lock; - std::condition_variable _scan_batch_added_cv; - std::atomic_int _running_thread = 0; - std::condition_variable _scan_thread_exit_cv; - - std::list _scan_row_batches; - // to limit _scan_row_batches_bytes < _max_scanner_queue_size_bytes / 2 - std::atomic_size_t _scan_row_batches_bytes = 0; - - std::list _olap_scanners; - - int _max_materialized_row_batches; - // to limit _materialized_row_batches_bytes and _scan_row_batches_bytes - size_t _max_scanner_queue_size_bytes; - bool _start; - // Used in Scan thread to ensure thread-safe - std::atomic_bool _scanner_done; - std::atomic_bool _transfer_done; - size_t _direct_conjunct_size; - - int _total_assign_num; - int _nice; - - // protect _status, for many thread may change _status - SpinLock _status_mutex; - Status _status; - RuntimeState* _runtime_state; - - RuntimeProfile::Counter* _scan_timer; - RuntimeProfile::Counter* _scan_cpu_timer = nullptr; - RuntimeProfile::Counter* _tablet_counter; - RuntimeProfile::Counter* _rows_pushed_cond_filtered_counter = nullptr; - RuntimeProfile::Counter* _reader_init_timer = nullptr; - RuntimeProfile::Counter* _scanner_sched_counter = nullptr; - TResourceInfo* _resource_info; - - int64_t _buffered_bytes; - // Count the memory consumption of Rowset Reader and Tablet Reader in OlapScanner. - std::shared_ptr _scanner_mem_tracker; - EvalConjunctsFn _eval_conjuncts_fn; - - // the max num of scan keys of this scan request. - // it will set as BE's config `doris_max_scan_key_num`, - // or be overwritten by value in TQueryOptions - int32_t _max_scan_key_num = 1024; - // The max number of conditions in InPredicate that can be pushed down - // into OlapEngine. - // If conditions in InPredicate is larger than this, all conditions in - // InPredicate will not be pushed to the OlapEngine. 
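A quick illustration of the all-or-nothing rule being described here: once a column's accumulated IN values exceed the limit, the range reverts to the whole value range and nothing is pushed for that column. The constant below is the default named just after this note.

#include <iostream>
#include <set>

int main() {
    const size_t kMaxPushdownPerColumn = 1024;
    std::set<int> fixed_values;
    for (int v = 0; v < 2000; ++v) fixed_values.insert(v);  // a big IN list
    bool push_down = fixed_values.size() <= kMaxPushdownPerColumn;
    if (!push_down) fixed_values.clear();  // set_whole_value_range() analogue
    std::cout << (push_down ? "pushed to storage engine"
                            : "kept in scan node, whole range")
              << '\n';
}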
- // it will set as BE's config `max_pushdown_conditions_per_column`, - // or be overwritten by value in TQueryOptions - int32_t _max_pushdown_conditions_per_column = 1024; - - struct RuntimeFilterContext { - RuntimeFilterContext() : apply_mark(false), runtimefilter(nullptr) {} - bool apply_mark; - IRuntimeFilter* runtimefilter; - }; - std::vector _runtime_filter_descs; - std::vector _runtime_filter_ctxs; - std::map _conjunctid_to_runtime_filter_ctxs; - - std::unique_ptr _scanner_profile; - std::unique_ptr _segment_profile; - - // Counters - RuntimeProfile::Counter* _io_timer = nullptr; - RuntimeProfile::Counter* _read_compressed_counter = nullptr; - RuntimeProfile::Counter* _decompressor_timer = nullptr; - RuntimeProfile::Counter* _read_uncompressed_counter = nullptr; - RuntimeProfile::Counter* _raw_rows_counter = nullptr; - - RuntimeProfile::Counter* _rows_vec_cond_counter = nullptr; - RuntimeProfile::Counter* _vec_cond_timer = nullptr; - RuntimeProfile::Counter* _short_cond_timer = nullptr; - RuntimeProfile::Counter* _first_read_timer = nullptr; - RuntimeProfile::Counter* _lazy_read_timer = nullptr; - RuntimeProfile::Counter* _output_col_timer = nullptr; - - RuntimeProfile::Counter* _stats_filtered_counter = nullptr; - RuntimeProfile::Counter* _bf_filtered_counter = nullptr; - RuntimeProfile::Counter* _del_filtered_counter = nullptr; - RuntimeProfile::Counter* _conditions_filtered_counter = nullptr; - RuntimeProfile::Counter* _key_range_filtered_counter = nullptr; - - RuntimeProfile::Counter* _block_seek_timer = nullptr; - RuntimeProfile::Counter* _block_seek_counter = nullptr; - RuntimeProfile::Counter* _block_convert_timer = nullptr; - RuntimeProfile::Counter* _block_load_timer = nullptr; - RuntimeProfile::Counter* _block_load_counter = nullptr; - RuntimeProfile::Counter* _block_fetch_timer = nullptr; - - RuntimeProfile::Counter* _index_load_timer = nullptr; - - // total pages read - // used by segment v2 - RuntimeProfile::Counter* _total_pages_num_counter = nullptr; - // page read from cache - // used by segment v2 - RuntimeProfile::Counter* _cached_pages_num_counter = nullptr; - - // row count filtered by bitmap inverted index - RuntimeProfile::Counter* _bitmap_index_filter_counter = nullptr; - // time fro bitmap inverted index read and filter - RuntimeProfile::Counter* _bitmap_index_filter_timer = nullptr; - // number of created olap scanners - RuntimeProfile::Counter* _num_scanners = nullptr; - - // number of segment filtered by column stat when creating seg iterator - RuntimeProfile::Counter* _filtered_segment_counter = nullptr; - // total number of segment related to this scan node - RuntimeProfile::Counter* _total_segment_counter = nullptr; - - RuntimeProfile::Counter* _scanner_wait_batch_timer = nullptr; - RuntimeProfile::Counter* _scanner_wait_worker_timer = nullptr; - - RuntimeProfile::Counter* _olap_wait_batch_queue_timer = nullptr; - - // for debugging or profiling, record any info as you want - RuntimeProfile::Counter* _general_debug_timer[GENERAL_DEBUG_COUNT] = {}; -}; - -} // namespace doris diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp deleted file mode 100644 index 72769f7769..0000000000 --- a/be/src/exec/olap_scanner.cpp +++ /dev/null @@ -1,675 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "olap_scanner.h" - -#include - -#include "common/utils.h" -#include "exprs/expr_context.h" -#include "gen_cpp/PaloInternalService_types.h" -#include "olap/decimal12.h" -#include "olap/field.h" -#include "olap/storage_engine.h" -#include "olap/tablet_schema.h" -#include "olap/uint24.h" -#include "olap_scan_node.h" -#include "olap_utils.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/thread_context.h" -#include "service/backend_options.h" -#include "util/doris_metrics.h" -#include "util/mem_util.hpp" - -namespace doris { - -OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool aggregation, - bool need_agg_finalize, const TPaloScanRange& scan_range, - const std::shared_ptr& tracker) - : _runtime_state(runtime_state), - _parent(parent), - _tuple_desc(parent->_tuple_desc), - _id(-1), - _is_open(false), - _aggregation(aggregation), - _need_agg_finalize(need_agg_finalize), - _version(-1), - _mem_tracker(tracker) { - _tablet_schema = std::make_shared(); -} - -Status OlapScanner::prepare( - const TPaloScanRange& scan_range, const std::vector& key_ranges, - const std::vector& filters, - const std::vector>>& bloom_filters, - const std::vector& function_filters) { - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); - set_tablet_reader(); - // set limit to reduce end of rowset and segment mem use - _tablet_reader->set_batch_size(_parent->_batch_size); - - // Get olap table - TTabletId tablet_id = scan_range.tablet_id; - SchemaHash schema_hash = strtoul(scan_range.schema_hash.c_str(), nullptr, 10); - _version = strtoul(scan_range.version.c_str(), nullptr, 10); - { - std::string err; - _tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, true, &err); - if (_tablet.get() == nullptr) { - std::stringstream ss; - ss << "failed to get tablet. 
tablet_id=" << tablet_id - << ", with schema_hash=" << schema_hash << ", reason=" << err; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - _tablet_schema->copy_from(*_tablet->tablet_schema()); - if (_parent->_olap_scan_node.__isset.columns_desc && - !_parent->_olap_scan_node.columns_desc.empty() && - _parent->_olap_scan_node.columns_desc[0].col_unique_id >= 0) { - _tablet_schema->clear_columns(); - for (const auto& column_desc : _parent->_olap_scan_node.columns_desc) { - _tablet_schema->append_column(TabletColumn(column_desc)); - } - } - { - std::shared_lock rdlock(_tablet->get_header_lock()); - const RowsetSharedPtr rowset = _tablet->rowset_with_max_version(); - if (rowset == nullptr) { - std::stringstream ss; - ss << "fail to get latest version of tablet: " << tablet_id; - LOG(WARNING) << ss.str(); - return Status::InternalError(ss.str()); - } - - // acquire tablet rowset readers at the beginning of the scan node - // to prevent this case: when there are lots of olap scanners to run for example 10000 - // the rowsets maybe compacted when the last olap scanner starts - Version rd_version(0, _version); - Status acquire_reader_st = - _tablet->capture_rs_readers(rd_version, &_tablet_reader_params.rs_readers); - if (!acquire_reader_st.ok()) { - LOG(WARNING) << "fail to init reader.res=" << acquire_reader_st; - std::stringstream ss; - ss << "failed to initialize storage reader. tablet=" << _tablet->full_name() - << ", res=" << acquire_reader_st - << ", backend=" << BackendOptions::get_localhost(); - return Status::InternalError(ss.str().c_str()); - } - // Initialize _params - RETURN_IF_ERROR(_init_tablet_reader_params(key_ranges, filters, bloom_filters, - function_filters)); - } - } - - return Status::OK(); -} - -TabletStorageType OlapScanner::get_storage_type() { - int local_reader = 0; - for (const auto& reader : _tablet_reader_params.rs_readers) { - if (reader->rowset()->rowset_meta()->resource_id().empty()) { - local_reader++; - } - } - int total_reader = _tablet_reader_params.rs_readers.size(); - - if (local_reader == total_reader) { - return TabletStorageType::STORAGE_TYPE_LOCAL; - } else if (local_reader == 0) { - return TabletStorageType::STORAGE_TYPE_REMOTE; - } - return TabletStorageType::STORAGE_TYPE_REMOTE_AND_LOCAL; -} - -Status OlapScanner::open() { - auto span = _runtime_state->get_tracer()->StartSpan("OlapScanner::open"); - auto scope = opentelemetry::trace::Scope {span}; - SCOPED_TIMER(_parent->_reader_init_timer); - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); - - if (_conjunct_ctxs.size() > _parent->_direct_conjunct_size) { - _use_pushdown_conjuncts = true; - } - - _runtime_filter_marks.resize(_parent->runtime_filter_descs().size(), false); - - auto res = _tablet_reader->init(_tablet_reader_params); - if (!res.ok()) { - std::stringstream ss; - ss << "failed to initialize storage reader. 
tablet=" - << _tablet_reader_params.tablet->full_name() << ", res=" << res - << ", backend=" << BackendOptions::get_localhost(); - return Status::InternalError(ss.str().c_str()); - } - return Status::OK(); -} - -// it will be called under tablet read lock because capture rs readers need -Status OlapScanner::_init_tablet_reader_params( - const std::vector& key_ranges, const std::vector& filters, - const std::vector>>& bloom_filters, - const std::vector& function_filters) { - // if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty - bool single_version = - (_tablet_reader_params.rs_readers.size() == 1 && - _tablet_reader_params.rs_readers[0]->rowset()->start_version() == 0 && - !_tablet_reader_params.rs_readers[0] - ->rowset() - ->rowset_meta() - ->is_segments_overlapping()) || - (_tablet_reader_params.rs_readers.size() == 2 && - _tablet_reader_params.rs_readers[0]->rowset()->rowset_meta()->num_rows() == 0 && - _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 && - !_tablet_reader_params.rs_readers[1] - ->rowset() - ->rowset_meta() - ->is_segments_overlapping()); - - _tablet_reader_params.direct_mode = single_version || _aggregation; - - RETURN_IF_ERROR(_init_return_columns(!_tablet_reader_params.direct_mode)); - - _tablet_reader_params.tablet = _tablet; - _tablet_reader_params.tablet_schema = _tablet_schema; - _tablet_reader_params.reader_type = READER_QUERY; - _tablet_reader_params.aggregation = _aggregation; - _tablet_reader_params.version = Version(0, _version); - - // Condition - for (auto& filter : filters) { - _tablet_reader_params.conditions.push_back(filter); - } - std::copy(bloom_filters.cbegin(), bloom_filters.cend(), - std::inserter(_tablet_reader_params.bloom_filters, - _tablet_reader_params.bloom_filters.begin())); - - std::copy(function_filters.cbegin(), function_filters.cend(), - std::inserter(_tablet_reader_params.function_filters, - _tablet_reader_params.function_filters.begin())); - auto& delete_preds = _tablet->delete_predicates(); - std::copy(delete_preds.cbegin(), delete_preds.cend(), - std::inserter(_tablet_reader_params.delete_predicates, - _tablet_reader_params.delete_predicates.begin())); - // Merge the columns in delete predicate that not in latest schema in to current tablet schema - for (auto& del_pred_rs : _tablet_reader_params.delete_predicates) { - _tablet_schema->merge_dropped_columns(_tablet->tablet_schema(del_pred_rs->version())); - } - // Range - for (auto key_range : key_ranges) { - if (key_range->begin_scan_range.size() == 1 && - key_range->begin_scan_range.get_value(0) == NEGATIVE_INFINITY) { - continue; - } - - _tablet_reader_params.start_key_include = key_range->begin_include; - _tablet_reader_params.end_key_include = key_range->end_include; - - _tablet_reader_params.start_key.push_back(key_range->begin_scan_range); - _tablet_reader_params.end_key.push_back(key_range->end_scan_range); - } - - // TODO(zc) - _tablet_reader_params.profile = _parent->runtime_profile(); - _tablet_reader_params.runtime_state = _runtime_state; - _tablet_reader_params.origin_return_columns = &_return_columns; - _tablet_reader_params.tablet_columns_convert_to_null_set = &_tablet_columns_convert_to_null_set; - - if (_tablet_reader_params.direct_mode) { - _tablet_reader_params.return_columns = _return_columns; - } else { - // we need to fetch all key columns to do the right aggregation on storage engine side. 
- for (size_t i = 0; i < _tablet->num_key_columns(); ++i) {
- _tablet_reader_params.return_columns.push_back(i);
- }
- for (auto index : _return_columns) {
- if (_tablet_schema->column(index).is_key()) {
- continue;
- } else {
- _tablet_reader_params.return_columns.push_back(index);
- }
- }
- }
-
- // use _tablet_reader_params.return_columns, because the reader uses this to merge sort
- Status res = _read_row_cursor.init(_tablet_schema, _tablet_reader_params.return_columns);
- if (!res.ok()) {
- LOG(WARNING) << "fail to init row cursor. res=" << res;
- return Status::InternalError("failed to initialize storage read row cursor");
- }
- _read_row_cursor.allocate_memory_for_string_type(_tablet_schema);
-
- // If an agg node is this scan node's direct parent,
- // we will not call the agg object finalize method in the scan node,
- // to avoid unnecessary SerDe and improve query performance
- _tablet_reader_params.need_agg_finalize = _need_agg_finalize;
-
- if (!config::disable_storage_page_cache) {
- _tablet_reader_params.use_page_cache = true;
- }
-
- if (_tablet->enable_unique_key_merge_on_write()) {
- _tablet_reader_params.delete_bitmap = &_tablet->tablet_meta()->delete_bitmap();
- }
-
- return Status::OK();
-}
-
-Status OlapScanner::_init_return_columns(bool need_seq_col) {
- for (auto slot : _tuple_desc->slots()) {
- if (!slot->is_materialized()) {
- continue;
- }
- int32_t index = slot->col_unique_id() >= 0
- ? _tablet_schema->field_index(slot->col_unique_id())
- : _tablet_schema->field_index(slot->col_name());
- if (index < 0) {
- std::stringstream ss;
- ss << "field name is invalid. field=" << slot->col_name();
- LOG(WARNING) << ss.str();
- return Status::InternalError(ss.str());
- }
- _return_columns.push_back(index);
- if (slot->is_nullable() && !_tablet_schema->column(index).is_nullable())
- _tablet_columns_convert_to_null_set.emplace(index);
- _query_slots.push_back(slot);
- }
-
- // expand the sequence column
- if (_tablet_schema->has_sequence_col() && need_seq_col) {
- bool has_replace_col = false;
- for (auto col : _return_columns) {
- if (_tablet_schema->column(col).aggregation() ==
- FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE) {
- has_replace_col = true;
- break;
- }
- }
- if (auto sequence_col_idx = _tablet_schema->sequence_col_idx();
- has_replace_col && std::find(_return_columns.begin(), _return_columns.end(),
- sequence_col_idx) == _return_columns.end()) {
- _return_columns.push_back(sequence_col_idx);
- }
- }
-
- if (_return_columns.empty()) {
- return Status::InternalError("failed to build storage scanner, no materialized slot!");
- }
- return Status::OK();
-}
-
-Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) {
- SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
- // 2. Allocate the rows' tuple buffer; preallocating improves performance when there are many variable-length columns
- uint8_t* tuple_buf = batch->tuple_data_pool()->allocate(_batch_size * _tuple_desc->byte_size());
- if (tuple_buf == nullptr) {
- LOG(WARNING) << "Allocate mem for row batch failed.";
- return Status::RuntimeError("Allocate mem for row batch failed.");
- }
- bzero(tuple_buf, _batch_size * _tuple_desc->byte_size());
- Tuple* tuple = reinterpret_cast<Tuple*>(tuple_buf);
-
- std::unique_ptr<MemPool> mem_pool(new MemPool(_mem_tracker.get()));
- int64_t raw_rows_threshold = raw_rows_read() + config::doris_scanner_row_num;
- int64_t raw_bytes_threshold = config::doris_scanner_row_bytes;
- {
- SCOPED_TIMER(_parent->_scan_timer);
- // temporarily store objects that may not pass the conjuncts.
- // otherwise, pushed all objects into agg_object_pool directly may lead to OOM. - ObjectPool tmp_object_pool; - // release the memory of the object which can't pass the conjuncts. - ObjectPool unused_object_pool; - while (true) { - // Batch is full or reach raw_rows_threshold or raw_bytes_threshold, break - // Use total_byte_size here, not tuple_pool's allocated bytes, because we preallocated tuple pool at beginning - // its size maybe larger than threshold, so that scanner will break here and may dead loop. - // Not need check num_rows > 0, because total_byte_size() == 0 if num_rows == 0. - if (_avg_row_size == 0 && batch->num_rows() > 0) { - // total_byte_size() cost a lot of CPU time, so that compute avg row size here. - _first_batch_row_num += batch->num_rows(); - _first_batch_size += batch->total_byte_size(); - // Accumulate many batches and then calculate avg row size to avoid there are only small number of rows - if (_first_batch_size > raw_bytes_threshold) { - _avg_row_size = _first_batch_size / _first_batch_row_num; - } - } - int64_t batch_total_bytes = _avg_row_size > 0 ? _avg_row_size * batch->num_rows() - : batch->total_byte_size(); - if (batch->is_full() || batch_total_bytes >= raw_bytes_threshold || - raw_rows_read() >= raw_rows_threshold) { - _update_realtime_counter(); - break; - } - - if (tmp_object_pool.size() > 0) { - unused_object_pool.acquire_data(&tmp_object_pool); - } - - if (unused_object_pool.size() >= config::object_pool_buffer_size) { - unused_object_pool.clear(); - } - - // Read one row from reader - auto res = _tablet_reader->next_row_with_aggregation(&_read_row_cursor, mem_pool.get(), - &tmp_object_pool, eof); - if (!res.ok()) { - return Status::InternalError( - "Internal Error: read storage fail. res={}, tablet={}, backend={}", - res.to_string(), _tablet->full_name(), BackendOptions::get_localhost()); - } - // If we reach end of this scanner, break - if (UNLIKELY(*eof)) { - break; - } - _num_rows_read++; - - _convert_row_to_tuple(tuple); - if (VLOG_ROW_IS_ON) { - VLOG_ROW << "OlapScanner input row: " << Tuple::to_string(tuple, *_tuple_desc); - } - - if (_num_rows_read % RELEASE_CONTEXT_COUNTER == 0) { - ExprContext::free_local_allocations(_conjunct_ctxs); - } - - // 3.4 Set tuple to RowBatch(not committed) - int row_idx = batch->add_row(); - TupleRow* row = batch->get_row(row_idx); - row->set_tuple(_parent->_tuple_idx, tuple); - - auto direct_conjunct_size = _parent->_direct_conjunct_size; - - do { - // 3.5.1 Using direct conjuncts to filter data - if (_eval_conjuncts_fn != nullptr) { - if (!_eval_conjuncts_fn(&_conjunct_ctxs[0], direct_conjunct_size, row)) { - // check direct conjuncts fail then clear tuple for reuse - // make sure to reset null indicators since we're overwriting - // the tuple assembled for the previous row - tuple->init(_tuple_desc->byte_size()); - break; - } - } else { - if (!ExecNode::eval_conjuncts(&_conjunct_ctxs[0], direct_conjunct_size, row)) { - // check direct conjuncts fail then clear tuple for reuse - // make sure to reset null indicators since we're overwriting - // the tuple assembled for the previous row - tuple->init(_tuple_desc->byte_size()); - break; - } - } - - // 3.5.2 Using pushdown conjuncts to filter data - if (_use_pushdown_conjuncts) { - if (!ExecNode::eval_conjuncts(&_conjunct_ctxs[direct_conjunct_size], - _conjunct_ctxs.size() - direct_conjunct_size, - row)) { - // check pushdown conjuncts fail then clear tuple for reuse - // make sure to reset null indicators since we're overwriting - // the tuple assembled for 
the previous row
- tuple->init(_tuple_desc->byte_size());
- _num_rows_pushed_cond_filtered++;
- break;
- }
- }
-
- // Copy string slot
- for (auto desc : _parent->_string_slots) {
- StringValue* slot = tuple->get_string_slot(desc->tuple_offset());
- if (slot->len != 0) {
- uint8_t* v = batch->tuple_data_pool()->allocate(slot->len);
- memory_copy(v, slot->ptr, slot->len);
- slot->ptr = reinterpret_cast<char*>(v);
- }
- }
-
- // Copy collection slot
- for (auto desc : _parent->_collection_slots) {
- CollectionValue* slot = tuple->get_collection_slot(desc->tuple_offset());
- const TypeDescriptor& item_type = desc->type().children.at(0);
- auto pool = batch->tuple_data_pool();
- CollectionValue::deep_copy_collection(
- slot, item_type,
- [pool](int64_t size) -> MemFootprint {
- int64_t offset = pool->total_allocated_bytes();
- uint8_t* data = pool->allocate(size);
- return {offset, data};
- },
- false);
- }
- // the memory allocated by the mem pool has been copied,
- // so we should release that memory immediately
- mem_pool->clear();
-
- if (VLOG_ROW_IS_ON) {
- VLOG_ROW << "OlapScanner output row: " << Tuple::to_string(tuple, *_tuple_desc);
- }
-
- // both direct and pushdown conjuncts passed, so commit the tuple
- batch->commit_last_row();
- batch->agg_object_pool()->acquire_data(&tmp_object_pool);
- char* new_tuple = reinterpret_cast<char*>(tuple);
- new_tuple += _tuple_desc->byte_size();
- tuple = reinterpret_cast<Tuple*>(new_tuple);
-
- // compute the pushdown conjuncts filter rate
- if (_use_pushdown_conjuncts) {
- // re-check the return rate once enough rows have been read;
- // e.g. 40000 rows returned with 10000 filtered gives a return rate of 80%
- if (_num_rows_read > 32768) {
- int32_t pushdown_return_rate =
- _num_rows_read * 100 /
- (_num_rows_read + _num_rows_pushed_cond_filtered);
- if (pushdown_return_rate >
- config::doris_max_pushdown_conjuncts_return_rate) {
- _use_pushdown_conjuncts = false;
- VLOG_CRITICAL << "Stop Using PushDown Conjuncts. "
- << "PushDownReturnRate: " << pushdown_return_rate << "%"
- << " MaxPushDownReturnRate: "
- << config::doris_max_pushdown_conjuncts_return_rate
- << "%";
- }
- }
- }
- } while (false);
- }
- }
-
- return Status::OK();
-}
-
-void OlapScanner::_convert_row_to_tuple(Tuple* tuple) {
- size_t slots_size = _query_slots.size();
- for (int i = 0; i < slots_size; ++i) {
- SlotDescriptor* slot_desc = _query_slots[i];
- auto cid = _return_columns[i];
- if (_read_row_cursor.is_null(cid)) {
- tuple->set_null(slot_desc->null_indicator_offset());
- continue;
- }
- char* ptr = (char*)_read_row_cursor.cell_ptr(cid);
- size_t len = _read_row_cursor.column_size(cid);
- switch (slot_desc->type().type) {
- case TYPE_CHAR: {
- Slice* slice = reinterpret_cast<Slice*>(ptr);
- StringValue* slot = tuple->get_string_slot(slot_desc->tuple_offset());
- slot->ptr = slice->data;
- slot->len = strnlen(slot->ptr, slice->size);
- break;
- }
- case TYPE_VARCHAR:
- case TYPE_OBJECT:
- case TYPE_QUANTILE_STATE:
- case TYPE_HLL:
- case TYPE_STRING: {
- Slice* slice = reinterpret_cast<Slice*>(ptr);
- StringValue* slot = tuple->get_string_slot(slot_desc->tuple_offset());
- slot->ptr = slice->data;
- slot->len = slice->size;
- break;
- }
- case TYPE_DECIMALV2: {
- DecimalV2Value* slot = tuple->get_decimalv2_slot(slot_desc->tuple_offset());
- auto packed_decimal = *reinterpret_cast<decimal12_t*>(ptr);
-
- // We convert the storage format to the computation format.
- // The conversion in the opposite direction is done in AggregateFuncTraits
- // for decimal.
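// Illustration (added; assumes the decimal12_t layout used by the storage
// engine): the packed value is
//   struct decimal12_t { int64_t integer; int32_t fraction; };
// so 123.45 stored with a 9-digit fraction becomes {123, 450000000}, which
// from_olap_decimal() below reassembles into a DecimalV2Value.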
- int64_t int_value = packed_decimal.integer; - int32_t frac_value = packed_decimal.fraction; - if (!slot->from_olap_decimal(int_value, frac_value)) { - tuple->set_null(slot_desc->null_indicator_offset()); - } - break; - } - case TYPE_DATETIME: { - DateTimeValue* slot = tuple->get_datetime_slot(slot_desc->tuple_offset()); - uint64_t value = *reinterpret_cast(ptr); - if (!slot->from_olap_datetime(value)) { - tuple->set_null(slot_desc->null_indicator_offset()); - } - break; - } - case TYPE_DATE: { - DateTimeValue* slot = tuple->get_datetime_slot(slot_desc->tuple_offset()); - - uint24_t date = *reinterpret_cast(ptr); - uint64_t value = uint32_t(date); - - if (!slot->from_olap_date(value)) { - tuple->set_null(slot_desc->null_indicator_offset()); - } - break; - } - case TYPE_ARRAY: { - CollectionValue* array_v = reinterpret_cast(ptr); - CollectionValue* slot = tuple->get_collection_slot(slot_desc->tuple_offset()); - slot->shallow_copy(array_v); - break; - } - default: { - void* slot = tuple->get_slot(slot_desc->tuple_offset()); - memory_copy(slot, ptr, len); - break; - } - } - } -} - -void OlapScanner::update_counter() { - if (_has_update_counter) { - return; - } - auto& stats = _tablet_reader->stats(); - - COUNTER_UPDATE(_parent->rows_read_counter(), _num_rows_read); - COUNTER_UPDATE(_parent->_rows_pushed_cond_filtered_counter, _num_rows_pushed_cond_filtered); - - COUNTER_UPDATE(_parent->_io_timer, stats.io_ns); - COUNTER_UPDATE(_parent->_read_compressed_counter, stats.compressed_bytes_read); - _compressed_bytes_read += stats.compressed_bytes_read; - COUNTER_UPDATE(_parent->_decompressor_timer, stats.decompress_ns); - COUNTER_UPDATE(_parent->_read_uncompressed_counter, stats.uncompressed_bytes_read); - COUNTER_UPDATE(_parent->bytes_read_counter(), stats.bytes_read); - - COUNTER_UPDATE(_parent->_block_load_timer, stats.block_load_ns); - COUNTER_UPDATE(_parent->_block_load_counter, stats.blocks_load); - COUNTER_UPDATE(_parent->_block_fetch_timer, stats.block_fetch_ns); - COUNTER_UPDATE(_parent->_block_seek_timer, stats.block_seek_ns); - COUNTER_UPDATE(_parent->_block_convert_timer, stats.block_convert_ns); - - COUNTER_UPDATE(_parent->_raw_rows_counter, stats.raw_rows_read); - // if raw_rows_read is reset, scanNode will scan all table rows which may cause BE crash - _raw_rows_read += _tablet_reader->mutable_stats()->raw_rows_read; - // COUNTER_UPDATE(_parent->_filtered_rows_counter, stats.num_rows_filtered); - COUNTER_UPDATE(_parent->_vec_cond_timer, stats.vec_cond_ns); - COUNTER_UPDATE(_parent->_short_cond_timer, stats.short_cond_ns); - COUNTER_UPDATE(_parent->_first_read_timer, stats.first_read_ns); - COUNTER_UPDATE(_parent->_lazy_read_timer, stats.lazy_read_ns); - COUNTER_UPDATE(_parent->_output_col_timer, stats.output_col_ns); - COUNTER_UPDATE(_parent->_rows_vec_cond_counter, stats.rows_vec_cond_filtered); - - COUNTER_UPDATE(_parent->_stats_filtered_counter, stats.rows_stats_filtered); - COUNTER_UPDATE(_parent->_bf_filtered_counter, stats.rows_bf_filtered); - COUNTER_UPDATE(_parent->_del_filtered_counter, stats.rows_del_filtered); - COUNTER_UPDATE(_parent->_del_filtered_counter, stats.rows_del_by_bitmap); - COUNTER_UPDATE(_parent->_del_filtered_counter, stats.rows_vec_del_cond_filtered); - - COUNTER_UPDATE(_parent->_conditions_filtered_counter, stats.rows_conditions_filtered); - COUNTER_UPDATE(_parent->_key_range_filtered_counter, stats.rows_key_range_filtered); - - COUNTER_UPDATE(_parent->_index_load_timer, stats.index_load_ns); - - size_t timer_count = sizeof(stats.general_debug_ns) / 
sizeof(*stats.general_debug_ns);
- for (size_t i = 0; i < timer_count; ++i) {
- COUNTER_UPDATE(_parent->_general_debug_timer[i], stats.general_debug_ns[i]);
- }
-
- COUNTER_UPDATE(_parent->_total_pages_num_counter, stats.total_pages_num);
- COUNTER_UPDATE(_parent->_cached_pages_num_counter, stats.cached_pages_num);
-
- COUNTER_UPDATE(_parent->_bitmap_index_filter_counter, stats.rows_bitmap_index_filtered);
- COUNTER_UPDATE(_parent->_bitmap_index_filter_timer, stats.bitmap_index_filter_timer);
- COUNTER_UPDATE(_parent->_block_seek_counter, stats.block_seek_num);
-
- COUNTER_UPDATE(_parent->_filtered_segment_counter, stats.filtered_segment_number);
- COUNTER_UPDATE(_parent->_total_segment_counter, stats.total_segment_number);
-
- DorisMetrics::instance()->query_scan_bytes->increment(_compressed_bytes_read);
- DorisMetrics::instance()->query_scan_rows->increment(_raw_rows_read);
-
- _tablet->query_scan_bytes->increment(_compressed_bytes_read);
- _tablet->query_scan_rows->increment(_raw_rows_read);
- _tablet->query_scan_count->increment(1);
-
- _has_update_counter = true;
-}
-
-void OlapScanner::_update_realtime_counter() {
- auto& stats = _tablet_reader->stats();
- COUNTER_UPDATE(_parent->_read_compressed_counter, stats.compressed_bytes_read);
- _compressed_bytes_read += stats.compressed_bytes_read;
- _tablet_reader->mutable_stats()->compressed_bytes_read = 0;
-
- COUNTER_UPDATE(_parent->_raw_rows_counter, stats.raw_rows_read);
- // if _raw_rows_read were reset, the scan node would rescan all table rows, which may crash the BE
- _raw_rows_read += stats.raw_rows_read;
-
- _tablet_reader->mutable_stats()->raw_rows_read = 0;
-}
-
-Status OlapScanner::close(RuntimeState* state) {
- if (_is_closed) {
- return Status::OK();
- }
- // The olap scan node calls scanner.close() when finished,
- // and resources are released here.
- // If the rowset readers in read_params were not cleared here,
- // they would be released when the RuntimeState is destructed; but the
- // reader's destructor references the RuntimeState, so that would crash.
- _tablet_reader_params.rs_readers.clear();
- update_counter();
- _tablet_reader.reset();
- Expr::close(_conjunct_ctxs, state);
- _is_closed = true;
- return Status::OK();
-}
-
-} // namespace doris
diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h
deleted file mode 100644
index 0f45954fae..0000000000
--- a/be/src/exec/olap_scanner.h
+++ /dev/null
@@ -1,163 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
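For orientation, a hypothetical driver loop over the API declared in this header (added sketch; the names come from the declarations below, and error handling plus the parent node's plumbing are elided):

    OlapScanner scanner(state, parent, aggregation, need_agg_finalize, scan_range, tracker);
    RETURN_IF_ERROR(scanner.prepare(scan_range, key_ranges, filters, bloom_filters, function_filters));
    RETURN_IF_ERROR(scanner.open());
    bool eof = false;
    while (!eof) {
        RETURN_IF_ERROR(scanner.get_batch(state, batch, &eof));
        // hand the filled RowBatch over to the parent scan node here
    }
    scanner.update_counter();
    RETURN_IF_ERROR(scanner.close(state));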
- -#pragma once - -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "exec/exec_node.h" -#include "exec/olap_utils.h" -#include "exprs/bloomfilter_predicate.h" -#include "exprs/expr.h" -#include "exprs/function_filter.h" -#include "gen_cpp/PaloInternalService_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "olap/tuple_reader.h" -#include "runtime/descriptors.h" -#include "runtime/tuple.h" - -namespace doris { - -class OlapScanNode; - -class OlapScanner { -public: - OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool aggregation, - bool need_agg_finalize, const TPaloScanRange& scan_range, - const std::shared_ptr& tracker); - - virtual ~OlapScanner() = default; - - Status prepare(const TPaloScanRange& scan_range, const std::vector& key_ranges, - const std::vector& filters, - const std::vector>>& - bloom_filters, - const std::vector& function_filters); - - Status open(); - - virtual Status get_batch(RuntimeState* state, RowBatch* batch, bool* eof); - - virtual Status close(RuntimeState* state); - - RuntimeState* runtime_state() { return _runtime_state; } - - std::vector* conjunct_ctxs() { return &_conjunct_ctxs; } - - int id() const { return _id; } - void set_id(int id) { _id = id; } - bool is_open() const { return _is_open; } - void set_opened() { _is_open = true; } - - int64_t raw_rows_read() const { return _raw_rows_read; } - - void update_counter(); - - const std::string& scan_disk() const { return _tablet->data_dir()->path(); } - - void start_wait_worker_timer() { - _watcher.reset(); - _watcher.start(); - } - - int64_t update_wait_worker_timer() const { return _watcher.elapsed_time(); } - - void set_use_pushdown_conjuncts(bool has_pushdown_conjuncts) { - _use_pushdown_conjuncts = has_pushdown_conjuncts; - } - - std::vector* mutable_runtime_filter_marks() { return &_runtime_filter_marks; } - - const std::vector& get_query_slots() const { return _query_slots; } - - TabletStorageType get_storage_type(); - - void set_batch_size(size_t batch_size) { _batch_size = batch_size; } - -protected: - Status _init_tablet_reader_params( - const std::vector& key_ranges, const std::vector& filters, - const std::vector>>& - bloom_filters, - const std::vector& function_filters); - Status _init_return_columns(bool need_seq_col); - void _convert_row_to_tuple(Tuple* tuple); - - // Update profile that need to be reported in realtime. 
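// (Added clarification: the function declared next is called from the scan
// loop so the profile reflects progress while scanning, whereas
// update_counter() above flushes the remaining reader statistics once, at close.)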
- void _update_realtime_counter(); - - virtual void set_tablet_reader() { _tablet_reader = std::make_unique(); } - -protected: - RuntimeState* _runtime_state; - OlapScanNode* _parent; - const TupleDescriptor* _tuple_desc; /**< tuple descriptor */ - - std::vector _conjunct_ctxs; - // to record which runtime filters have been used - std::vector _runtime_filter_marks; - - int _id; - bool _is_open; - bool _aggregation; - bool _need_agg_finalize = true; - bool _has_update_counter = false; - bool _use_pushdown_conjuncts = false; - - TabletReader::ReaderParams _tablet_reader_params; - std::unique_ptr _tablet_reader; - - TabletSharedPtr _tablet; - int64_t _version; - - std::vector _return_columns; - std::unordered_set _tablet_columns_convert_to_null_set; - - RowCursor _read_row_cursor; - - std::vector _query_slots; - - // time costed and row returned statistics - ExecNode::EvalConjunctsFn _eval_conjuncts_fn = nullptr; - - int64_t _num_rows_read = 0; - int64_t _raw_rows_read = 0; - int64_t _compressed_bytes_read = 0; - int64_t _avg_row_size = 0; - int64_t _first_batch_row_num = 0; - int64_t _first_batch_size = 0; - - size_t _batch_size = 0; - - // number rows filtered by pushed condition - int64_t _num_rows_pushed_cond_filtered = 0; - - bool _is_closed = false; - - MonotonicStopWatch _watcher; - - std::shared_ptr _mem_tracker; - - TabletSchemaSPtr _tablet_schema; -}; - -} // namespace doris diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp deleted file mode 100644 index 2dddf2baf1..0000000000 --- a/be/src/exec/orc_scanner.cpp +++ /dev/null @@ -1,431 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/orc_scanner.h" - -#include "io/file_factory.h" -#include "runtime/exec_env.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" - -// orc include file didn't expose orc::TimezoneError -// we have to declare it by hand, following is the source code in orc link -// https://github.com/apache/orc/blob/84353fbfc447b06e0924024a8e03c1aaebd3e7a5/c%2B%2B/src/Timezone.hh#L104-L109 -namespace orc { - -class TimezoneError : public std::runtime_error { -public: - TimezoneError(const std::string& what); - TimezoneError(const TimezoneError&); - virtual ~TimezoneError() noexcept; -}; - -} // namespace orc - -namespace doris { - -class ORCFileStream : public orc::InputStream { -public: - ORCFileStream(FileReader* file, std::string filename) - : _file(file), _filename(std::move(filename)) {} - - ~ORCFileStream() override { - if (_file != nullptr) { - _file->close(); - delete _file; - _file = nullptr; - } - } - - /** - * Get the total length of the file in bytes. - */ - uint64_t getLength() const override { return _file->size(); } - - /** - * Get the natural size for reads. 
- * @return the number of bytes that should be read at once - */ - uint64_t getNaturalReadSize() const override { return 128 * 1024; } - - /** - * Read length bytes from the file starting at offset into - * the buffer starting at buf. - * @param buf the starting position of a buffer. - * @param length the number of bytes to read. - * @param offset the position in the stream to read from. - */ - void read(void* buf, uint64_t length, uint64_t offset) override { - if (buf == nullptr) { - throw orc::ParseError("Buffer is null"); - } - - int64_t bytes_read = 0; - int64_t reads = 0; - while (bytes_read < length) { - Status result = _file->readat(offset, length - bytes_read, &reads, buf); - if (!result.ok()) { - throw orc::ParseError("Bad read of " + _filename); - } - if (reads == 0) { - break; - } - bytes_read += reads; // total read bytes - offset += reads; - buf = (char*)buf + reads; - } - if (length != bytes_read) { - throw orc::ParseError("Short read of " + _filename + - ". expected :" + std::to_string(length) + - ", actual : " + std::to_string(bytes_read)); - } - } - - /** - * Get the name of the stream for error messages. - */ - const std::string& getName() const override { return _filename; } - -private: - FileReader* _file; - std::string _filename; -}; - -ORCScanner::ORCScanner(RuntimeState* state, RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, ScannerCounter* counter) - : BaseScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, counter), - // _splittable(params.splittable), - _cur_file_eof(true), - _total_groups(0), - _current_group(0), - _rows_of_group(0), - _current_line_of_group(0) {} - -ORCScanner::~ORCScanner() { - close(); -} - -Status ORCScanner::open() { - RETURN_IF_ERROR(BaseScanner::open()); - if (!_ranges.empty()) { - std::list include_cols; - TBrokerRangeDesc range = _ranges[0]; - _num_of_columns_from_file = range.__isset.num_of_columns_from_file - ? 
range.num_of_columns_from_file - : _src_slot_descs.size(); - for (int i = 0; i < _num_of_columns_from_file; i++) { - auto slot_desc = _src_slot_descs.at(i); - include_cols.push_back(slot_desc->col_name()); - } - _row_reader_options.include(include_cols); - } - - return Status::OK(); -} - -Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) { - try { - SCOPED_TIMER(_read_timer); - // Get one line - while (!_scanner_eof) { - if (_cur_file_eof) { - RETURN_IF_ERROR(open_next_reader()); - if (_scanner_eof) { - *eof = true; - return Status::OK(); - } else { - _cur_file_eof = false; - } - } - if (_current_line_of_group >= _rows_of_group) { // read next stripe - if (_current_group >= _total_groups) { - _cur_file_eof = true; - continue; - } - _rows_of_group = _reader->getStripe(_current_group)->getNumberOfRows(); - _batch = _row_reader->createRowBatch(_rows_of_group); - _row_reader->next(*_batch.get()); - - _current_line_of_group = 0; - ++_current_group; - } - - const std::vector& batch_vec = - ((orc::StructVectorBatch*)_batch.get())->fields; - for (int column_ipos = 0; column_ipos < _num_of_columns_from_file; ++column_ipos) { - auto slot_desc = _src_slot_descs[column_ipos]; - orc::ColumnVectorBatch* cvb = batch_vec[_position_in_orc_original[column_ipos]]; - - if (cvb->hasNulls && !cvb->notNull[_current_line_of_group]) { - if (!slot_desc->is_nullable()) { - std::stringstream str_error; - str_error << "The field name(" << slot_desc->col_name() - << ") is not nullable "; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } - _src_tuple->set_null(slot_desc->null_indicator_offset()); - } else { - int32_t wbytes = 0; - uint8_t tmp_buf[128] = {0}; - if (slot_desc->is_nullable()) { - _src_tuple->set_not_null(slot_desc->null_indicator_offset()); - } - void* slot = _src_tuple->get_slot(slot_desc->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - - switch (_row_reader->getSelectedType() - .getSubtype(_position_in_orc_original[column_ipos]) - ->getKind()) { - case orc::BOOLEAN: { - int64_t value = ((orc::LongVectorBatch*)cvb)->data[_current_line_of_group]; - if (value == 0) { - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(5)); - memcpy(str_slot->ptr, "false", 5); - str_slot->len = 5; - } else { - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(4)); - memcpy(str_slot->ptr, "true", 4); - str_slot->len = 4; - } - break; - } - case orc::BYTE: - case orc::INT: - case orc::SHORT: - case orc::LONG: { - int64_t value = ((orc::LongVectorBatch*)cvb)->data[_current_line_of_group]; - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%" PRId64, value); - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, tmp_buf, wbytes); - str_slot->len = wbytes; - break; - } - case orc::FLOAT: - case orc::DOUBLE: { - double value = ((orc::DoubleVectorBatch*)cvb)->data[_current_line_of_group]; - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%.9f", value); - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, tmp_buf, wbytes); - str_slot->len = wbytes; - break; - } - case orc::BINARY: - case orc::CHAR: - case orc::VARCHAR: - case orc::STRING: { - char* value = ((orc::StringVectorBatch*)cvb)->data[_current_line_of_group]; - wbytes = ((orc::StringVectorBatch*)cvb)->length[_current_line_of_group]; - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, value, wbytes); - str_slot->len = wbytes; - break; - } - case 
orc::DECIMAL: { - int precision = ((orc::Decimal64VectorBatch*)cvb)->precision; - int scale = ((orc::Decimal64VectorBatch*)cvb)->scale; - - //Decimal64VectorBatch handles decimal columns with precision no greater than 18. - //Decimal128VectorBatch handles the others. - std::string decimal_str; - if (precision <= 18) { - decimal_str = std::to_string(((orc::Decimal64VectorBatch*)cvb) - ->values[_current_line_of_group]); - } else { - decimal_str = ((orc::Decimal128VectorBatch*)cvb) - ->values[_current_line_of_group] - .toString(); - } - - int negative = decimal_str[0] == '-' ? 1 : 0; - int decimal_scale_length = decimal_str.size() - negative; - - std::string v; - if (decimal_scale_length <= scale) { - // decimal(5,2) : the integer of 0.01 is 1, so we should fill 0 before integer - v = std::string(negative ? "-0." : "0."); - int fill_zero = scale - decimal_scale_length; - while (fill_zero--) { - v += "0"; - } - if (negative) { - v += decimal_str.substr(1, decimal_str.length()); - } else { - v += decimal_str; - } - } else { - //Orc api will fill in 0 at the end, so size must greater than scale - v = decimal_str.substr(0, decimal_str.size() - scale) + "." + - decimal_str.substr(decimal_str.size() - scale); - } - - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(v.size())); - memcpy(str_slot->ptr, v.c_str(), v.size()); - str_slot->len = v.size(); - break; - } - case orc::DATE: { - //Date columns record the number of days since the UNIX epoch (1/1/1970 in UTC). - int64_t timestamp = - ((orc::LongVectorBatch*)cvb)->data[_current_line_of_group] * 24 * - 60 * 60; - DateTimeValue dtv; - if (!dtv.from_unixtime(timestamp, "UTC")) { - std::stringstream str_error; - str_error - << "Parse timestamp (" + std::to_string(timestamp) + ") error"; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } - dtv.cast_to_date(); - char* buf_end = dtv.to_string((char*)tmp_buf); - wbytes = buf_end - (char*)tmp_buf - 1; - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, tmp_buf, wbytes); - str_slot->len = wbytes; - break; - } - case orc::TIMESTAMP: { - //The time zone of orc's timestamp is stored inside orc's stripe information, - //so the timestamp obtained here is an offset timestamp, so parse timestamp with UTC is actual datetime literal. - int64_t timestamp = - ((orc::TimestampVectorBatch*)cvb)->data[_current_line_of_group]; - DateTimeValue dtv; - if (!dtv.from_unixtime(timestamp, "UTC")) { - std::stringstream str_error; - str_error - << "Parse timestamp (" + std::to_string(timestamp) + ") error"; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } - char* buf_end = dtv.to_string((char*)tmp_buf); - wbytes = buf_end - (char*)tmp_buf - 1; - str_slot->ptr = reinterpret_cast(tuple_pool->allocate(wbytes)); - memcpy(str_slot->ptr, tmp_buf, wbytes); - str_slot->len = wbytes; - break; - } - default: { - std::stringstream str_error; - str_error << "The field name(" << slot_desc->col_name() - << ") type not support. 
"; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } - } - } - } - ++_current_line_of_group; - - // range of current file - const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); - if (range.__isset.num_of_columns_from_file) { - fill_slots_of_columns_from_path(range.num_of_columns_from_file, - range.columns_from_path); - } - COUNTER_UPDATE(_rows_read_counter, 1); - SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(fill_dest_tuple(tuple, tuple_pool, fill_tuple)); - break; - } - if (_scanner_eof) { - *eof = true; - } else { - *eof = false; - } - return Status::OK(); - } catch (orc::ParseError& e) { - std::stringstream str_error; - str_error << "ParseError : " << e.what(); - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } catch (orc::InvalidArgument& e) { - std::stringstream str_error; - str_error << "ParseError : " << e.what(); - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } catch (orc::TimezoneError& e) { - std::stringstream str_error; - str_error << "TimezoneError : " << e.what(); - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } -} - -Status ORCScanner::open_next_reader() { - while (true) { - if (_next_range >= _ranges.size()) { - _scanner_eof = true; - return Status::OK(); - } - const TBrokerRangeDesc& range = _ranges[_next_range++]; - std::unique_ptr file_reader; - RETURN_IF_ERROR(FileFactory::create_file_reader( - range.file_type, _state->exec_env(), _profile, _broker_addresses, - _params.properties, range, range.start_offset, file_reader)); - RETURN_IF_ERROR(file_reader->open()); - - if (file_reader->size() == 0) { - file_reader->close(); - continue; - } - - std::unique_ptr inStream = std::unique_ptr( - new ORCFileStream(file_reader.release(), range.path)); - _reader = orc::createReader(std::move(inStream), _options); - - // Something the upstream system(eg, hive) may create empty orc file - // which only has a header and footer, without schema. - // And if we call `_reader->createRowReader()` with selected columns, - // it will throw ParserError: Invalid column selected xx. - // So here we first check its number of rows and skip these kind of files. 
- if (_reader->getNumberOfRows() == 0) {
- continue;
- }
-
- _total_groups = _reader->getNumberOfStripes();
- _current_group = 0;
- _rows_of_group = 0;
- _current_line_of_group = 0;
- _row_reader = _reader->createRowReader(_row_reader_options);
-
- // include_cols is in the loader's column order, while the batch is in ORC order;
- // e.g. include_cols = {v, k} with ORC field order (k, v) yields _position_in_orc_original = [1, 0]
- _position_in_orc_original.clear();
- _position_in_orc_original.resize(_num_of_columns_from_file);
- int orc_index = 0;
- auto include_cols = _row_reader_options.getIncludeNames();
- for (int i = 0; i < _row_reader->getSelectedType().getSubtypeCount(); ++i) {
- // included columns must exist among the reader's fields; otherwise createRowReader would have thrown
- auto pos = std::find(include_cols.begin(), include_cols.end(),
- _row_reader->getSelectedType().getFieldName(i));
- _position_in_orc_original.at(std::distance(include_cols.begin(), pos)) = orc_index++;
- }
- return Status::OK();
- }
-}
-
-void ORCScanner::close() {
- BaseScanner::close();
- _batch = nullptr;
- _reader.reset(nullptr);
- _row_reader.reset(nullptr);
-}
-
-} // namespace doris
diff --git a/be/src/exec/orc_scanner.h b/be/src/exec/orc_scanner.h
deleted file mode 100644
index 7ee4ab0b61..0000000000
--- a/be/src/exec/orc_scanner.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <orc/OrcFile.hh>
-
-#include "exec/base_scanner.h"
-
-namespace doris {
-
-// The ORC scanner converts data read through the broker into Doris tuples.
-class ORCScanner : public BaseScanner {
-public:
- ORCScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params,
- const std::vector<TBrokerRangeDesc>& ranges,
- const std::vector<TNetworkAddress>& broker_addresses,
- const std::vector<TExpr>& pre_filter_texprs, ScannerCounter* counter);
-
- ~ORCScanner() override;
-
- // Open this scanner; initializes the information needed for reading
- Status open() override;
-
- // Get next tuple
- Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override;
-
- // Close this scanner
- void close() override;
-
-private:
- // Read next buffer from reader
- Status open_next_reader();
-
-private:
- // Reader
- bool _cur_file_eof;
-
- // orc file reader object
- orc::ReaderOptions _options;
- orc::RowReaderOptions _row_reader_options;
- std::shared_ptr<orc::ColumnVectorBatch> _batch;
- std::unique_ptr<orc::Reader> _reader;
- std::unique_ptr<orc::RowReader> _row_reader;
- // The batch read from the ORC file is arranged in the file's original column order,
- // so we record each column's index in that order to map the column names onto it
- std::vector<int> _position_in_orc_original;
- int _num_of_columns_from_file;
-
- int _total_groups; // number of stripes ("groups") in the ORC file
- int _current_group;
- int64_t _rows_of_group; // rows in the current group (stripe).
- int64_t _current_line_of_group; -}; - -} // namespace doris diff --git a/be/src/exec/parquet_scanner.cpp b/be/src/exec/parquet_scanner.cpp deleted file mode 100644 index 074f7d35a7..0000000000 --- a/be/src/exec/parquet_scanner.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/parquet_scanner.h" - -#include "exec/arrow/parquet_reader.h" -#include "io/file_factory.h" -#include "runtime/descriptors.h" -#include "runtime/exec_env.h" -#include "runtime/stream_load/stream_load_pipe.h" - -namespace doris { -using namespace ErrorCode; - -ParquetScanner::ParquetScanner(RuntimeState* state, RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, ScannerCounter* counter) - : BaseScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, counter), - // _splittable(params.splittable), - _cur_file_reader(nullptr), - _cur_file_eof(false) {} - -ParquetScanner::~ParquetScanner() { - close(); -} - -Status ParquetScanner::open() { - return BaseScanner::open(); -} - -Status ParquetScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) { - SCOPED_TIMER(_read_timer); - // Get one line - while (!_scanner_eof) { - if (_cur_file_reader == nullptr || _cur_file_eof) { - RETURN_IF_ERROR(open_next_reader()); - // If there isn't any more reader, break this - if (_scanner_eof) { - continue; - } - _cur_file_eof = false; - } - RETURN_IF_ERROR(_cur_file_reader->read(_src_tuple, tuple_pool, &_cur_file_eof)); - // range of current file - const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); - if (range.__isset.num_of_columns_from_file) { - fill_slots_of_columns_from_path(range.num_of_columns_from_file, - range.columns_from_path); - } - - COUNTER_UPDATE(_rows_read_counter, 1); - SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(fill_dest_tuple(tuple, tuple_pool, fill_tuple)); - break; // break always - } - - *eof = _scanner_eof; - return Status::OK(); -} - -Status ParquetScanner::open_next_reader() { - // open_file_reader - if (_cur_file_reader != nullptr) { - if (_stream_load_pipe != nullptr) { - _stream_load_pipe.reset(); - _cur_file_reader = nullptr; - } else { - delete _cur_file_reader; - _cur_file_reader = nullptr; - } - } - - while (true) { - if (_next_range >= _ranges.size()) { - _scanner_eof = true; - return Status::OK(); - } - const TBrokerRangeDesc& range = _ranges[_next_range++]; - std::unique_ptr file_reader; - RETURN_IF_ERROR(FileFactory::create_file_reader( - range.file_type, _state->exec_env(), _profile, _broker_addresses, - _params.properties, range, range.start_offset, file_reader)); - RETURN_IF_ERROR(file_reader->open()); - - if 
(file_reader->size() == 0) { - file_reader->close(); - continue; - } - int32_t num_of_columns_from_file = _src_slot_descs.size(); - if (range.__isset.num_of_columns_from_file) { - num_of_columns_from_file = range.num_of_columns_from_file; - } - _cur_file_reader = new ParquetReaderWrap(_state, _src_slot_descs, file_reader.release(), - num_of_columns_from_file, 0, 0); - auto tuple_desc = _state->desc_tbl().get_tuple_descriptor(_tupleId); - Status status = - _cur_file_reader->init_reader(tuple_desc, _conjunct_ctxs, _state->timezone()); - if (status.is()) { - continue; - } else { - if (!status.ok()) { - return Status::InternalError("file: {}, error:{}", range.path, status.to_string()); - } else { - RETURN_IF_ERROR(_cur_file_reader->init_parquet_type()); - return status; - } - } - } -} - -void ParquetScanner::close() { - BaseScanner::close(); - if (_cur_file_reader != nullptr) { - if (_stream_load_pipe != nullptr) { - _stream_load_pipe.reset(); - _cur_file_reader = nullptr; - } else { - delete _cur_file_reader; - _cur_file_reader = nullptr; - } - } -} - -} // namespace doris diff --git a/be/src/exec/parquet_scanner.h b/be/src/exec/parquet_scanner.h deleted file mode 100644 index d66802dd95..0000000000 --- a/be/src/exec/parquet_scanner.h +++ /dev/null @@ -1,85 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "exec/base_scanner.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/mem_pool.h" -#include "util/runtime_profile.h" -#include "util/slice.h" - -namespace doris { - -class Tuple; -class SlotDescriptor; -struct Slice; -class ParquetReaderWrap; -class RuntimeState; -class ExprContext; -class TupleDescriptor; -class TupleRow; -class RowDescriptor; -class RuntimeProfile; -class StreamLoadPipe; - -// Broker scanner convert the data read from broker to doris's tuple. 
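// (Added clarification: this is the Parquet variant of the broker scanner;
// get_next() returns one converted row per call and sets *eof only after all
// ranges have been exhausted.)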
-class ParquetScanner : public BaseScanner { -public: - ParquetScanner(RuntimeState* state, RuntimeProfile* profile, - const TBrokerScanRangeParams& params, - const std::vector& ranges, - const std::vector& broker_addresses, - const std::vector& pre_filter_texprs, ScannerCounter* counter); - - ~ParquetScanner() override; - - // Open this scanner, will initialize information need to - Status open() override; - - // Get next tuple - Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override; - - Status get_next(vectorized::Block* block, bool* eof) override { - return Status::NotSupported("Not Implemented get block"); - } - - // Close this scanner - void close() override; - -protected: - // Read next buffer from reader - Status open_next_reader(); - -protected: - // Reader - ParquetReaderWrap* _cur_file_reader; - bool _cur_file_eof; // is read over? - - // used to hold current StreamLoadPipe - std::shared_ptr _stream_load_pipe; -}; - -} // namespace doris diff --git a/be/src/exec/parquet_writer.cpp b/be/src/exec/parquet_writer.cpp deleted file mode 100644 index b21a0b869f..0000000000 --- a/be/src/exec/parquet_writer.cpp +++ /dev/null @@ -1,583 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
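Before the deleted implementation, a minimal standalone sketch of the arrow/parquet builder pattern that parse_properties() below relies on (added example; the chosen property values are illustrative only):

    #include <parquet/properties.h>

    std::shared_ptr<parquet::WriterProperties> make_example_props() {
        parquet::WriterProperties::Builder builder;
        builder.compression(parquet::Compression::SNAPPY);     // e.g. the "snappy" mapping
        builder.disable_dictionary();
        builder.version(parquet::ParquetVersion::PARQUET_1_0); // e.g. the "v1" mapping
        return builder.build();
    }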
- -#include "exec/parquet_writer.h" - -#include -#include -#include -#include - -#include "io/file_writer.h" -#include "util/mysql_global.h" -#include "util/types.h" - -namespace doris { - -/// ParquetOutputStream -ParquetOutputStream::ParquetOutputStream(FileWriter* file_writer) - : _file_writer(file_writer), _cur_pos(0), _written_len(0) { - set_mode(arrow::io::FileMode::WRITE); -} - -ParquetOutputStream::~ParquetOutputStream() { - arrow::Status st = Close(); - if (!st.ok()) { - LOG(WARNING) << "close parquet file error: " << st.ToString(); - } -} - -void ParquetBuildHelper::build_schema_repetition_type( - parquet::Repetition::type& parquet_repetition_type, - const TParquetRepetitionType::type& column_repetition_type) { - switch (column_repetition_type) { - case TParquetRepetitionType::REQUIRED: { - parquet_repetition_type = parquet::Repetition::REQUIRED; - break; - } - case TParquetRepetitionType::REPEATED: { - parquet_repetition_type = parquet::Repetition::REPEATED; - break; - } - case TParquetRepetitionType::OPTIONAL: { - parquet_repetition_type = parquet::Repetition::OPTIONAL; - break; - } - default: - parquet_repetition_type = parquet::Repetition::UNDEFINED; - } -} - -void ParquetBuildHelper::build_schema_data_type(parquet::Type::type& parquet_data_type, - const TParquetDataType::type& column_data_type) { - switch (column_data_type) { - case TParquetDataType::BOOLEAN: { - parquet_data_type = parquet::Type::BOOLEAN; - break; - } - case TParquetDataType::INT32: { - parquet_data_type = parquet::Type::INT32; - break; - } - case TParquetDataType::INT64: { - parquet_data_type = parquet::Type::INT64; - break; - } - case TParquetDataType::INT96: { - parquet_data_type = parquet::Type::INT96; - break; - } - case TParquetDataType::BYTE_ARRAY: { - parquet_data_type = parquet::Type::BYTE_ARRAY; - break; - } - case TParquetDataType::FLOAT: { - parquet_data_type = parquet::Type::FLOAT; - break; - } - case TParquetDataType::DOUBLE: { - parquet_data_type = parquet::Type::DOUBLE; - break; - } - case TParquetDataType::FIXED_LEN_BYTE_ARRAY: { - parquet_data_type = parquet::Type::FIXED_LEN_BYTE_ARRAY; - break; - } - default: - parquet_data_type = parquet::Type::UNDEFINED; - } -} - -void ParquetBuildHelper::build_compression_type( - parquet::WriterProperties::Builder& builder, - const TParquetCompressionType::type& compression_type) { - switch (compression_type) { - case TParquetCompressionType::SNAPPY: { - builder.compression(parquet::Compression::SNAPPY); - break; - } - case TParquetCompressionType::GZIP: { - builder.compression(parquet::Compression::GZIP); - break; - } - case TParquetCompressionType::BROTLI: { - builder.compression(parquet::Compression::BROTLI); - break; - } - case TParquetCompressionType::ZSTD: { - builder.compression(parquet::Compression::ZSTD); - break; - } - case TParquetCompressionType::LZ4: { - builder.compression(parquet::Compression::LZ4); - break; - } - case TParquetCompressionType::LZO: { - builder.compression(parquet::Compression::LZO); - break; - } - case TParquetCompressionType::BZ2: { - builder.compression(parquet::Compression::BZ2); - break; - } - case TParquetCompressionType::UNCOMPRESSED: { - builder.compression(parquet::Compression::UNCOMPRESSED); - break; - } - default: - builder.compression(parquet::Compression::UNCOMPRESSED); - } -} - -void ParquetBuildHelper::build_version(parquet::WriterProperties::Builder& builder, - const TParquetVersion::type& parquet_version) { - switch (parquet_version) { - case TParquetVersion::PARQUET_1_0: { - 
builder.version(parquet::ParquetVersion::PARQUET_1_0); - break; - } - case TParquetVersion::PARQUET_2_LATEST: { - builder.version(parquet::ParquetVersion::PARQUET_2_LATEST); - break; - } - default: - builder.version(parquet::ParquetVersion::PARQUET_1_0); - } -} - -arrow::Status ParquetOutputStream::Write(const void* data, int64_t nbytes) { - if (_is_closed) { - return arrow::Status::OK(); - } - size_t written_len = 0; - Status st = _file_writer->write(static_cast(data), nbytes, &written_len); - if (!st.ok()) { - return arrow::Status::IOError(st.to_string()); - } - _cur_pos += written_len; - _written_len += written_len; - return arrow::Status::OK(); -} - -arrow::Result ParquetOutputStream::Tell() const { - return _cur_pos; -} - -arrow::Status ParquetOutputStream::Close() { - if (_is_closed) { - return arrow::Status::OK(); - } - Status st = _file_writer->close(); - if (!st.ok()) { - LOG(WARNING) << "close parquet output stream failed: " << st; - return arrow::Status::IOError(st.to_string()); - } - _is_closed = true; - return arrow::Status::OK(); -} - -int64_t ParquetOutputStream::get_written_len() { - return _written_len; -} - -void ParquetOutputStream::set_written_len(int64_t written_len) { - _written_len = written_len; -} - -ParquetWriterWrapper::ParquetWriterWrapper(FileWriter* file_writer, - const std::vector& output_expr_ctxs, - const std::map& properties, - const std::vector>& schema, - bool output_object_data) - : _output_expr_ctxs(output_expr_ctxs), - _str_schema(schema), - _cur_writed_rows(0), - _rg_writer(nullptr), - _output_object_data(output_object_data) { - _outstream = std::shared_ptr(new ParquetOutputStream(file_writer)); - parse_properties(properties); - parse_schema(schema); - init_parquet_writer(); -} - -void ParquetWriterWrapper::parse_properties( - const std::map& propertie_map) { - parquet::WriterProperties::Builder builder; - for (auto it = propertie_map.begin(); it != propertie_map.end(); it++) { - std::string property_name = it->first; - std::string property_value = it->second; - if (property_name == "compression") { - // UNCOMPRESSED, SNAPPY, GZIP, BROTLI, ZSTD, LZ4, LZO, BZ2 - if (property_value == "snappy") { - builder.compression(parquet::Compression::SNAPPY); - } else if (property_value == "gzip") { - builder.compression(parquet::Compression::GZIP); - } else if (property_value == "brotli") { - builder.compression(parquet::Compression::BROTLI); - } else if (property_value == "zstd") { - builder.compression(parquet::Compression::ZSTD); - } else if (property_value == "lz4") { - builder.compression(parquet::Compression::LZ4); - } else if (property_value == "lzo") { - builder.compression(parquet::Compression::LZO); - } else if (property_value == "bz2") { - builder.compression(parquet::Compression::BZ2); - } else { - builder.compression(parquet::Compression::UNCOMPRESSED); - } - } else if (property_name == "disable_dictionary") { - if (property_value == "true") { - builder.enable_dictionary(); - } else { - builder.disable_dictionary(); - } - } else if (property_name == "version") { - if (property_value == "v1") { - builder.version(parquet::ParquetVersion::PARQUET_1_0); - } else { - builder.version(parquet::ParquetVersion::PARQUET_2_LATEST); - } - } - } - _properties = builder.build(); -} - -Status ParquetWriterWrapper::parse_schema(const std::vector>& schema) { - parquet::schema::NodeVector fields; - for (auto column = schema.begin(); column != schema.end(); column++) { - std::string repetition_type = (*column)[0]; - parquet::Repetition::type parquet_repetition_type = 
parquet::Repetition::REQUIRED; - if (repetition_type.find("required") != std::string::npos) { - parquet_repetition_type = parquet::Repetition::REQUIRED; - } else if (repetition_type.find("repeated") != std::string::npos) { - parquet_repetition_type = parquet::Repetition::REPEATED; - } else if (repetition_type.find("optional") != std::string::npos) { - parquet_repetition_type = parquet::Repetition::OPTIONAL; - } else { - parquet_repetition_type = parquet::Repetition::UNDEFINED; - } - - std::string data_type = (*column)[1]; - parquet::Type::type parquet_data_type = parquet::Type::BYTE_ARRAY; - if (data_type == "boolean") { - parquet_data_type = parquet::Type::BOOLEAN; - } else if (data_type.find("int32") != std::string::npos) { - parquet_data_type = parquet::Type::INT32; - } else if (data_type.find("int64") != std::string::npos) { - parquet_data_type = parquet::Type::INT64; - } else if (data_type.find("int96") != std::string::npos) { - parquet_data_type = parquet::Type::INT96; - } else if (data_type.find("float") != std::string::npos) { - parquet_data_type = parquet::Type::FLOAT; - } else if (data_type.find("double") != std::string::npos) { - parquet_data_type = parquet::Type::DOUBLE; - } else if (data_type.find("byte_array") != std::string::npos) { - parquet_data_type = parquet::Type::BYTE_ARRAY; - } else if (data_type.find("fixed_len_byte_array") != std::string::npos) { - parquet_data_type = parquet::Type::FIXED_LEN_BYTE_ARRAY; - } else { - parquet_data_type = parquet::Type::UNDEFINED; - } - - std::string column_name = (*column)[2]; - fields.push_back(parquet::schema::PrimitiveNode::Make(column_name, parquet_repetition_type, - parquet::LogicalType::None(), - parquet_data_type)); - _schema = std::static_pointer_cast( - parquet::schema::GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields)); - } - return Status::OK(); -} - -/// ParquetWriterWrapper -ParquetWriterWrapper::ParquetWriterWrapper(FileWriter* file_writer, - const std::vector& output_expr_ctxs, - const std::vector& parquet_schemas, - const TParquetCompressionType::type& compression_type, - const bool& parquet_disable_dictionary, - const TParquetVersion::type& parquet_version, - bool output_object_data) - : _output_expr_ctxs(output_expr_ctxs), - _cur_writed_rows(0), - _rg_writer(nullptr), - _output_object_data(output_object_data) { - _outstream = std::shared_ptr(new ParquetOutputStream(file_writer)); - parse_properties(compression_type, parquet_disable_dictionary, parquet_version); - parse_schema(parquet_schemas); - init_parquet_writer(); -} - -void ParquetWriterWrapper::parse_properties(const TParquetCompressionType::type& compression_type, - const bool& parquet_disable_dictionary, - const TParquetVersion::type& parquet_version) { - parquet::WriterProperties::Builder builder; - ParquetBuildHelper::build_compression_type(builder, compression_type); - ParquetBuildHelper::build_version(builder, parquet_version); - if (parquet_disable_dictionary) { - builder.disable_dictionary(); - } else { - builder.enable_dictionary(); - } - _properties = builder.build(); -} - -void ParquetWriterWrapper::parse_schema(const std::vector& parquet_schemas) { - parquet::schema::NodeVector fields; - parquet::Repetition::type parquet_repetition_type; - parquet::Type::type parquet_data_type; - for (int idx = 0; idx < parquet_schemas.size(); ++idx) { - ParquetBuildHelper::build_schema_repetition_type( - parquet_repetition_type, parquet_schemas[idx].schema_repetition_type); - ParquetBuildHelper::build_schema_data_type(parquet_data_type, - 
parquet_schemas[idx].schema_data_type);
- fields.push_back(parquet::schema::PrimitiveNode::Make(
- parquet_schemas[idx].schema_column_name, parquet_repetition_type,
- parquet::LogicalType::None(), parquet_data_type));
- _schema = std::static_pointer_cast<parquet::schema::GroupNode>(
- parquet::schema::GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields));
- }
-}
-
-Status ParquetWriterWrapper::write(const RowBatch& row_batch) {
- int num_rows = row_batch.num_rows();
- for (int i = 0; i < num_rows; ++i) {
- TupleRow* row = row_batch.get_row(i);
- RETURN_IF_ERROR(_write_one_row(row));
- _cur_writed_rows++;
- }
- return Status::OK();
-}
-
-Status ParquetWriterWrapper::init_parquet_writer() {
- _writer = parquet::ParquetFileWriter::Open(_outstream, _schema, _properties);
- if (_writer == nullptr) {
- return Status::InternalError("Failed to create file writer");
- }
- return Status::OK();
-}
-
-parquet::RowGroupWriter* ParquetWriterWrapper::get_rg_writer() {
- if (_rg_writer == nullptr) {
- _rg_writer = _writer->AppendBufferedRowGroup();
- }
- if (_cur_writed_rows > _max_row_per_group) {
- _rg_writer->Close();
- _rg_writer = _writer->AppendBufferedRowGroup();
- _cur_writed_rows = 0;
- }
- return _rg_writer;
-}
-
-template <typename T>
-void ParquetWriterWrapper::write_int32_column(int index, T* item) {
- parquet::RowGroupWriter* rgWriter = get_rg_writer();
- parquet::Int32Writer* col_writer = static_cast<parquet::Int32Writer*>(rgWriter->column(index));
- int32_t value = 0;
- if (item != nullptr) {
- value = *item;
- }
- col_writer->WriteBatch(1, nullptr, nullptr, &value);
-}
-
-Status ParquetWriterWrapper::_write_one_row(TupleRow* row) {
- int num_columns = _output_expr_ctxs.size();
- try {
- for (int index = 0; index < num_columns; ++index) {
- void* item = _output_expr_ctxs[index]->get_value(row);
- switch (_output_expr_ctxs[index]->root()->type().type) {
- case TYPE_BOOLEAN: {
- parquet::RowGroupWriter* rgWriter = get_rg_writer();
- parquet::BoolWriter* col_writer =
- static_cast<parquet::BoolWriter*>(rgWriter->column(index));
- if (item != nullptr) {
- col_writer->WriteBatch(1, nullptr, nullptr, static_cast<bool*>(item));
- } else {
- bool default_bool = false;
- col_writer->WriteBatch(1, nullptr, nullptr, &default_bool);
- }
- break;
- }
- case TYPE_TINYINT: {
- write_int32_column(index, static_cast<int8_t*>(item));
- break;
- }
- case TYPE_SMALLINT: {
- write_int32_column(index, static_cast<int16_t*>(item));
- break;
- }
- case TYPE_INT: {
- write_int32_column(index, static_cast<int32_t*>(item));
- break;
- }
- case TYPE_BIGINT: {
- parquet::RowGroupWriter* rgWriter = get_rg_writer();
- parquet::Int64Writer* col_writer =
- static_cast<parquet::Int64Writer*>(rgWriter->column(index));
- if (item != nullptr) {
- col_writer->WriteBatch(1, nullptr, nullptr, (int64_t*)(item));
- } else {
- int64_t default_int64 = 0;
- col_writer->WriteBatch(1, nullptr, nullptr, &default_int64);
- }
- break;
- }
- case TYPE_LARGEINT: {
- // TODO: int128 is not supported yet.
- // It is better to write a default value, because the rg_writer needs every
- // column to have a value before flushing the row group to disk.
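// (Added note: a placeholder zero is still written below so the buffered row
// group keeps all columns aligned before the error is returned.)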
-
-template <typename T>
-void ParquetWriterWrapper::write_int32_column(int index, T* item) {
-    parquet::RowGroupWriter* rgWriter = get_rg_writer();
-    parquet::Int32Writer* col_writer = static_cast<parquet::Int32Writer*>(rgWriter->column(index));
-    int32_t value = 0;
-    if (item != nullptr) {
-        value = *item;
-    }
-    col_writer->WriteBatch(1, nullptr, nullptr, &value);
-}
-
-Status ParquetWriterWrapper::_write_one_row(TupleRow* row) {
-    int num_columns = _output_expr_ctxs.size();
-    try {
-        for (int index = 0; index < num_columns; ++index) {
-            void* item = _output_expr_ctxs[index]->get_value(row);
-            switch (_output_expr_ctxs[index]->root()->type().type) {
-            case TYPE_BOOLEAN: {
-                parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                parquet::BoolWriter* col_writer =
-                        static_cast<parquet::BoolWriter*>(rgWriter->column(index));
-                if (item != nullptr) {
-                    col_writer->WriteBatch(1, nullptr, nullptr, static_cast<bool*>(item));
-                } else {
-                    bool default_bool = false;
-                    col_writer->WriteBatch(1, nullptr, nullptr, &default_bool);
-                }
-                break;
-            }
-            case TYPE_TINYINT: {
-                write_int32_column(index, static_cast<int8_t*>(item));
-                break;
-            }
-            case TYPE_SMALLINT: {
-                write_int32_column(index, static_cast<int16_t*>(item));
-                break;
-            }
-            case TYPE_INT: {
-                write_int32_column(index, static_cast<int32_t*>(item));
-                break;
-            }
-            case TYPE_BIGINT: {
-                parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                parquet::Int64Writer* col_writer =
-                        static_cast<parquet::Int64Writer*>(rgWriter->column(index));
-                if (item != nullptr) {
-                    col_writer->WriteBatch(1, nullptr, nullptr, (int64_t*)(item));
-                } else {
-                    int64_t default_int64 = 0;
-                    col_writer->WriteBatch(1, nullptr, nullptr, &default_int64);
-                }
-                break;
-            }
-            case TYPE_LARGEINT: {
-                // TODO: int128 is not supported yet.
-                // It is better to write a default value, because the rg_writer needs every
-                // column to have a value before flushing to disk.
-                parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                parquet::Int64Writer* col_writer =
-                        static_cast<parquet::Int64Writer*>(rgWriter->column(index));
-                int64_t default_int64 = 0;
-                col_writer->WriteBatch(1, nullptr, nullptr, &default_int64);
-                return Status::InvalidArgument("do not support large int type.");
-            }
-            case TYPE_FLOAT: {
-                parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                parquet::FloatWriter* col_writer =
-                        static_cast<parquet::FloatWriter*>(rgWriter->column(index));
-                if (item != nullptr) {
-                    col_writer->WriteBatch(1, nullptr, nullptr, (float_t*)(item));
-                } else {
-                    float_t default_float = 0.0;
-                    col_writer->WriteBatch(1, nullptr, nullptr, &default_float);
-                }
-                break;
-            }
-            case TYPE_DOUBLE: {
-                parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                parquet::DoubleWriter* col_writer =
-                        static_cast<parquet::DoubleWriter*>(rgWriter->column(index));
-                if (item != nullptr) {
-                    col_writer->WriteBatch(1, nullptr, nullptr, (double_t*)(item));
-                } else {
-                    double_t default_double = 0.0;
-                    col_writer->WriteBatch(1, nullptr, nullptr, &default_double);
-                }
-                break;
-            }
-            case TYPE_DATETIME:
-            case TYPE_DATE: {
-                parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                parquet::Int64Writer* col_writer =
-                        static_cast<parquet::Int64Writer*>(rgWriter->column(index));
-                if (item != nullptr) {
-                    const DateTimeValue* time_val = (const DateTimeValue*)(item);
-                    int64_t timestamp = time_val->to_olap_datetime();
-                    col_writer->WriteBatch(1, nullptr, nullptr, &timestamp);
-                } else {
-                    int64_t default_int64 = 0;
-                    col_writer->WriteBatch(1, nullptr, nullptr, &default_int64);
-                }
-                break;
-            }
-
-            case TYPE_HLL:
-            case TYPE_OBJECT: {
-                if (_output_object_data) {
-                    parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                    parquet::ByteArrayWriter* col_writer =
-                            static_cast<parquet::ByteArrayWriter*>(rgWriter->column(index));
-                    if (item != nullptr) {
-                        const StringValue* string_val = (const StringValue*)(item);
-                        parquet::ByteArray value;
-                        value.ptr = reinterpret_cast<const uint8_t*>(string_val->ptr);
-                        value.len = string_val->len;
-                        col_writer->WriteBatch(1, nullptr, nullptr, &value);
-                    } else {
-                        parquet::ByteArray value;
-                        col_writer->WriteBatch(1, nullptr, nullptr, &value);
-                    }
-                } else {
-                    std::stringstream ss;
-                    ss << "unsupported file format: "
-                       << _output_expr_ctxs[index]->root()->type().type;
-                    return Status::InvalidArgument(ss.str());
-                }
-                break;
-            }
-            case TYPE_CHAR:
-            case TYPE_VARCHAR:
-            case TYPE_STRING: {
-                parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                parquet::ByteArrayWriter* col_writer =
-                        static_cast<parquet::ByteArrayWriter*>(rgWriter->column(index));
-                if (item != nullptr) {
-                    const StringValue* string_val = (const StringValue*)(item);
-                    parquet::ByteArray value;
-                    value.ptr = reinterpret_cast<const uint8_t*>(string_val->ptr);
-                    value.len = string_val->len;
-                    col_writer->WriteBatch(1, nullptr, nullptr, &value);
-                } else {
-                    parquet::ByteArray value;
-                    col_writer->WriteBatch(1, nullptr, nullptr, &value);
-                }
-                break;
-            }
-            case TYPE_DECIMALV2: {
-                parquet::RowGroupWriter* rgWriter = get_rg_writer();
-                parquet::ByteArrayWriter* col_writer =
-                        static_cast<parquet::ByteArrayWriter*>(rgWriter->column(index));
-                if (item != nullptr) {
-                    const DecimalV2Value decimal_val(
-                            reinterpret_cast<const PackedInt128*>(item)->value);
-                    char decimal_buffer[MAX_DECIMAL_WIDTH];
-                    int output_scale = _output_expr_ctxs[index]->root()->output_scale();
-                    parquet::ByteArray value;
-                    value.ptr = reinterpret_cast<const uint8_t*>(decimal_buffer);
-                    value.len = decimal_val.to_buffer(decimal_buffer, output_scale);
-                    col_writer->WriteBatch(1, nullptr, nullptr, &value);
-                } else {
-                    parquet::ByteArray value;
-                    col_writer->WriteBatch(1, nullptr, nullptr, &value);
-                }
-                break;
-            }
-            default: {
-                std::stringstream ss;
-                ss << "unsupported file format: "
-                   << _output_expr_ctxs[index]->root()->type().type;
-                return Status::InvalidArgument(ss.str());
-            }
-            }
-        }
-    } catch (const std::exception& e) {
-        LOG(WARNING) << "Parquet write error: " << e.what();
-        return Status::InternalError(e.what());
-    }
-    return Status::OK();
-}
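
`_write_one_row()` above papers over NULLs by writing type defaults, since every column of a buffered row group must receive a value. For reference, a hedged sketch (not part of the deleted file) of how a genuine NULL is encoded with the parquet-cpp `WriteBatch()` API, assuming the column had instead been declared `OPTIONAL` with a max definition level of 1:

```cpp
// Sketch only: encode a real NULL via definition levels instead of the
// default-value workaround above. Assumes an OPTIONAL column (max def level 1).
#include <parquet/api/writer.h>

void write_nullable_int64(parquet::Int64Writer* col_writer, const int64_t* item) {
    if (item != nullptr) {
        int16_t def_level = 1; // value present at the maximum definition level
        col_writer->WriteBatch(1, &def_level, nullptr, item);
    } else {
        int16_t def_level = 0; // NULL: no value is consumed from the values array
        col_writer->WriteBatch(1, &def_level, nullptr, nullptr);
    }
}
```
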
-
-int64_t ParquetWriterWrapper::written_len() {
-    return _outstream->get_written_len();
-}
-void ParquetWriterWrapper::close() {
-    try {
-        if (_rg_writer != nullptr) {
-            _rg_writer->Close();
-            _rg_writer = nullptr;
-        }
-        _writer->Close();
-        arrow::Status st = _outstream->Close();
-        if (!st.ok()) {
-            LOG(WARNING) << "close parquet file error: " << st.ToString();
-        }
-    } catch (const std::exception& e) {
-        _rg_writer = nullptr;
-        LOG(WARNING) << "Parquet writer close error: " << e.what();
-    }
-}
-
-} // namespace doris
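
The `ParquetOutputStream` declared in the deleted header that follows is essentially an `arrow::io::OutputStream` that forwards bytes to a sink and tracks the write position so parquet-cpp can call `Tell()` when recording column chunk offsets. A minimal standalone sketch of that idea, with a stand-in `Sink` in place of the Doris `FileWriter`:

```cpp
// Sketch of a position-tracking arrow::io::OutputStream. Sink is a stand-in
// for the Doris FileWriter, which is not shown in this diff.
#include <arrow/io/interfaces.h>
#include <arrow/result.h>
#include <arrow/status.h>

struct Sink {
    arrow::Status append(const void* data, int64_t nbytes) { return arrow::Status::OK(); }
};

class TrackingOutputStream : public arrow::io::OutputStream {
public:
    explicit TrackingOutputStream(Sink* sink) : _sink(sink) {}

    arrow::Status Write(const void* data, int64_t nbytes) override {
        ARROW_RETURN_NOT_OK(_sink->append(data, nbytes));
        _pos += nbytes; // parquet-cpp uses Tell() to record chunk offsets
        return arrow::Status::OK();
    }

    arrow::Result<int64_t> Tell() const override { return _pos; }

    arrow::Status Close() override {
        _closed = true;
        return arrow::Status::OK();
    }

    bool closed() const override { return _closed; }

private:
    Sink* _sink;
    int64_t _pos = 0;
    bool _closed = false;
};
```
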
diff --git a/be/src/exec/parquet_writer.h b/be/src/exec/parquet_writer.h
deleted file mode 100644
index 59b8600129..0000000000
--- a/be/src/exec/parquet_writer.h
+++ /dev/null
@@ -1,138 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-#include "common/status.h"
-#include "exprs/expr_context.h"
-#include "runtime/row_batch.h"
-
-namespace doris {
-class FileWriter;
-
-class ParquetOutputStream : public arrow::io::OutputStream {
-public:
-    ParquetOutputStream(FileWriter* file_writer);
-    ParquetOutputStream(FileWriter* file_writer, const int64_t& written_len);
-    virtual ~ParquetOutputStream();
-
-    arrow::Status Write(const void* data, int64_t nbytes) override;
-    // return the current write position of the stream
-    arrow::Result<int64_t> Tell() const override;
-    arrow::Status Close() override;
-
-    bool closed() const override { return _is_closed; }
-
-    int64_t get_written_len();
-
-    void set_written_len(int64_t written_len);
-
-private:
-    FileWriter* _file_writer; // not owned
-    int64_t _cur_pos = 0;     // current write position
-    bool _is_closed = false;
-    int64_t _written_len = 0;
-};
-
-class ParquetBuildHelper {
-public:
-    static void build_schema_repetition_type(
-            parquet::Repetition::type& parquet_repetition_type,
-            const TParquetRepetitionType::type& column_repetition_type);
-
-    static void build_schema_data_type(parquet::Type::type& parquet_data_type,
-                                       const TParquetDataType::type& column_data_type);
-
-    static void build_compression_type(parquet::WriterProperties::Builder& builder,
-                                       const TParquetCompressionType::type& compression_type);
-
-    static void build_version(parquet::WriterProperties::Builder& builder,
-                              const TParquetVersion::type& parquet_version);
-};
-
-// a wrapper of parquet output stream
-class ParquetWriterWrapper {
-public:
-    // TODO: kept for compatibility when upgrading; this code can be removed after 1.2
-    ParquetWriterWrapper(FileWriter* file_writer,
-                         const std::vector<ExprContext*>& output_expr_ctxs,
-                         const std::map<std::string, std::string>& properties,
-                         const std::vector<std::vector<std::string>>& schema,
-                         bool output_object_data);
-    void parse_properties(const std::map<std::string, std::string>& propertie_map);
-
-    Status parse_schema(const std::vector<std::vector<std::string>>& schema);
-
-    ParquetWriterWrapper(doris::FileWriter* file_writer,
-                         const std::vector<ExprContext*>& output_vexpr_ctxs,
-                         const std::vector<TParquetSchema>& parquet_schemas,
-                         const TParquetCompressionType::type& compression_type,
-                         const bool& parquet_disable_dictionary,
-                         const TParquetVersion::type& parquet_version, bool output_object_data);
-
-    ~ParquetWriterWrapper() = default;
-
-    Status write(const RowBatch& row_batch);
-
-    Status init_parquet_writer();
-
-    Status _write_one_row(TupleRow* row);
-
-    void close();
-
-    void parse_schema(const std::vector<TParquetSchema>& parquet_schemas);
-
-    void parse_properties(const TParquetCompressionType::type& compression_type,
-                          const bool& parquet_disable_dictionary,
-                          const TParquetVersion::type& parquet_version);
-
-    parquet::RowGroupWriter* get_rg_writer();
-
-    int64_t written_len();
-
-private:
-    template <typename T>
-    void write_int32_column(int index, T* item);
-
-    std::shared_ptr<ParquetOutputStream> _outstream;
-    std::shared_ptr<parquet::WriterProperties> _properties;
-    std::shared_ptr<parquet::schema::GroupNode> _schema;
-    std::unique_ptr<parquet::ParquetFileWriter> _writer;
-    const std::vector<ExprContext*>& _output_expr_ctxs;
-    std::vector<std::vector<std::string>> _str_schema;
-    int64_t _cur_writed_rows = 0;
-    parquet::RowGroupWriter* _rg_writer;
-    const int64_t _max_row_per_group = 10;
-    bool _output_object_data;
-};
-
-} // namespace doris
diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc
deleted file mode 100644
index ba51a1fb28..0000000000
--- a/be/src/exec/partitioned_aggregation_node.cc
+++ /dev/null
@@ -1,1654 +0,0 @@
-// Licensed to the
Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/partitioned-aggregation-node.cc -// and modified by Doris - -#include "exec/partitioned_aggregation_node.h" - -#include - -#include -#include -#include - -#include "exec/partitioned_hash_table.h" -#include "exec/partitioned_hash_table.inline.h" -#include "exprs/expr_context.h" -#include "exprs/new_agg_fn_evaluator.h" -#include "exprs/slot_ref.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gutil/strings/substitute.h" -#include "runtime/buffered_tuple_stream3.inline.h" -#include "runtime/descriptors.h" -#include "runtime/exec_env.h" -#include "runtime/mem_pool.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/raw_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/tuple.h" -#include "runtime/tuple_row.h" - -using namespace strings; - -namespace doris { - -/// The minimum reduction factor (input rows divided by output rows) to grow hash tables -/// in a streaming preaggregation, given that the hash tables are currently the given -/// size or above. The sizes roughly correspond to hash table sizes where the bucket -/// arrays will fit in a cache level. Intuitively, we don't want the working set of the -/// aggregation to expand to the next level of cache unless we're reducing the input -/// enough to outweigh the increased memory latency we'll incur for each hash table -/// lookup. -/// -/// Note that the current reduction achieved is not always a good estimate of the -/// final reduction. It may be biased either way depending on the ordering of the -/// input. If the input order is random, we will underestimate the final reduction -/// factor because the probability of a row having the same key as a previous row -/// increases as more input is processed. If the input order is correlated with the -/// key, skew may bias the estimate. If high cardinality keys appear first, we -/// may overestimate and if low cardinality keys appear first, we underestimate. -/// To estimate the eventual reduction achieved, we estimate the final reduction -/// using the planner's estimated input cardinality and the assumption that input -/// is in a random order. This means that we assume that the reduction factor will -/// increase over time. -struct StreamingHtMinReductionEntry { - // Use 'streaming_ht_min_reduction' if the total size of hash table bucket directories in - // bytes is greater than this threshold. - int min_ht_mem; - // The minimum reduction factor to expand the hash tables. 
- double streaming_ht_min_reduction; -}; - -// TODO: experimentally tune these values and also programmatically get the cache size -// of the machine that we're running on. -static const StreamingHtMinReductionEntry STREAMING_HT_MIN_REDUCTION[] = { - // Expand up to L2 cache always. - {0, 0.0}, - // Expand into L3 cache if we look like we're getting some reduction. - {256 * 1024, 1.1}, - // Expand into main memory if we're getting a significant reduction. - {2 * 1024 * 1024, 2.0}, -}; - -static const int STREAMING_HT_MIN_REDUCTION_SIZE = - sizeof(STREAMING_HT_MIN_REDUCTION) / sizeof(STREAMING_HT_MIN_REDUCTION[0]); - -PartitionedAggregationNode::PartitionedAggregationNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - intermediate_tuple_id_(tnode.agg_node.intermediate_tuple_id), - intermediate_tuple_desc_(descs.get_tuple_descriptor(intermediate_tuple_id_)), - intermediate_row_desc_(intermediate_tuple_desc_, false), - output_tuple_id_(tnode.agg_node.output_tuple_id), - output_tuple_desc_(descs.get_tuple_descriptor(output_tuple_id_)), - needs_finalize_(tnode.agg_node.need_finalize), - needs_serialize_(false), - output_partition_(nullptr), - process_batch_no_grouping_fn_(nullptr), - process_batch_fn_(nullptr), - process_batch_streaming_fn_(nullptr), - build_timer_(nullptr), - ht_resize_timer_(nullptr), - ht_resize_counter_(nullptr), - get_results_timer_(nullptr), - num_hash_buckets_(nullptr), - num_hash_filled_buckets_(nullptr), - num_hash_probe_(nullptr), - num_hash_failed_probe_(nullptr), - num_hash_travel_length_(nullptr), - num_hash_collisions_(nullptr), - partitions_created_(nullptr), - max_partition_level_(nullptr), - num_row_repartitioned_(nullptr), - num_repartitions_(nullptr), - num_spilled_partitions_(nullptr), - largest_partition_percent_(nullptr), - streaming_timer_(nullptr), - num_processed_rows_(nullptr), - num_passthrough_rows_(nullptr), - preagg_estimated_reduction_(nullptr), - preagg_streaming_ht_min_reduction_(nullptr), - singleton_output_tuple_(nullptr), - singleton_output_tuple_returned_(true), - partition_eos_(false), - child_eos_(false), - partition_pool_(new ObjectPool()) { - DCHECK_EQ(PARTITION_FANOUT, 1 << NUM_PARTITIONING_BITS); - - if (tnode.agg_node.__isset.use_streaming_preaggregation) { - is_streaming_preagg_ = tnode.agg_node.use_streaming_preaggregation; - if (is_streaming_preagg_) { - DCHECK(_conjunct_ctxs.empty()) << "Preaggs have no conjuncts"; - DCHECK(!tnode.agg_node.grouping_exprs.empty()) << "Streaming preaggs do grouping"; - DCHECK(_limit == -1) << "Preaggs have no limits"; - } - } else { - is_streaming_preagg_ = false; - } -} - -Status PartitionedAggregationNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - DCHECK(intermediate_tuple_desc_ != nullptr); - DCHECK(output_tuple_desc_ != nullptr); - DCHECK_EQ(intermediate_tuple_desc_->slots().size(), output_tuple_desc_->slots().size()); - - const RowDescriptor& row_desc = child(0)->row_desc(); - RETURN_IF_ERROR(Expr::create(tnode.agg_node.grouping_exprs, row_desc, state, &grouping_exprs_)); - // Construct build exprs from intermediate_row_desc_ - for (int i = 0; i < grouping_exprs_.size(); ++i) { - SlotDescriptor* desc = intermediate_tuple_desc_->slots()[i]; - //DCHECK(desc->type().type == TYPE_NULL || desc->type() == grouping_exprs_[i]->type()); - // Hack to avoid TYPE_NULL SlotRefs. - SlotRef* build_expr = - _pool->add(desc->type().type != TYPE_NULL ? 
new SlotRef(desc) - : new SlotRef(desc, TYPE_BOOLEAN)); - build_exprs_.push_back(build_expr); - // TODO chenhao - RETURN_IF_ERROR(build_expr->prepare(state, intermediate_row_desc_, nullptr)); - if (build_expr->type().is_var_len_string_type()) string_grouping_exprs_.push_back(i); - } - - int j = grouping_exprs_.size(); - for (int i = 0; i < tnode.agg_node.aggregate_functions.size(); ++i, ++j) { - SlotDescriptor* intermediate_slot_desc = intermediate_tuple_desc_->slots()[j]; - SlotDescriptor* output_slot_desc = output_tuple_desc_->slots()[j]; - AggFn* agg_fn; - RETURN_IF_ERROR(AggFn::create(tnode.agg_node.aggregate_functions[i], row_desc, - *intermediate_slot_desc, *output_slot_desc, state, &agg_fn)); - agg_fns_.push_back(agg_fn); - needs_serialize_ |= agg_fn->supports_serialize(); - } - return Status::OK(); -} - -Status PartitionedAggregationNode::prepare(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - state_ = state; - - mem_pool_.reset(new MemPool(mem_tracker_held())); - agg_fn_pool_.reset(new MemPool(mem_tracker_held())); - - ht_resize_timer_ = ADD_TIMER(runtime_profile(), "HTResizeTime"); - get_results_timer_ = ADD_TIMER(runtime_profile(), "GetResultsTime"); - num_processed_rows_ = ADD_COUNTER(runtime_profile(), "RowsProcessed", TUnit::UNIT); - num_hash_buckets_ = ADD_COUNTER(runtime_profile(), "HashBuckets", TUnit::UNIT); - num_hash_filled_buckets_ = ADD_COUNTER(runtime_profile(), "HashFilledBuckets", TUnit::UNIT); - num_hash_probe_ = ADD_COUNTER(runtime_profile(), "HashProbe", TUnit::UNIT); - num_hash_failed_probe_ = ADD_COUNTER(runtime_profile(), "HashFailedProbe", TUnit::UNIT); - num_hash_travel_length_ = ADD_COUNTER(runtime_profile(), "HashTravelLength", TUnit::UNIT); - num_hash_collisions_ = ADD_COUNTER(runtime_profile(), "HashCollisions", TUnit::UNIT); - ht_resize_counter_ = ADD_COUNTER(runtime_profile(), "HTResize", TUnit::UNIT); - partitions_created_ = ADD_COUNTER(runtime_profile(), "PartitionsCreated", TUnit::UNIT); - largest_partition_percent_ = - runtime_profile()->AddHighWaterMarkCounter("LargestPartitionPercent", TUnit::UNIT); - - if (config::enable_quadratic_probing) { - runtime_profile()->add_info_string("Probe Method", "HashTable Quadratic Probing"); - } else { - runtime_profile()->add_info_string("Probe Method", "HashTable Linear Probing"); - } - - if (is_streaming_preagg_) { - runtime_profile()->append_exec_option("Streaming Preaggregation"); - streaming_timer_ = ADD_TIMER(runtime_profile(), "StreamingTime"); - num_passthrough_rows_ = ADD_COUNTER(runtime_profile(), "RowsPassedThrough", TUnit::UNIT); - preagg_estimated_reduction_ = - ADD_COUNTER(runtime_profile(), "ReductionFactorEstimate", TUnit::DOUBLE_VALUE); - preagg_streaming_ht_min_reduction_ = ADD_COUNTER( - runtime_profile(), "ReductionFactorThresholdToExpand", TUnit::DOUBLE_VALUE); - } else { - build_timer_ = ADD_TIMER(runtime_profile(), "BuildTime"); - num_row_repartitioned_ = ADD_COUNTER(runtime_profile(), "RowsRepartitioned", TUnit::UNIT); - num_repartitions_ = ADD_COUNTER(runtime_profile(), "NumRepartitions", TUnit::UNIT); - num_spilled_partitions_ = ADD_COUNTER(runtime_profile(), "SpilledPartitions", TUnit::UNIT); - max_partition_level_ = - runtime_profile()->AddHighWaterMarkCounter("MaxPartitionLevel", TUnit::UNIT); - } - // TODO chenhao - const RowDescriptor& row_desc = child(0)->row_desc(); - RETURN_IF_ERROR(NewAggFnEvaluator::Create(agg_fns_, state, _pool, 
agg_fn_pool_.get(), - &agg_fn_evals_, row_desc)); - - expr_results_pool_.reset(new MemPool(mem_tracker_held())); - if (!grouping_exprs_.empty()) { - RowDescriptor build_row_desc(intermediate_tuple_desc_, false); - RETURN_IF_ERROR(PartitionedHashTableCtx::Create( - _pool, state, build_exprs_, grouping_exprs_, true, - vector(build_exprs_.size(), true), state->fragment_hash_seed(), - MAX_PARTITION_DEPTH, 1, nullptr, expr_results_pool_.get(), build_row_desc, row_desc, - &ht_ctx_)); - } - // AddCodegenDisabledMessage(state); - return Status::OK(); -} - -Status PartitionedAggregationNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - // Open the child before consuming resources in this node. - RETURN_IF_ERROR(child(0)->open(state)); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - // Claim reservation after the child has been opened to reduce the peak reservation - // requirement. - if (!_buffer_pool_client.is_registered() && !grouping_exprs_.empty()) { - RETURN_IF_ERROR(claim_buffer_reservation(state)); - } - - if (ht_ctx_.get() != nullptr) RETURN_IF_ERROR(ht_ctx_->Open(state)); - RETURN_IF_ERROR(NewAggFnEvaluator::Open(agg_fn_evals_, state)); - if (grouping_exprs_.empty()) { - // Create the single output tuple for this non-grouping agg. This must happen after - // opening the aggregate evaluators. - singleton_output_tuple_ = ConstructSingletonOutputTuple(agg_fn_evals_, mem_pool_.get()); - // Check for failures during NewAggFnEvaluator::Init(). - RETURN_IF_ERROR(state_->query_status()); - singleton_output_tuple_returned_ = false; - } else { - if (ht_allocator_ == nullptr) { - // Allocate 'serialize_stream_' and 'ht_allocator_' on the first Open() call. - ht_allocator_.reset(new Suballocator(state_->exec_env()->buffer_pool(), - &_buffer_pool_client, - _resource_profile.spillable_buffer_size)); - - if (!is_streaming_preagg_ && needs_serialize_) { - serialize_stream_.reset(new BufferedTupleStream3( - state, &intermediate_row_desc_, &_buffer_pool_client, - _resource_profile.spillable_buffer_size)); - RETURN_IF_ERROR(serialize_stream_->Init(id(), false)); - RETURN_IF_ERROR(serialize_stream_->PrepareForWrite()); - DCHECK(serialize_stream_->has_write_iterator()); - } - } - RETURN_IF_ERROR(CreateHashPartitions(0)); - } - - // Streaming preaggregations do all processing in GetNext(). - if (is_streaming_preagg_) return Status::OK(); - - RowBatch batch(child(0)->row_desc(), state->batch_size()); - // Read all the rows from the child and process them. - bool eos = false; - do { - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state( - "New partitioned aggregation, while getting next from child 0.")); - RETURN_IF_ERROR(_children[0]->get_next(state, &batch, &eos)); - if (UNLIKELY(VLOG_ROW_IS_ON)) { - for (int i = 0; i < batch.num_rows(); ++i) { - TupleRow* row = batch.get_row(i); - VLOG_ROW << "input row: " << row->to_string(_children[0]->row_desc()); - } - } - - SCOPED_TIMER(build_timer_); - if (grouping_exprs_.empty()) { - if (process_batch_no_grouping_fn_ != nullptr) { - RETURN_IF_ERROR(process_batch_no_grouping_fn_(this, &batch)); - } else { - RETURN_IF_ERROR(ProcessBatchNoGrouping(&batch)); - } - } else { - // There is grouping, so we will do partitioned aggregation. 
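
As context for the partitioned path taken in the dispatch that follows: a later comment in this file notes that the upper `NUM_PARTITIONING_BITS` of a row's hash pick the partition, leaving the low bits to the per-partition hash table. A standalone sketch of that split; the concrete values 4 and 16 are the Impala-derived defaults and are an assumption here:

```cpp
// Sketch of hash-based partition fan-out: top bits choose the partition.
#include <cstdint>
#include <iostream>

constexpr int NUM_PARTITIONING_BITS = 4;               // assumed value
constexpr int PARTITION_FANOUT = 1 << NUM_PARTITIONING_BITS; // 16

inline int partition_index(uint32_t hash) {
    // Use the upper bits so the per-partition table can keep using the rest.
    return hash >> (32 - NUM_PARTITIONING_BITS);
}

int main() {
    for (uint32_t h : {0x00000000u, 0x7fffffffu, 0xdeadbeefu}) {
        std::cout << std::hex << h << " -> partition " << std::dec
                  << partition_index(h) << " of " << PARTITION_FANOUT << "\n";
    }
}
```
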
- if (process_batch_fn_ != nullptr) { - RETURN_IF_ERROR(process_batch_fn_(this, &batch, ht_ctx_.get())); - } else { - RETURN_IF_ERROR(ProcessBatch(&batch, ht_ctx_.get())); - } - } - batch.reset(); - } while (!eos); - - // The child can be closed at this point in most cases because we have consumed all of - // the input from the child and transfered ownership of the resources we need. The - // exception is if we are inside a subplan expecting to call Open()/GetNext() on the - // child again, - if (!is_in_subplan()) child(0)->close(state); - child_eos_ = true; - - // Done consuming child(0)'s input. Move all the partitions in hash_partitions_ - // to spilled_partitions_ or aggregated_partitions_. We'll finish the processing in - // GetNext(). - if (!grouping_exprs_.empty()) { - RETURN_IF_ERROR(MoveHashPartitions(child(0)->rows_returned())); - } - return Status::OK(); -} - -Status PartitionedAggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - // 1. `!need_finalize` means this aggregation node not the level two aggregation node - // 2. `grouping_exprs_.size() == 0 ` means is not group by - // 3. `child(0)->rows_returned() == 0` mean not data from child - // in level two aggregation node should return nullptr result - // level one aggregation node set `eos = true` return directly - if (UNLIKELY(grouping_exprs_.size() == 0 && !needs_finalize_ && - child(0)->rows_returned() == 0)) { - *eos = true; - return Status::OK(); - } - // PartitionedAggregationNode is a spill node, GetNextInternal will read tuple from a tuple stream - // then copy the pointer to a RowBatch, it can only guarantee that the life cycle is valid in a batch stage. - // If the ancestor node is a no-spilling blocking node (such as hash_join_node except_node ...) - // these node may acquire a invalid tuple pointer, - // so we should use deep_copy, and copy tuple to the tuple_pool, to ensure tuple not finalized. - // reference issue #5466 - // TODO: if ancestor node don't have a no-spilling blocking node, we could avoid a deep_copy - // we should a flag indicate this node don't have to deep_copy - DCHECK_EQ(row_batch->num_rows(), 0); - RowBatch batch(row_batch->row_desc(), row_batch->capacity()); - int first_row_idx = batch.num_rows(); - RETURN_IF_ERROR(GetNextInternal(state, &batch, eos)); - RETURN_IF_ERROR(HandleOutputStrings(&batch, first_row_idx)); - batch.deep_copy_to(row_batch); - return Status::OK(); -} - -Status PartitionedAggregationNode::HandleOutputStrings(RowBatch* row_batch, int first_row_idx) { - if (!needs_finalize_ && !needs_serialize_) return Status::OK(); - // String data returned by Serialize() or Finalize() is from local expr allocations in - // the agg function contexts, and will be freed on the next GetNext() call by - // FreeLocalAllocations(). The data either needs to be copied out now or sent up the - // plan and copied out by a blocking ancestor. (See IMPALA-3311) - for (const AggFn* agg_fn : agg_fns_) { - const SlotDescriptor& slot_desc = agg_fn->output_slot_desc(); - DCHECK(!slot_desc.type().is_collection_type()) << "producing collections NYI"; - if (!slot_desc.type().is_var_len_string_type()) continue; - if (is_in_subplan()) { - // Copy string data to the row batch's pool. This is more efficient than - // MarkNeedsDeepCopy() in a subplan since we are likely producing many small - // batches. 
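
The `CopyStringData()` call that follows relocates each varlen string into a pool owned by the outgoing batch so the bytes outlive the next round of local-allocation cleanup. A toy standalone version of that relocation, with stand-in `Pool` and `StringValue` types:

```cpp
// Sketch of CopyStringData(): move string bytes into batch-owned memory and
// repoint the slot. Pool and StringValue are simplified stand-ins.
#include <cstring>
#include <deque>
#include <string>

struct Pool {
    // Toy allocator: every allocation stays alive as long as the pool does.
    char* allocate(size_t n) { return _chunks.emplace_back(n, '\0').data(); }
    std::deque<std::string> _chunks;
};

struct StringValue {
    char* ptr = nullptr;
    int len = 0;
};

void copy_string_data(StringValue* sv, Pool* pool) {
    if (sv == nullptr || sv->len == 0) return;
    char* new_ptr = pool->allocate(sv->len);
    std::memcpy(new_ptr, sv->ptr, sv->len);
    sv->ptr = new_ptr; // the slot now points at memory the row batch owns
}
```
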
- RETURN_IF_ERROR(CopyStringData(slot_desc, row_batch, first_row_idx, - row_batch->tuple_data_pool())); - } else { - row_batch->mark_needs_deep_copy(); - break; - } - } - return Status::OK(); -} - -Status PartitionedAggregationNode::CopyStringData(const SlotDescriptor& slot_desc, - RowBatch* row_batch, int first_row_idx, - MemPool* pool) { - DCHECK(slot_desc.type().is_var_len_string_type()); - DCHECK_EQ(row_batch->row_desc().tuple_descriptors().size(), 1); - FOREACH_ROW(row_batch, first_row_idx, batch_iter) { - Tuple* tuple = batch_iter.get()->get_tuple(0); - StringValue* sv = reinterpret_cast(tuple->get_slot(slot_desc.tuple_offset())); - if (sv == nullptr || sv->len == 0) continue; - char* new_ptr = reinterpret_cast(pool->try_allocate(sv->len)); - if (UNLIKELY(new_ptr == nullptr)) { - string details = Substitute( - "Cannot perform aggregation at node with id $0." - " Failed to allocate $1 output bytes.", - _id, sv->len); - RETURN_LIMIT_EXCEEDED(state_, details, sv->len); - } - memcpy(new_ptr, sv->ptr, sv->len); - sv->ptr = new_ptr; - } - return Status::OK(); -} - -Status PartitionedAggregationNode::GetNextInternal(RuntimeState* state, RowBatch* row_batch, - bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state("New partitioned aggregation, while getting next.")); - // clear tmp expr result alocations - expr_results_pool_->clear(); - - if (reached_limit()) { - *eos = true; - return Status::OK(); - } - - if (grouping_exprs_.empty()) { - // There was no grouping, so evaluate the conjuncts and return the single result row. - // We allow calling GetNext() after eos, so don't return this row again. - if (!singleton_output_tuple_returned_) GetSingletonOutput(row_batch); - singleton_output_tuple_returned_ = true; - *eos = true; - return Status::OK(); - } - - if (!child_eos_) { - // For streaming preaggregations, we process rows from the child as we go. - DCHECK(is_streaming_preagg_); - RETURN_IF_ERROR(GetRowsStreaming(state, row_batch)); - } else if (!partition_eos_) { - RETURN_IF_ERROR(GetRowsFromPartition(state, row_batch)); - } - - *eos = partition_eos_ && child_eos_; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - return Status::OK(); -} - -void PartitionedAggregationNode::GetSingletonOutput(RowBatch* row_batch) { - DCHECK(grouping_exprs_.empty()); - int row_idx = row_batch->add_row(); - TupleRow* row = row_batch->get_row(row_idx); - Tuple* output_tuple = - GetOutputTuple(agg_fn_evals_, singleton_output_tuple_, row_batch->tuple_data_pool()); - row->set_tuple(0, output_tuple); - if (ExecNode::eval_conjuncts(_conjunct_ctxs.data(), _conjunct_ctxs.size(), row)) { - row_batch->commit_last_row(); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - } - // Keep the current chunk to amortize the memory allocation over a series - // of Reset()/Open()/GetNext()* calls. - row_batch->tuple_data_pool()->acquire_data(mem_pool_.get(), true); - // This node no longer owns the memory for singleton_output_tuple_. - singleton_output_tuple_ = nullptr; -} - -Status PartitionedAggregationNode::GetRowsFromPartition(RuntimeState* state, RowBatch* row_batch) { - DCHECK(!row_batch->at_capacity()); - if (output_iterator_.AtEnd()) { - // Done with this partition, move onto the next one. 
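
Further down in this function, the output loop performs query maintenance only every N iterations, rounding N up to a power of two so a mask replaces a modulo. A standalone sketch of that trick; `next_power_of_two` here plays the role of `BitUtil::next_power_of_two`:

```cpp
// Sketch of the periodic-maintenance mask: with N a power of two,
// "(count & (N - 1)) == 0" is true exactly once every N iterations.
#include <cstdint>
#include <iostream>

uint32_t next_power_of_two(uint32_t v) {
    --v;
    v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16;
    return v + 1;
}

int main() {
    const uint32_t N = next_power_of_two(1024); // e.g. the batch size
    int maintenance_checks = 0;
    for (uint32_t count = 0; count < 5000; ++count) {
        if ((count & (N - 1)) == 0) ++maintenance_checks; // every N rows
    }
    std::cout << maintenance_checks << "\n"; // 5 checks for 5000 rows at N=1024
}
```
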
- if (output_partition_ != nullptr) { - output_partition_->Close(false); - output_partition_ = nullptr; - } - if (aggregated_partitions_.empty() && spilled_partitions_.empty()) { - // No more partitions, all done. - partition_eos_ = true; - return Status::OK(); - } - // Process next partition. - RETURN_IF_ERROR(NextPartition()); - DCHECK(output_partition_ != nullptr); - } - - SCOPED_TIMER(get_results_timer_); - int count = 0; - const int N = BitUtil::next_power_of_two(state->batch_size()); - // Keeping returning rows from the current partition. - while (!output_iterator_.AtEnd()) { - // This loop can go on for a long time if the conjuncts are very selective. Do query - // maintenance every N iterations. - if ((count++ & (N - 1)) == 0) { - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state( - "New partitioned aggregation, while getting rows from partition.")); - } - - int row_idx = row_batch->add_row(); - TupleRow* row = row_batch->get_row(row_idx); - Tuple* intermediate_tuple = output_iterator_.GetTuple(); - Tuple* output_tuple = GetOutputTuple(output_partition_->agg_fn_evals, intermediate_tuple, - row_batch->tuple_data_pool()); - output_iterator_.Next(); - row->set_tuple(0, output_tuple); - // TODO chenhao - // DCHECK_EQ(_conjunct_ctxs.size(), _conjuncts.size()); - if (ExecNode::eval_conjuncts(_conjunct_ctxs.data(), _conjunct_ctxs.size(), row)) { - row_batch->commit_last_row(); - ++_num_rows_returned; - if (reached_limit() || row_batch->at_capacity()) { - break; - } - } - } - - COUNTER_SET(num_processed_rows_, num_hash_probe_->value()); - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - partition_eos_ = reached_limit(); - if (output_iterator_.AtEnd()) row_batch->mark_needs_deep_copy(); - - return Status::OK(); -} - -Status PartitionedAggregationNode::GetRowsStreaming(RuntimeState* state, RowBatch* out_batch) { - DCHECK(!child_eos_); - DCHECK(is_streaming_preagg_); - - if (child_batch_ == nullptr) { - child_batch_.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); - } - - do { - DCHECK_EQ(out_batch->num_rows(), 0); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state( - "New partitioned aggregation, while getting rows in streaming.")); - - RETURN_IF_ERROR(child(0)->get_next(state, child_batch_.get(), &child_eos_)); - SCOPED_TIMER(streaming_timer_); - - int remaining_capacity[PARTITION_FANOUT]; - bool ht_needs_expansion = false; - for (int i = 0; i < PARTITION_FANOUT; ++i) { - PartitionedHashTable* hash_tbl = GetHashTable(i); - remaining_capacity[i] = hash_tbl->NumInsertsBeforeResize(); - ht_needs_expansion |= remaining_capacity[i] < child_batch_->num_rows(); - } - - // Stop expanding hash tables if we're not reducing the input sufficiently. As our - // hash tables expand out of each level of cache hierarchy, every hash table lookup - // will take longer. We also may not be able to expand hash tables because of memory - // pressure. In this case HashTable::CheckAndResize() will fail. In either case we - // should always use the remaining space in the hash table to avoid wasting memory. 
- if (ht_needs_expansion && ShouldExpandPreaggHashTables()) { - for (int i = 0; i < PARTITION_FANOUT; ++i) { - PartitionedHashTable* ht = GetHashTable(i); - if (remaining_capacity[i] < child_batch_->num_rows()) { - SCOPED_TIMER(ht_resize_timer_); - bool resized; - RETURN_IF_ERROR( - ht->CheckAndResize(child_batch_->num_rows(), ht_ctx_.get(), &resized)); - if (resized) { - remaining_capacity[i] = ht->NumInsertsBeforeResize(); - } - } - } - } - - if (process_batch_streaming_fn_ != nullptr) { - RETURN_IF_ERROR(process_batch_streaming_fn_(this, needs_serialize_, child_batch_.get(), - out_batch, ht_ctx_.get(), - remaining_capacity)); - } else { - RETURN_IF_ERROR(ProcessBatchStreaming(needs_serialize_, child_batch_.get(), out_batch, - ht_ctx_.get(), remaining_capacity)); - } - - child_batch_->reset(); // All rows from child_batch_ were processed. - } while (out_batch->num_rows() == 0 && !child_eos_); - - if (child_eos_) { - child(0)->close(state); - child_batch_.reset(); - RETURN_IF_ERROR(MoveHashPartitions(child(0)->rows_returned())); - } - - _num_rows_returned += out_batch->num_rows(); - COUNTER_SET(num_passthrough_rows_, _num_rows_returned); - return Status::OK(); -} - -bool PartitionedAggregationNode::ShouldExpandPreaggHashTables() const { - int64_t ht_mem = 0; - int64_t ht_rows = 0; - for (int i = 0; i < PARTITION_FANOUT; ++i) { - PartitionedHashTable* ht = hash_partitions_[i]->hash_tbl.get(); - ht_mem += ht->CurrentMemSize(); - ht_rows += ht->size(); - } - - // Need some rows in tables to have valid statistics. - if (ht_rows == 0) return true; - - // Find the appropriate reduction factor in our table for the current hash table sizes. - int cache_level = 0; - while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE && - ht_mem >= STREAMING_HT_MIN_REDUCTION[cache_level + 1].min_ht_mem) { - ++cache_level; - } - - // Compare the number of rows in the hash table with the number of input rows that - // were aggregated into it. Exclude passed through rows from this calculation since - // they were not in hash tables. - const int64_t input_rows = _children[0]->rows_returned(); - const int64_t aggregated_input_rows = input_rows - _num_rows_returned; - // TODO chenhao - double current_reduction = static_cast(aggregated_input_rows) / ht_rows; - - // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be - // inaccurate, which could lead to a divide by zero below. - if (aggregated_input_rows <= 0) return true; - - // Extrapolate the current reduction factor (r) using the formula - // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data - // set, N is the number of input rows, excluding passed-through rows, and n is the - // number of rows inserted or merged into the hash tables. This is a very rough - // approximation but is good enough to be useful. - // TODO: consider collecting more statistics to better estimate reduction. - // double estimated_reduction = aggregated_input_rows >= expected_input_rows - // ? 
current_reduction - // : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1); - double min_reduction = STREAMING_HT_MIN_REDUCTION[cache_level].streaming_ht_min_reduction; - - // COUNTER_SET(preagg_estimated_reduction_, estimated_reduction); - COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction); - // return estimated_reduction > min_reduction; - return current_reduction > min_reduction; -} - -void PartitionedAggregationNode::CleanupHashTbl(const vector& agg_fn_evals, - PartitionedHashTable::Iterator it) { - if (!needs_finalize_ && !needs_serialize_) return; - - // Iterate through the remaining rows in the hash table and call Serialize/Finalize on - // them in order to free any memory allocated by UDAs. - if (needs_finalize_) { - // Finalize() requires a dst tuple but we don't actually need the result, - // so allocate a single dummy tuple to avoid accumulating memory. - Tuple* dummy_dst = nullptr; - dummy_dst = Tuple::create(output_tuple_desc_->byte_size(), mem_pool_.get()); - while (!it.AtEnd()) { - Tuple* tuple = it.GetTuple(); - NewAggFnEvaluator::Finalize(agg_fn_evals, tuple, dummy_dst); - it.Next(); - } - } else { - while (!it.AtEnd()) { - Tuple* tuple = it.GetTuple(); - NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); - it.Next(); - } - } -} - -Status PartitionedAggregationNode::reset(RuntimeState* state) { - DCHECK(!is_streaming_preagg_) << "Cannot reset preaggregation"; - if (!grouping_exprs_.empty()) { - child_eos_ = false; - partition_eos_ = false; - // Reset the HT and the partitions for this grouping agg. - ht_ctx_->set_level(0); - ClosePartitions(); - } - return ExecNode::reset(state); -} - -Status PartitionedAggregationNode::close(RuntimeState* state) { - if (is_closed()) return Status::OK(); - - if (!singleton_output_tuple_returned_) { - GetOutputTuple(agg_fn_evals_, singleton_output_tuple_, mem_pool_.get()); - } - - // Iterate through the remaining rows in the hash table and call Serialize/Finalize on - // them in order to free any memory allocated by UDAs - if (output_partition_ != nullptr) { - CleanupHashTbl(output_partition_->agg_fn_evals, output_iterator_); - output_partition_->Close(false); - } - - ClosePartitions(); - child_batch_.reset(); - - // Close all the agg-fn-evaluators - NewAggFnEvaluator::Close(agg_fn_evals_, state); - - if (expr_results_pool_.get() != nullptr) { - expr_results_pool_->free_all(); - } - if (agg_fn_pool_.get() != nullptr) agg_fn_pool_->free_all(); - if (mem_pool_.get() != nullptr) mem_pool_->free_all(); - if (ht_ctx_.get() != nullptr) ht_ctx_->Close(state); - ht_ctx_.reset(); - if (serialize_stream_.get() != nullptr) { - serialize_stream_->Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - } - Expr::close(grouping_exprs_); - Expr::close(build_exprs_); - AggFn::close(agg_fns_); - return ExecNode::close(state); -} - -PartitionedAggregationNode::Partition::~Partition() { - DCHECK(is_closed); -} - -Status PartitionedAggregationNode::Partition::InitStreams() { - agg_fn_pool.reset(new MemPool(parent->mem_tracker_held())); - DCHECK_EQ(agg_fn_evals.size(), 0); - NewAggFnEvaluator::ShallowClone(parent->partition_pool_.get(), agg_fn_pool.get(), - parent->agg_fn_evals_, &agg_fn_evals); - - // Varlen aggregate function results are stored outside of aggregated_row_stream because - // BufferedTupleStream3 doesn't support relocating varlen data stored in the stream. 
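
Backing up to `ShouldExpandPreaggHashTables()` above: the commented-out extrapolation R = 1 + (N / n) * (r - 1) and the cache-level thresholds combine as in this standalone worked example. All numbers are assumed for illustration, not taken from a real profile:

```cpp
// Worked example of the streaming preagg expansion heuristic: pick the
// minimum-reduction threshold by current hash table footprint, then
// extrapolate the final reduction factor.
#include <cstdint>
#include <iostream>

struct Entry { int64_t min_ht_mem; double min_reduction; };
const Entry kTable[] = {
    {0, 0.0},               // within L2: always expand
    {256 * 1024, 1.1},      // L3: expand if mildly reducing
    {2 * 1024 * 1024, 2.0}, // main memory: expand only if clearly reducing
};

int main() {
    int64_t ht_mem = 512 * 1024; // current bucket-directory bytes (assumed)
    int level = 0;
    while (level + 1 < 3 && ht_mem >= kTable[level + 1].min_ht_mem) ++level;

    double N = 1e6;  // planner's estimated input rows (assumed)
    double n = 1e5;  // rows merged into the tables so far (assumed)
    double r = 1.5;  // current reduction factor (assumed)
    double R = 1 + (N / n) * (r - 1); // extrapolated reduction: 6.0 here

    std::cout << "threshold=" << kTable[level].min_reduction // 1.1 at L3
              << " extrapolated=" << R
              << " expand=" << (R > kTable[level].min_reduction) << "\n";
}
```
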
- auto agg_slot = - parent->intermediate_tuple_desc_->slots().begin() + parent->grouping_exprs_.size(); - std::set external_varlen_slots; - for (; agg_slot != parent->intermediate_tuple_desc_->slots().end(); ++agg_slot) { - if ((*agg_slot)->type().is_var_len_string_type()) { - external_varlen_slots.insert((*agg_slot)->id()); - } - } - - aggregated_row_stream.reset(new BufferedTupleStream3( - parent->state_, &parent->intermediate_row_desc_, &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size, external_varlen_slots)); - RETURN_IF_ERROR(aggregated_row_stream->Init(parent->id(), true)); - RETURN_IF_ERROR(aggregated_row_stream->PrepareForWrite()); - - if (!parent->is_streaming_preagg_) { - unaggregated_row_stream.reset(new BufferedTupleStream3( - parent->state_, &(parent->child(0)->row_desc()), &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size)); - // This stream is only used to spill, no need to ever have this pinned. - RETURN_IF_ERROR(unaggregated_row_stream->Init(parent->id(), false)); - // Save memory by waiting until we spill to allocate the write buffer for the - // unaggregated row stream. - DCHECK(!unaggregated_row_stream->has_write_iterator()); - } - return Status::OK(); -} - -Status PartitionedAggregationNode::Partition::InitHashTable(bool* got_memory) { - DCHECK(aggregated_row_stream != nullptr); - DCHECK(hash_tbl == nullptr); - // We use the upper PARTITION_FANOUT num bits to pick the partition so only the - // remaining bits can be used for the hash table. - // TODO: we could switch to 64 bit hashes and then we don't need a max size. - // It might be reasonable to limit individual hash table size for other reasons - // though. Always start with small buffers. - hash_tbl.reset(PartitionedHashTable::Create(parent->ht_allocator_.get(), false, 1, nullptr, - 1L << (32 - NUM_PARTITIONING_BITS), - PAGG_DEFAULT_HASH_TABLE_SZ)); - // Please update the error message in CreateHashPartitions() if initial size of - // hash table changes. - return hash_tbl->Init(got_memory); -} - -Status PartitionedAggregationNode::Partition::SerializeStreamForSpilling() { - DCHECK(!parent->is_streaming_preagg_); - if (parent->needs_serialize_) { - // We need to do a lot more work in this case. This step effectively does a merge - // aggregation in this node. We need to serialize the intermediates, spill the - // intermediates and then feed them into the aggregate function's merge step. - // This is often used when the intermediate is a string type, meaning the current - // (before serialization) in-memory layout is not the on-disk block layout. - // The disk layout does not support mutable rows. We need to rewrite the stream - // into the on disk format. - // TODO: if it happens to not be a string, we could serialize in place. This is - // a future optimization since it is very unlikely to have a serialize phase - // for those UDAs. - DCHECK(parent->serialize_stream_.get() != nullptr); - DCHECK(!parent->serialize_stream_->is_pinned()); - - // Serialize and copy the spilled partition's stream into the new stream. 
- Status status = Status::OK(); - BufferedTupleStream3* new_stream = parent->serialize_stream_.get(); - PartitionedHashTable::Iterator it = hash_tbl->Begin(parent->ht_ctx_.get()); - while (!it.AtEnd()) { - Tuple* tuple = it.GetTuple(); - it.Next(); - NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); - if (UNLIKELY(!new_stream->AddRow(reinterpret_cast(&tuple), &status))) { - DCHECK(!status.ok()) << "Stream was unpinned - AddRow() only fails on error"; - // Even if we can't add to new_stream, finish up processing this agg stream to make - // clean up easier (someone has to finalize this stream and we don't want to remember - // where we are). - parent->CleanupHashTbl(agg_fn_evals, it); - hash_tbl->Close(); - hash_tbl.reset(); - aggregated_row_stream->Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - return status; - } - } - - aggregated_row_stream->Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - aggregated_row_stream.swap(parent->serialize_stream_); - // Recreate the serialize_stream (and reserve 1 buffer) now in preparation for - // when we need to spill again. We need to have this available before we need - // to spill to make sure it is available. This should be acquirable since we just - // freed at least one buffer from this partition's (old) aggregated_row_stream. - parent->serialize_stream_.reset(new BufferedTupleStream3( - parent->state_, &parent->intermediate_row_desc_, &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size)); - status = parent->serialize_stream_->Init(parent->id(), false); - if (status.ok()) { - status = parent->serialize_stream_->PrepareForWrite(); - } - if (!status.ok()) { - hash_tbl->Close(); - hash_tbl.reset(); - return status; - } - DCHECK(parent->serialize_stream_->has_write_iterator()); - } - return Status::OK(); -} - -Status PartitionedAggregationNode::Partition::Spill(bool more_aggregate_rows) { - DCHECK(!parent->is_streaming_preagg_); - DCHECK(!is_closed); - DCHECK(!is_spilled()); - // TODO(ml): enable spill - std::stringstream msg; - msg << "New partitioned Aggregation in spill"; - RETURN_LIMIT_EXCEEDED(parent->state_, msg.str()); - - RETURN_IF_ERROR(SerializeStreamForSpilling()); - - // Free the in-memory result data. - NewAggFnEvaluator::Close(agg_fn_evals, parent->state_); - agg_fn_evals.clear(); - - if (agg_fn_pool.get() != nullptr) { - agg_fn_pool->free_all(); - agg_fn_pool.reset(); - } - - hash_tbl->Close(); - hash_tbl.reset(); - - // Unpin the stream to free memory, but leave a write buffer in place so we can - // continue appending rows to one of the streams in the partition. - DCHECK(aggregated_row_stream->has_write_iterator()); - DCHECK(!unaggregated_row_stream->has_write_iterator()); - if (more_aggregate_rows) { - // aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL_EXCEPT_CURRENT); - } else { - // aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL); - RETURN_IF_ERROR(unaggregated_row_stream->PrepareForWrite()); - } - - COUNTER_UPDATE(parent->num_spilled_partitions_, 1); - if (parent->num_spilled_partitions_->value() == 1) { - parent->add_runtime_exec_option("Spilled"); - } - return Status::OK(); -} - -void PartitionedAggregationNode::Partition::Close(bool finalize_rows) { - if (is_closed) return; - is_closed = true; - if (aggregated_row_stream.get() != nullptr) { - if (finalize_rows && hash_tbl.get() != nullptr) { - // We need to walk all the rows and Finalize them here so the UDA gets a chance - // to cleanup. 
If the hash table is gone (meaning this was spilled), the rows - // should have been finalized/serialized in Spill(). - parent->CleanupHashTbl(agg_fn_evals, hash_tbl->Begin(parent->ht_ctx_.get())); - } - aggregated_row_stream->Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - } - if (hash_tbl.get() != nullptr) hash_tbl->Close(); - if (unaggregated_row_stream.get() != nullptr) { - unaggregated_row_stream->Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES); - } - - for (NewAggFnEvaluator* eval : agg_fn_evals) eval->Close(parent->state_); - if (agg_fn_pool.get() != nullptr) agg_fn_pool->free_all(); -} - -Tuple* PartitionedAggregationNode::ConstructSingletonOutputTuple( - const vector& agg_fn_evals, MemPool* pool) { - DCHECK(grouping_exprs_.empty()); - Tuple* output_tuple = Tuple::create(intermediate_tuple_desc_->byte_size(), pool); - InitAggSlots(agg_fn_evals, output_tuple); - return output_tuple; -} - -Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( - const vector& agg_fn_evals, MemPool* pool, Status* status) { - const int fixed_size = intermediate_tuple_desc_->byte_size(); - const int varlen_size = GroupingExprsVarlenSize(); - const int tuple_data_size = fixed_size + varlen_size; - uint8_t* tuple_data = pool->try_allocate(tuple_data_size); - if (UNLIKELY(tuple_data == nullptr)) { - stringstream str; - str << "Memory exceed limit. Cannot perform aggregation at node with id $0. Failed " - << "to allocate $1 bytes for intermediate tuple. " - << "Backend: " << BackendOptions::get_localhost() << ", " - << "fragment: " << print_id(state_->fragment_instance_id()) << " " - << "Used: " << thread_context()->thread_mem_tracker()->consumption() - << ", Limit: " << thread_context()->thread_mem_tracker()->limit() << ". " - << "You can change the limit by session variable exec_mem_limit."; - string details = Substitute(str.str(), _id, tuple_data_size); - *status = thread_context()->thread_mem_tracker()->fragment_mem_limit_exceeded( - state_, details, tuple_data_size); - return nullptr; - } - memset(tuple_data, 0, fixed_size); - Tuple* intermediate_tuple = reinterpret_cast(tuple_data); - uint8_t* varlen_data = tuple_data + fixed_size; - CopyGroupingValues(intermediate_tuple, varlen_data, varlen_size); - InitAggSlots(agg_fn_evals, intermediate_tuple); - return intermediate_tuple; -} - -Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( - const vector& agg_fn_evals, BufferedTupleStream3* stream, - Status* status) { - DCHECK(stream != nullptr && status != nullptr); - // Allocate space for the entire tuple in the stream. - const int fixed_size = intermediate_tuple_desc_->byte_size(); - const int varlen_size = GroupingExprsVarlenSize(); - const int tuple_size = fixed_size + varlen_size; - uint8_t* tuple_data = stream->AddRowCustomBegin(tuple_size, status); - if (UNLIKELY(tuple_data == nullptr)) { - // If we failed to allocate and did not hit an error (indicated by a non-ok status), - // the caller of this function can try to free some space, e.g. through spilling, and - // re-attempt to allocate space for this row. - return nullptr; - } - Tuple* tuple = reinterpret_cast(tuple_data); - tuple->init(fixed_size); - uint8_t* varlen_buffer = tuple_data + fixed_size; - CopyGroupingValues(tuple, varlen_buffer, varlen_size); - InitAggSlots(agg_fn_evals, tuple); - stream->AddRowCustomEnd(tuple_size); - return tuple; -} - -int PartitionedAggregationNode::GroupingExprsVarlenSize() { - int varlen_size = 0; - // TODO: The hash table could compute this as it hashes. 
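
The two `ConstructIntermediateTuple()` overloads above share one layout: a single allocation with the fixed-length slots up front and a varlen tail that string slots point into. A toy standalone sketch of that layout; `StringSlot` is a stand-in, and packing string slots from offset 0 is a simplification the real tuple does not make:

```cpp
// Sketch of the [fixed slots][varlen bytes...] tuple layout.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

struct StringSlot { const char* ptr; int len; };

std::vector<uint8_t> make_tuple(int fixed_size, const std::vector<std::string>& strings) {
    // Caller guarantees the fixed part has room for all string slots.
    assert(fixed_size >= (int)(strings.size() * sizeof(StringSlot)));

    int varlen_size = 0;
    for (const auto& s : strings) varlen_size += s.size();

    std::vector<uint8_t> buf(fixed_size + varlen_size);
    std::memset(buf.data(), 0, fixed_size); // zero the fixed part, as above

    uint8_t* tail = buf.data() + fixed_size; // varlen bytes live past the slots
    uint8_t* slot = buf.data();              // toy: slots packed from offset 0
    for (const auto& s : strings) {
        std::memcpy(tail, s.data(), s.size());
        StringSlot sv{reinterpret_cast<const char*>(tail), (int)s.size()};
        std::memcpy(slot, &sv, sizeof(sv)); // slot points into the tail
        slot += sizeof(sv);
        tail += s.size();
    }
    return buf;
}
```
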
- for (int expr_idx : string_grouping_exprs_) { - StringValue* sv = reinterpret_cast(ht_ctx_->ExprValue(expr_idx)); - // Avoid branching by multiplying length by null bit. - varlen_size += sv->len * !ht_ctx_->ExprValueNull(expr_idx); - } - return varlen_size; -} - -// TODO: codegen this function. -void PartitionedAggregationNode::CopyGroupingValues(Tuple* intermediate_tuple, uint8_t* buffer, - int varlen_size) { - // Copy over all grouping slots (the variable length data is copied below). - for (int i = 0; i < grouping_exprs_.size(); ++i) { - SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[i]; - if (ht_ctx_->ExprValueNull(i)) { - intermediate_tuple->set_null(slot_desc->null_indicator_offset()); - } else { - void* src = ht_ctx_->ExprValue(i); - void* dst = intermediate_tuple->get_slot(slot_desc->tuple_offset()); - memcpy(dst, src, slot_desc->slot_size()); - } - } - - for (int expr_idx : string_grouping_exprs_) { - if (ht_ctx_->ExprValueNull(expr_idx)) continue; - - SlotDescriptor* slot_desc = intermediate_tuple_desc_->slots()[expr_idx]; - // ptr and len were already copied to the fixed-len part of string value - StringValue* sv = reinterpret_cast( - intermediate_tuple->get_slot(slot_desc->tuple_offset())); - memcpy(buffer, sv->ptr, sv->len); - sv->ptr = reinterpret_cast(buffer); - buffer += sv->len; - } -} - -// TODO: codegen this function. -void PartitionedAggregationNode::InitAggSlots(const vector& agg_fn_evals, - Tuple* intermediate_tuple) { - vector::const_iterator slot_desc = - intermediate_tuple_desc_->slots().begin() + grouping_exprs_.size(); - for (int i = 0; i < agg_fn_evals.size(); ++i, ++slot_desc) { - // To minimize branching on the UpdateTuple path, initialize the result value so that - // the Add() UDA function can ignore the nullptr bit of its destination value. E.g. for - // SUM(), if we initialize the destination value to 0 (with the nullptr bit set), we can - // just start adding to the destination value (rather than repeatedly checking the - // destination nullptr bit. The codegen'd version of UpdateSlot() exploits this to - // eliminate a branch per value. - // - // For boolean and numeric types, the default values are false/0, so the nullable - // aggregate functions SUM() and AVG() produce the correct result. For MIN()/MAX(), - // initialize the value to max/min possible value for the same effect. - NewAggFnEvaluator* eval = agg_fn_evals[i]; - eval->Init(intermediate_tuple); - } -} - -void PartitionedAggregationNode::UpdateTuple(NewAggFnEvaluator** agg_fn_evals, Tuple* tuple, - TupleRow* row, bool is_merge) { - DCHECK(tuple != nullptr || agg_fns_.empty()); - for (int i = 0; i < agg_fns_.size(); ++i) { - if (is_merge) { - agg_fn_evals[i]->Merge(row->get_tuple(0), tuple); - } else { - agg_fn_evals[i]->Add(row, tuple); - } - } -} - -Tuple* PartitionedAggregationNode::GetOutputTuple(const vector& agg_fn_evals, - Tuple* tuple, MemPool* pool) { - DCHECK(tuple != nullptr || agg_fn_evals.empty()) << tuple; - Tuple* dst = tuple; - if (needs_finalize_ && intermediate_tuple_id_ != output_tuple_id_) { - dst = Tuple::create(output_tuple_desc_->byte_size(), pool); - } - if (needs_finalize_) { - NewAggFnEvaluator::Finalize(agg_fn_evals, tuple, dst, - grouping_exprs_.size() == 0 && child(0)->rows_returned() == 0); - } else { - NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); - } - // Copy grouping values from tuple to dst. - // TODO: Codegen this. 
- if (dst != tuple) { - int num_grouping_slots = grouping_exprs_.size(); - for (int i = 0; i < num_grouping_slots; ++i) { - SlotDescriptor* src_slot_desc = intermediate_tuple_desc_->slots()[i]; - SlotDescriptor* dst_slot_desc = output_tuple_desc_->slots()[i]; - bool src_slot_null = tuple->is_null(src_slot_desc->null_indicator_offset()); - void* src_slot = nullptr; - if (!src_slot_null) src_slot = tuple->get_slot(src_slot_desc->tuple_offset()); - RawValue::write(src_slot, dst, dst_slot_desc, nullptr); - } - } - return dst; -} - -template -Status PartitionedAggregationNode::AppendSpilledRow(Partition* partition, TupleRow* row) { - DCHECK(!is_streaming_preagg_); - DCHECK(partition->is_spilled()); - BufferedTupleStream3* stream = AGGREGATED_ROWS ? partition->aggregated_row_stream.get() - : partition->unaggregated_row_stream.get(); - DCHECK(!stream->is_pinned()); - Status status; - if (LIKELY(stream->AddRow(row, &status))) return Status::OK(); - RETURN_IF_ERROR(status); - - // Keep trying to free memory by spilling until we succeed or hit an error. - // Running out of partitions to spill is treated as an error by SpillPartition(). - while (true) { - RETURN_IF_ERROR(SpillPartition(AGGREGATED_ROWS)); - if (stream->AddRow(row, &status)) return Status::OK(); - RETURN_IF_ERROR(status); - } -} - -string PartitionedAggregationNode::DebugString(int indentation_level) const { - stringstream ss; - DebugString(indentation_level, &ss); - return ss.str(); -} - -void PartitionedAggregationNode::DebugString(int indentation_level, stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "PartitionedAggregationNode(" - << "intermediate_tuple_id=" << intermediate_tuple_id_ - << " output_tuple_id=" << output_tuple_id_ << " needs_finalize=" << needs_finalize_ - << " grouping_exprs=" << Expr::debug_string(grouping_exprs_) - << " agg_exprs=" << AggFn::debug_string(agg_fns_); - ExecNode::debug_string(indentation_level, out); - *out << ")"; -} - -Status PartitionedAggregationNode::CreateHashPartitions(int level, int single_partition_idx) { - if (is_streaming_preagg_) DCHECK_EQ(level, 0); - if (UNLIKELY(level >= MAX_PARTITION_DEPTH)) { - stringstream error_msg; - error_msg << "Cannot perform aggregation at hash aggregation node with id " << _id << '.' - << " The input data was partitioned the maximum number of " << MAX_PARTITION_DEPTH - << " times." - << " This could mean there is significant skew in the data or the memory limit is" - << " set too low."; - return state_->set_mem_limit_exceeded(error_msg.str()); - } - ht_ctx_->set_level(level); - - DCHECK(hash_partitions_.empty()); - int num_partitions_created = 0; - for (int i = 0; i < PARTITION_FANOUT; ++i) { - hash_tbls_[i] = nullptr; - if (single_partition_idx == -1 || i == single_partition_idx) { - Partition* new_partition = partition_pool_->add(new Partition(this, level, i)); - ++num_partitions_created; - hash_partitions_.push_back(new_partition); - RETURN_IF_ERROR(new_partition->InitStreams()); - } else { - hash_partitions_.push_back(nullptr); - } - } - - // Now that all the streams are reserved (meaning we have enough memory to execute - // the algorithm), allocate the hash tables. These can fail and we can still continue. - for (int i = 0; i < PARTITION_FANOUT; ++i) { - Partition* partition = hash_partitions_[i]; - if (partition == nullptr) continue; - if (partition->aggregated_row_stream == nullptr) { - // Failed to create the aggregated row stream - cannot create a hash table. 
- // Just continue with a nullptr hash table so rows will be passed through. - DCHECK(is_streaming_preagg_); - } else { - bool got_memory; - RETURN_IF_ERROR(partition->InitHashTable(&got_memory)); - // Spill the partition if we cannot create a hash table for a merge aggregation. - if (UNLIKELY(!got_memory)) { - // If we're repartitioning, we will be writing aggregated rows first. - RETURN_IF_ERROR(partition->Spill(level > 0)); - } - } - hash_tbls_[i] = partition->hash_tbl.get(); - } - // In this case we did not have to repartition, so ensure that while building the hash - // table all rows will be inserted into the partition at 'single_partition_idx' in case - // a non deterministic grouping expression causes a row to hash to a different - // partition index. - if (single_partition_idx != -1) { - Partition* partition = hash_partitions_[single_partition_idx]; - for (int i = 0; i < PARTITION_FANOUT; ++i) { - hash_partitions_[i] = partition; - hash_tbls_[i] = partition->hash_tbl.get(); - } - } - - COUNTER_UPDATE(partitions_created_, num_partitions_created); - if (!is_streaming_preagg_) { - COUNTER_SET(max_partition_level_, level); - } - return Status::OK(); -} - -Status PartitionedAggregationNode::CheckAndResizeHashPartitions( - bool partitioning_aggregated_rows, int num_rows, const PartitionedHashTableCtx* ht_ctx) { - DCHECK(!is_streaming_preagg_); - for (int i = 0; i < PARTITION_FANOUT; ++i) { - Partition* partition = hash_partitions_[i]; - if (partition == nullptr) continue; - while (!partition->is_spilled()) { - { - SCOPED_TIMER(ht_resize_timer_); - bool resized; - RETURN_IF_ERROR(partition->hash_tbl->CheckAndResize(num_rows, ht_ctx, &resized)); - if (resized) break; - } - RETURN_IF_ERROR(SpillPartition(partitioning_aggregated_rows)); - } - } - return Status::OK(); -} - -Status PartitionedAggregationNode::NextPartition() { - DCHECK(output_partition_ == nullptr); - - // Keep looping until we get to a partition that fits in memory. - Partition* partition = nullptr; - while (true) { - // First return partitions that are fully aggregated (and in memory). - if (!aggregated_partitions_.empty()) { - partition = aggregated_partitions_.front(); - DCHECK(!partition->is_spilled()); - aggregated_partitions_.pop_front(); - break; - } - - // Try to fit a single spilled partition in memory. We can often do this because - // we only need to fit 1/PARTITION_FANOUT of the data in memory. - // TODO: in some cases when the partition probably won't fit in memory it could - // be better to skip directly to repartitioning. - RETURN_IF_ERROR(BuildSpilledPartition(&partition)); - if (partition != nullptr) break; - - // If we can't fit the partition in memory, repartition it. 
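
The `NextPartition()` loop above follows a three-step policy: hand back partitions that are already aggregated in memory, otherwise try to rebuild a single spilled partition (roughly 1/PARTITION_FANOUT of the data), and only repartition when even that does not fit. A standalone reduction of that policy, with all types as stand-ins:

```cpp
// Sketch of the NextPartition() selection policy.
#include <deque>

struct Partition { bool fits_in_memory = true; };

Partition* next_partition(std::deque<Partition*>& aggregated,
                          std::deque<Partition*>& spilled) {
    while (true) {
        if (!aggregated.empty()) { // fully aggregated and resident: cheapest
            Partition* p = aggregated.front();
            aggregated.pop_front();
            return p;
        }
        if (spilled.empty()) return nullptr; // nothing left; caller treats as eos
        Partition* p = spilled.front();
        if (p->fits_in_memory) { // stands in for BuildSpilledPartition()
            spilled.pop_front();
            return p;
        }
        // Stands in for RepartitionSpilledPartition(): another level of
        // partitioning shrinks each piece; assume one level suffices here.
        p->fits_in_memory = true;
    }
}
```
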
- RETURN_IF_ERROR(RepartitionSpilledPartition()); - } - DCHECK(!partition->is_spilled()); - DCHECK(partition->hash_tbl.get() != nullptr); - DCHECK(partition->aggregated_row_stream->is_pinned()); - - output_partition_ = partition; - output_iterator_ = output_partition_->hash_tbl->Begin(ht_ctx_.get()); - COUNTER_UPDATE(num_hash_buckets_, output_partition_->hash_tbl->num_buckets()); - COUNTER_UPDATE(ht_resize_counter_, output_partition_->hash_tbl->num_resize()); - COUNTER_UPDATE(num_hash_filled_buckets_, output_partition_->hash_tbl->num_filled_buckets()); - COUNTER_UPDATE(num_hash_probe_, output_partition_->hash_tbl->num_probe()); - COUNTER_UPDATE(num_hash_failed_probe_, output_partition_->hash_tbl->num_failed_probe()); - COUNTER_UPDATE(num_hash_travel_length_, output_partition_->hash_tbl->travel_length()); - COUNTER_UPDATE(num_hash_collisions_, output_partition_->hash_tbl->NumHashCollisions()); - - return Status::OK(); -} - -Status PartitionedAggregationNode::BuildSpilledPartition(Partition** built_partition) { - DCHECK(!spilled_partitions_.empty()); - DCHECK(!is_streaming_preagg_); - // Leave the partition in 'spilled_partitions_' to be closed if we hit an error. - Partition* src_partition = spilled_partitions_.front(); - DCHECK(src_partition->is_spilled()); - - // Create a new hash partition from the rows of the spilled partition. This is simpler - // than trying to finish building a partially-built partition in place. We only - // initialise one hash partition that all rows in 'src_partition' will hash to. - RETURN_IF_ERROR(CreateHashPartitions(src_partition->level, src_partition->idx)); - Partition* dst_partition = hash_partitions_[src_partition->idx]; - DCHECK(dst_partition != nullptr); - - // Rebuild the hash table over spilled aggregate rows then start adding unaggregated - // rows to the hash table. It's possible the partition will spill at either stage. - // In that case we need to finish processing 'src_partition' so that all rows are - // appended to 'dst_partition'. - // TODO: if the partition spills again but the aggregation reduces the input - // significantly, we could do better here by keeping the incomplete hash table in - // memory and only spilling unaggregated rows that didn't fit in the hash table - // (somewhat similar to the passthrough pre-aggregation). - RETURN_IF_ERROR(ProcessStream(src_partition->aggregated_row_stream.get())); - RETURN_IF_ERROR(ProcessStream(src_partition->unaggregated_row_stream.get())); - src_partition->Close(false); - spilled_partitions_.pop_front(); - hash_partitions_.clear(); - - if (dst_partition->is_spilled()) { - PushSpilledPartition(dst_partition); - *built_partition = nullptr; - } else { - *built_partition = dst_partition; - } - return Status::OK(); -} - -Status PartitionedAggregationNode::RepartitionSpilledPartition() { - DCHECK(!spilled_partitions_.empty()); - DCHECK(!is_streaming_preagg_); - // Leave the partition in 'spilled_partitions_' to be closed if we hit an error. - Partition* partition = spilled_partitions_.front(); - DCHECK(partition->is_spilled()); - - // Create the new hash partitions to repartition into. This will allocate a - // write buffer for each partition's aggregated row stream. - RETURN_IF_ERROR(CreateHashPartitions(partition->level + 1)); - COUNTER_UPDATE(num_repartitions_, 1); - - // Rows in this partition could have been spilled into two streams, depending - // on if it is an aggregated intermediate, or an unaggregated row. Aggregated - // rows are processed first to save a hash table lookup in ProcessBatch(). 
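// Presumably the saving works like this: replaying the aggregated stream via
// ProcessStream<true> seeds the rebuilt hash table with every merged group up
// front, so each raw row arriving later through ProcessStream<false> costs a
// single probe-and-update rather than extra insert attempts.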
-    RETURN_IF_ERROR(ProcessStream<true>(partition->aggregated_row_stream.get()));
-
-    // Prepare write buffers so we can append spilled rows to unaggregated partitions.
-    for (Partition* hash_partition : hash_partitions_) {
-        if (!hash_partition->is_spilled()) continue;
-        // The aggregated rows have been repartitioned. Free up at least a buffer's worth of
-        // reservation and use it to pin the unaggregated write buffer.
-        // hash_partition->aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL);
-        RETURN_IF_ERROR(hash_partition->unaggregated_row_stream->PrepareForWrite());
-    }
-    RETURN_IF_ERROR(ProcessStream<false>(partition->unaggregated_row_stream.get()));
-
-    COUNTER_UPDATE(num_row_repartitioned_, partition->aggregated_row_stream->num_rows());
-    COUNTER_UPDATE(num_row_repartitioned_, partition->unaggregated_row_stream->num_rows());
-
-    partition->Close(false);
-    spilled_partitions_.pop_front();
-
-    // Done processing this partition. Move the new partitions into
-    // spilled_partitions_/aggregated_partitions_.
-    int64_t num_input_rows = partition->aggregated_row_stream->num_rows() +
-                             partition->unaggregated_row_stream->num_rows();
-    RETURN_IF_ERROR(MoveHashPartitions(num_input_rows));
-    return Status::OK();
-}
-
-template <bool AGGREGATED_ROWS>
-Status PartitionedAggregationNode::ProcessStream(BufferedTupleStream3* input_stream) {
-    DCHECK(!is_streaming_preagg_);
-    if (input_stream->num_rows() > 0) {
-        RETURN_IF_ERROR(input_stream->PrepareForRead(true));
-
-        bool eos = false;
-        const RowDescriptor* desc =
-                AGGREGATED_ROWS ? &intermediate_row_desc_ : &(_children[0]->row_desc());
-        RowBatch batch(*desc, state_->batch_size());
-        do {
-            RETURN_IF_ERROR(input_stream->GetNext(&batch, &eos));
-            RETURN_IF_ERROR(ProcessBatch<AGGREGATED_ROWS>(&batch, ht_ctx_.get()));
-            RETURN_IF_ERROR(state_->check_query_state(
-                    "New partitioned aggregation, while processing stream."));
-            batch.reset();
-        } while (!eos);
-    }
-    input_stream->Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES);
-    return Status::OK();
-}
-
-Status PartitionedAggregationNode::SpillPartition(bool more_aggregate_rows) {
-    int64_t max_freed_mem = 0;
-    int partition_idx = -1;
-
-    // Iterate over the partitions and pick the largest partition that is not spilled.
-    for (int i = 0; i < hash_partitions_.size(); ++i) {
-        if (hash_partitions_[i] == nullptr) continue;
-        if (hash_partitions_[i]->is_closed) continue;
-        if (hash_partitions_[i]->is_spilled()) continue;
-        // Pass 'true' because we need to keep the write block pinned. See Partition::Spill().
-        int64_t mem = hash_partitions_[i]->aggregated_row_stream->BytesPinned(true);
-        mem += hash_partitions_[i]->hash_tbl->ByteSize();
-        mem += hash_partitions_[i]->agg_fn_pool->total_reserved_bytes();
-        DCHECK_GT(mem, 0); // At least the hash table buckets should occupy memory.
-        if (mem > max_freed_mem) {
-            max_freed_mem = mem;
-            partition_idx = i;
-        }
-    }
-    DCHECK_NE(partition_idx, -1) << "Should have been able to spill a partition to "
-                                 << "reclaim memory: " << _buffer_pool_client.DebugString();
-    // Remove references to the destroyed hash table from 'hash_tbls_'.
-    // Additionally, we might be dealing with a rebuilt spilled partition, where all
-    // partitions point to a single in-memory partition. This also ensures that 'hash_tbls_'
-    // remains consistent in that case.
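// Example of the aliasing this guards against: after BuildSpilledPartition()
// rebuilds via CreateHashPartitions(level, idx), all PARTITION_FANOUT slots of
// hash_partitions_ alias one Partition object, so spilling it must clear every
// matching hash_tbls_ entry, not just hash_tbls_[partition_idx].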
-    for (int i = 0; i < PARTITION_FANOUT; ++i) {
-        if (hash_partitions_[i] == hash_partitions_[partition_idx]) hash_tbls_[i] = nullptr;
-    }
-    return hash_partitions_[partition_idx]->Spill(more_aggregate_rows);
-}
-
-Status PartitionedAggregationNode::MoveHashPartitions(int64_t num_input_rows) {
-    DCHECK(!hash_partitions_.empty());
-    std::stringstream ss;
-    ss << "PA(node_id=" << id() << ") partitioned(level=" << hash_partitions_[0]->level << ") "
-       << num_input_rows << " rows into:" << std::endl;
-    for (int i = 0; i < hash_partitions_.size(); ++i) {
-        Partition* partition = hash_partitions_[i];
-        if (partition == nullptr) continue;
-        // We might be dealing with a rebuilt spilled partition, where all partitions are
-        // pointing to a single in-memory partition, so make sure we only proceed for the
-        // right partition.
-        if (i != partition->idx) continue;
-        int64_t aggregated_rows = 0;
-        if (partition->aggregated_row_stream != nullptr) {
-            aggregated_rows = partition->aggregated_row_stream->num_rows();
-        }
-        int64_t unaggregated_rows = 0;
-        if (partition->unaggregated_row_stream != nullptr) {
-            unaggregated_rows = partition->unaggregated_row_stream->num_rows();
-        }
-        double total_rows = aggregated_rows + unaggregated_rows;
-        double percent = total_rows * 100 / num_input_rows;
-        ss << "  " << i << " " << (partition->is_spilled() ? "spilled" : "not spilled")
-           << " (percent=" << std::fixed << std::setprecision(2) << percent << "%)" << std::endl
-           << "    #aggregated rows:" << aggregated_rows << std::endl
-           << "    #unaggregated rows: " << unaggregated_rows << std::endl;
-
-        // TODO: update counters to support doubles.
-        COUNTER_SET(largest_partition_percent_, static_cast<int64_t>(percent));
-
-        if (total_rows == 0) {
-            partition->Close(false);
-        } else if (partition->is_spilled()) {
-            PushSpilledPartition(partition);
-        } else {
-            aggregated_partitions_.push_back(partition);
-        }
-    }
-    VLOG_CRITICAL << ss.str();
-    hash_partitions_.clear();
-    return Status::OK();
-}
-
-void PartitionedAggregationNode::PushSpilledPartition(Partition* partition) {
-    DCHECK(partition->is_spilled());
-    DCHECK(partition->hash_tbl == nullptr);
-    // Ensure all pages in the spilled partition's streams are unpinned by invalidating
-    // the streams' read and write iterators. We may need all the memory to process the
-    // next spilled partitions.
-    // partition->aggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL);
-    // partition->unaggregated_row_stream->UnpinStream(BufferedTupleStream3::UNPIN_ALL);
-    spilled_partitions_.push_front(partition);
-}
-
-void PartitionedAggregationNode::ClosePartitions() {
-    for (Partition* partition : hash_partitions_) {
-        if (partition != nullptr) partition->Close(true);
-    }
-    hash_partitions_.clear();
-    for (Partition* partition : aggregated_partitions_) partition->Close(true);
-    aggregated_partitions_.clear();
-    for (Partition* partition : spilled_partitions_) partition->Close(true);
-    spilled_partitions_.clear();
-    memset(hash_tbls_, 0, sizeof(hash_tbls_));
-    partition_pool_->clear();
-}
-
-//Status PartitionedAggregationNode::QueryMaintenance(RuntimeState* state) {
-//    NewAggFnEvaluator::FreeLocalAllocations(agg_fn_evals_);
-//    for (Partition* partition : hash_partitions_) {
-//        if (partition != nullptr) {
-//            NewAggFnEvaluator::FreeLocalAllocations(partition->agg_fn_evals);
-//        }
-//    }
-//    if (ht_ctx_.get() != nullptr) ht_ctx_->FreeLocalAllocations();
-//    return ExecNode::QueryMaintenance(state);
-//}
-
-// Instantiate required templates.
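// The explicit instantiations below are needed because the template
// definitions live in this translation unit rather than the header; without
// concrete <false> and <true> instances the linker would find no definitions
// for callers in other files.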
-template Status PartitionedAggregationNode::AppendSpilledRow<false>(Partition*, TupleRow*);
-template Status PartitionedAggregationNode::AppendSpilledRow<true>(Partition*, TupleRow*);
-
-Status PartitionedAggregationNode::ProcessBatchNoGrouping(RowBatch* batch) {
-    Tuple* output_tuple = singleton_output_tuple_;
-    FOREACH_ROW(batch, 0, batch_iter) {
-        UpdateTuple(agg_fn_evals_.data(), output_tuple, batch_iter.get());
-    }
-    return Status::OK();
-}
-
-template <bool AGGREGATED_ROWS>
-Status PartitionedAggregationNode::ProcessBatch(RowBatch* batch, PartitionedHashTableCtx* ht_ctx) {
-    DCHECK(!hash_partitions_.empty());
-    DCHECK(!is_streaming_preagg_);
-
-    // Make sure that no resizes will happen when inserting individual rows to the hash
-    // table of each partition by pessimistically assuming that all the rows in each batch
-    // will end up to the same partition.
-    // TODO: Once we have a histogram with the number of rows per partition, we will have
-    // accurate resize calls.
-    RETURN_IF_ERROR(CheckAndResizeHashPartitions(AGGREGATED_ROWS, batch->num_rows(), ht_ctx));
-
-    PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache();
-    const int cache_size = expr_vals_cache->capacity();
-    const int num_rows = batch->num_rows();
-    for (int group_start = 0; group_start < num_rows; group_start += cache_size) {
-        EvalAndHashPrefetchGroup<AGGREGATED_ROWS>(batch, group_start, ht_ctx);
-
-        FOREACH_ROW_LIMIT(batch, group_start, cache_size, batch_iter) {
-            RETURN_IF_ERROR(ProcessRow<AGGREGATED_ROWS>(batch_iter.get(), ht_ctx));
-            expr_vals_cache->NextRow();
-        }
-        ht_ctx->expr_results_pool_->clear();
-        DCHECK(expr_vals_cache->AtEnd());
-    }
-    return Status::OK();
-}
-
-template <bool AGGREGATED_ROWS>
-void PartitionedAggregationNode::EvalAndHashPrefetchGroup(RowBatch* batch, int start_row_idx,
-                                                          PartitionedHashTableCtx* ht_ctx) {
-    PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache();
-    const int cache_size = expr_vals_cache->capacity();
-
-    expr_vals_cache->Reset();
-    FOREACH_ROW_LIMIT(batch, start_row_idx, cache_size, batch_iter) {
-        TupleRow* row = batch_iter.get();
-        bool is_null;
-        if (AGGREGATED_ROWS) {
-            is_null = !ht_ctx->EvalAndHashBuild(row);
-        } else {
-            is_null = !ht_ctx->EvalAndHashProbe(row);
-        }
-        // Hoist lookups out of non-null branch to speed up non-null case.
-        const uint32_t hash = expr_vals_cache->CurExprValuesHash();
-        const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
-        PartitionedHashTable* hash_tbl = GetHashTable(partition_idx);
-        if (is_null) {
-            expr_vals_cache->SetRowNull();
-        } else if (config::enable_prefetch) {
-            if (LIKELY(hash_tbl != nullptr)) hash_tbl->PrefetchBucket(hash);
-        }
-        expr_vals_cache->NextRow();
-    }
-
-    expr_vals_cache->ResetForRead();
-}
-
-template <bool AGGREGATED_ROWS>
-Status PartitionedAggregationNode::ProcessRow(TupleRow* row, PartitionedHashTableCtx* ht_ctx) {
-    PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache();
-    // Hoist lookups out of non-null branch to speed up non-null case.
-    const uint32_t hash = expr_vals_cache->CurExprValuesHash();
-    const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
-    if (expr_vals_cache->IsRowNull()) return Status::OK();
-    // To process this row, we first see if it can be aggregated or inserted into this
-    // partition's hash table. If we need to insert it and that fails, due to OOM, we
-    // spill the partition. The partition to spill is not necessarily dst_partition,
-    // so we can try again to insert the row.
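// Concretely: with NUM_PARTITIONING_BITS = 4, a hash of 0xDEADBEEF routes the
// row to partition 0xD (13) via the top four bits, while the hash table uses
// the remaining low bits, so partition choice and bucket choice stay independent.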
-    PartitionedHashTable* hash_tbl = GetHashTable(partition_idx);
-    Partition* dst_partition = hash_partitions_[partition_idx];
-    DCHECK(dst_partition != nullptr);
-    DCHECK_EQ(dst_partition->is_spilled(), hash_tbl == nullptr);
-    if (hash_tbl == nullptr) {
-        // This partition is already spilled, just append the row.
-        return AppendSpilledRow<AGGREGATED_ROWS>(dst_partition, row);
-    }
-
-    DCHECK(dst_partition->aggregated_row_stream->is_pinned());
-    bool found;
-    // Find the appropriate bucket in the hash table. There will always be a free
-    // bucket because we checked the size above.
-    PartitionedHashTable::Iterator it = hash_tbl->FindBuildRowBucket(ht_ctx, &found);
-    DCHECK(!it.AtEnd()) << "Hash table had no free buckets";
-    if (AGGREGATED_ROWS) {
-        // If the row is already an aggregate row, it cannot match anything in the
-        // hash table since we process the aggregate rows first. These rows should
-        // have been aggregated in the initial pass.
-        DCHECK(!found);
-    } else if (found) {
-        // Row is already in hash table. Do the aggregation and we're done.
-        UpdateTuple(dst_partition->agg_fn_evals.data(), it.GetTuple(), row);
-        return Status::OK();
-    }
-
-    // If we are seeing this result row for the first time, we need to construct the
-    // result row and initialize it.
-    return AddIntermediateTuple<AGGREGATED_ROWS>(dst_partition, row, hash, it);
-}
-
-template <bool AGGREGATED_ROWS>
-Status PartitionedAggregationNode::AddIntermediateTuple(Partition* partition, TupleRow* row,
-                                                        uint32_t hash,
-                                                        PartitionedHashTable::Iterator insert_it) {
-    while (true) {
-        DCHECK(partition->aggregated_row_stream->is_pinned());
-        Tuple* intermediate_tuple = ConstructIntermediateTuple(
-                partition->agg_fn_evals, partition->aggregated_row_stream.get(),
-                &process_batch_status_);
-
-        if (LIKELY(intermediate_tuple != nullptr)) {
-            UpdateTuple(partition->agg_fn_evals.data(), intermediate_tuple, row, AGGREGATED_ROWS);
-            // After copying and initializing the tuple, insert it into the hash table.
-            insert_it.SetTuple(intermediate_tuple, hash);
-            return Status::OK();
-        } else if (!process_batch_status_.ok()) {
-            return std::move(process_batch_status_);
-        }
-
-        // We did not have enough memory to add intermediate_tuple to the stream.
-        RETURN_IF_ERROR(SpillPartition(AGGREGATED_ROWS));
-        if (partition->is_spilled()) {
-            return AppendSpilledRow<AGGREGATED_ROWS>(partition, row);
-        }
-    }
-}
-
-Status PartitionedAggregationNode::ProcessBatchStreaming(bool needs_serialize, RowBatch* in_batch,
-                                                         RowBatch* out_batch,
-                                                         PartitionedHashTableCtx* ht_ctx,
-                                                         int remaining_capacity[PARTITION_FANOUT]) {
-    DCHECK(is_streaming_preagg_);
-    DCHECK_EQ(out_batch->num_rows(), 0);
-    DCHECK_LE(in_batch->num_rows(), out_batch->capacity());
-
-    RowBatch::Iterator out_batch_iterator(out_batch, out_batch->num_rows());
-    PartitionedHashTableCtx::ExprValuesCache* expr_vals_cache = ht_ctx->expr_values_cache();
-    const int num_rows = in_batch->num_rows();
-    const int cache_size = expr_vals_cache->capacity();
-    for (int group_start = 0; group_start < num_rows; group_start += cache_size) {
-        EvalAndHashPrefetchGroup<false>(in_batch, group_start, ht_ctx);
-
-        FOREACH_ROW_LIMIT(in_batch, group_start, cache_size, in_batch_iter) {
-            // Hoist lookups out of non-null branch to speed up non-null case.
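// A row rejected by TryAddToHashTable() below is not dropped: it is converted
// to the intermediate tuple layout and appended to out_batch, which is why the
// caller guarantees out_batch->capacity() >= in_batch->num_rows().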
-            TupleRow* in_row = in_batch_iter.get();
-            const uint32_t hash = expr_vals_cache->CurExprValuesHash();
-            const uint32_t partition_idx = hash >> (32 - NUM_PARTITIONING_BITS);
-            if (!expr_vals_cache->IsRowNull() &&
-                !TryAddToHashTable(ht_ctx, hash_partitions_[partition_idx],
-                                   GetHashTable(partition_idx), in_row, hash,
-                                   &remaining_capacity[partition_idx], &process_batch_status_)) {
-                RETURN_IF_ERROR(std::move(process_batch_status_));
-                // Tuple is not going into hash table, add it to the output batch.
-                Tuple* intermediate_tuple = ConstructIntermediateTuple(
-                        agg_fn_evals_, out_batch->tuple_data_pool(), &process_batch_status_);
-                if (UNLIKELY(intermediate_tuple == nullptr)) {
-                    DCHECK(!process_batch_status_.ok());
-                    return std::move(process_batch_status_);
-                }
-                UpdateTuple(agg_fn_evals_.data(), intermediate_tuple, in_row);
-                out_batch_iterator.get()->set_tuple(0, intermediate_tuple);
-                out_batch_iterator.next();
-                out_batch->commit_last_row();
-            }
-            DCHECK(process_batch_status_.ok());
-            expr_vals_cache->NextRow();
-        }
-        ht_ctx->expr_results_pool_->clear();
-        DCHECK(expr_vals_cache->AtEnd());
-    }
-    if (needs_serialize) {
-        FOREACH_ROW(out_batch, 0, out_batch_iter) {
-            NewAggFnEvaluator::Serialize(agg_fn_evals_, out_batch_iter.get()->get_tuple(0));
-        }
-    }
-
-    return Status::OK();
-}
-
-bool PartitionedAggregationNode::TryAddToHashTable(PartitionedHashTableCtx* ht_ctx,
-                                                   Partition* partition,
-                                                   PartitionedHashTable* hash_tbl, TupleRow* in_row,
-                                                   uint32_t hash, int* remaining_capacity,
-                                                   Status* status) {
-    DCHECK(remaining_capacity != nullptr);
-    DCHECK_EQ(hash_tbl, partition->hash_tbl.get());
-    DCHECK_GE(*remaining_capacity, 0);
-    bool found;
-    // This is called from ProcessBatchStreaming() so the rows are not aggregated.
-    PartitionedHashTable::Iterator it = hash_tbl->FindBuildRowBucket(ht_ctx, &found);
-    Tuple* intermediate_tuple;
-    if (found) {
-        intermediate_tuple = it.GetTuple();
-    } else if (*remaining_capacity == 0) {
-        return false;
-    } else {
-        intermediate_tuple = ConstructIntermediateTuple(
-                partition->agg_fn_evals, partition->aggregated_row_stream.get(), status);
-        if (LIKELY(intermediate_tuple != nullptr)) {
-            it.SetTuple(intermediate_tuple, hash);
-            --(*remaining_capacity);
-        } else {
-            // Avoid repeatedly trying to add tuples when under memory pressure.
-            *remaining_capacity = 0;
-            return false;
-        }
-    }
-    UpdateTuple(partition->agg_fn_evals.data(), intermediate_tuple, in_row);
-    return true;
-}
-
-// Instantiate required templates.
-template Status PartitionedAggregationNode::ProcessBatch<false>(RowBatch*,
-                                                                PartitionedHashTableCtx*);
-template Status PartitionedAggregationNode::ProcessBatch<true>(RowBatch*, PartitionedHashTableCtx*);
-
-} // namespace doris
diff --git a/be/src/exec/partitioned_aggregation_node.h b/be/src/exec/partitioned_aggregation_node.h
deleted file mode 100644
index c5d9a505d1..0000000000
--- a/be/src/exec/partitioned_aggregation_node.h
+++ /dev/null
@@ -1,696 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/partitioned-aggregation-node.h -// and modified by Doris - -#pragma once - -#include - -#include "exec/exec_node.h" -#include "exec/partitioned_hash_table.h" -#include "runtime/buffered_tuple_stream3.h" -#include "runtime/bufferpool/suballocator.h" -#include "runtime/descriptors.h" // for TupleId -#include "runtime/mem_pool.h" - -namespace doris { - -class AggFn; -class NewAggFnEvaluator; -class CodegenAnyVal; -class RowBatch; -class RuntimeState; -struct StringValue; -class Tuple; -class TupleDescriptor; -class SlotDescriptor; - -/// Node for doing partitioned hash aggregation. -/// This node consumes the input (which can be from the child(0) or a spilled partition). -/// 1. Each row is hashed and we pick a dst partition (hash_partitions_). -/// 2. If the dst partition is not spilled, we probe into the partitions hash table -/// to aggregate/insert the row. -/// 3. If the partition is already spilled, the input row is spilled. -/// 4. When all the input is consumed, we walk hash_partitions_, put the spilled ones -/// into spilled_partitions_ and the non-spilled ones into aggregated_partitions_. -/// aggregated_partitions_ contain partitions that are fully processed and the result -/// can just be returned. Partitions in spilled_partitions_ need to be repartitioned -/// and we just repeat these steps. -// -/// Each partition contains these structures: -/// 1) Hash Table for aggregated rows. This contains just the hash table directory -/// structure but not the rows themselves. This is nullptr for spilled partitions when -/// we stop maintaining the hash table. -/// 2) MemPool for var-len result data for rows in the hash table. If the aggregate -/// function returns a string, we cannot append it to the tuple stream as that -/// structure is immutable. Instead, when we need to spill, we sweep and copy the -/// rows into a tuple stream. -/// 3) Aggregated tuple stream for rows that are/were in the hash table. This stream -/// contains rows that are aggregated. When the partition is not spilled, this stream -/// is pinned and contains the memory referenced by the hash table. -/// In the case where the aggregate function does not return a string (meaning the -/// size of all the slots is known when the row is constructed), this stream contains -/// all the memory for the result rows and the MemPool (2) is not used. -/// 4) Aggregated tuple stream. Stream to spill aggregated rows. -/// Rows in this stream always have child(0)'s layout. -/// -/// Buffering: Each stream and hash table needs to maintain at least one buffer for -/// some duration of the processing. To minimize the memory requirements of small queries -/// (i.e. memory usage is less than one IO-buffer per partition), the streams and hash -/// tables of each partition start using small (less than IO-sized) buffers, regardless -/// of the level. 
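/// (Scale note, assuming the constants declared in this header: 16 partitions
/// with an aggregated plus an unaggregated stream each would otherwise demand
/// 32 IO-sized buffers up front, so starting with small buffers is what keeps
/// the minimum reservation of low-cardinality queries modest.)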
-/// -/// Two-phase aggregation: we support two-phase distributed aggregations, where -/// pre-aggregrations attempt to reduce the size of data before shuffling data across the -/// network to be merged by the merge aggregation node. This exec node supports a -/// streaming mode for pre-aggregations where it maintains a hash table of aggregated -/// rows, but can pass through unaggregated rows (after transforming them into the -/// same tuple format as aggregated rows) when a heuristic determines that it is better -/// to send rows across the network instead of consuming additional memory and CPU -/// resources to expand its hash table. The planner decides whether a given -/// pre-aggregation should use the streaming preaggregation algorithm or the same -/// blocking aggregation algorithm as used in merge aggregations. -/// TODO: make this less of a heuristic by factoring in the cost of the exchange vs the -/// cost of the pre-aggregation. -/// -/// If there are no grouping expressions, there is only a single output row for both -/// preaggregations and merge aggregations. This case is handled separately to avoid -/// building hash tables. There is also no need to do streaming preaggregations. -/// -/// Handling memory pressure: the node uses two different strategies for responding to -/// memory pressure, depending on whether it is a streaming pre-aggregation or not. If -/// the node is a streaming preaggregation, it stops growing its hash table further by -/// converting unaggregated rows into the aggregated tuple format and passing them -/// through. If the node is not a streaming pre-aggregation, it responds to memory -/// pressure by spilling partitions to disk. -/// -/// TODO: Buffer rows before probing into the hash table? -/// TODO: After spilling, we can still maintain a very small hash table just to remove -/// some number of rows (from likely going to disk). -/// TODO: Consider allowing to spill the hash table structure in addition to the rows. -/// TODO: Do we want to insert a buffer before probing into the partition's hash table? -/// TODO: Use a prefetch/batched probe interface. -/// TODO: Return rows from the aggregated_row_stream rather than the HT. -/// TODO: Think about spilling heuristic. -/// TODO: When processing a spilled partition, we have a lot more information and can -/// size the partitions/hash tables better. -/// TODO: Start with unpartitioned (single partition) and switch to partitioning and -/// spilling only if the size gets large, say larger than the LLC. -/// TODO: Simplify or cleanup the various uses of agg_fn_ctx, agg_fn_ctx_, and ctx. -/// There are so many contexts in use that a plain "ctx" variable should never be used. -/// Likewise, it's easy to mixup the agg fn ctxs, there should be a way to simplify this. -/// TODO: support an Init() method with an initial value in the UDAF interface. 
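The hash-partitioning loop described above is easier to see in miniature. The sketch below is illustrative only: ToyPartition, PartitionIdx, and the size-cap spill trigger are invented stand-ins (the real node keeps rows in BufferedTupleStream3 and spills under buffer-pool pressure), but the top-bits partition routing, the append-to-spilled path, and the spill-the-largest heuristic mirror steps 1-4 of the algorithm.

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

static constexpr int kFanout = 16;          // stands in for PARTITION_FANOUT
static constexpr int kPartitioningBits = 4; // log2(kFanout)

struct ToyPartition {
    std::unordered_map<std::string, int64_t> agg;           // stands in for the hash table
    std::vector<std::pair<std::string, int64_t>> spilled;   // stands in for the row streams
    bool is_spilled = false;
};

// Upper bits pick the partition; a real hash table would consume the lower bits.
static uint32_t PartitionIdx(uint32_t hash) { return hash >> (32 - kPartitioningBits); }

int main() {
    std::vector<ToyPartition> parts(kFanout);
    size_t in_memory_groups = 0;
    const size_t mem_cap = 4; // absurdly small, to force a spill

    // Mirrors SpillPartition(): evict the largest still-in-memory partition.
    auto spill_largest = [&] {
        ToyPartition* victim = nullptr;
        for (auto& p : parts)
            if (!p.is_spilled && (!victim || p.agg.size() > victim->agg.size())) victim = &p;
        for (auto& kv : victim->agg) victim->spilled.emplace_back(kv);
        in_memory_groups -= victim->agg.size();
        victim->agg.clear();
        victim->is_spilled = true;
    };

    for (std::string key : {"a", "b", "c", "d", "e", "a", "b", "f"}) {
        uint32_t hash = static_cast<uint32_t>(std::hash<std::string>{}(key));
        ToyPartition& p = parts[PartitionIdx(hash)];
        if (p.is_spilled) {           // AppendSpilledRow(): just queue the row
            p.spilled.emplace_back(key, 1);
            continue;
        }
        auto [it, inserted] = p.agg.try_emplace(key, 0);
        it->second += 1;              // UpdateTuple()
        if (inserted && ++in_memory_groups > mem_cap) spill_largest();
    }
    for (auto& p : parts)
        for (auto& [k, v] : p.agg) std::cout << k << " -> " << v << "\n";
}

One important difference from the toy: the real node does not leave spilled rows as raw pairs; it later re-reads them and repartitions with a fresh hash seed, which is what makes the recursion terminate.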
-class PartitionedAggregationNode : public ExecNode { -public: - PartitionedAggregationNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs); - - virtual Status init(const TPlanNode& tnode, RuntimeState* state); - virtual Status prepare(RuntimeState* state); - // virtual void Codegen(RuntimeState* state); - virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - virtual Status reset(RuntimeState* state); - virtual Status close(RuntimeState* state); - -protected: - /// Frees local allocations from aggregate_evals_ and agg_fn_evals - // virtual Status QueryMaintenance(RuntimeState* state); - virtual std::string DebugString(int indentation_level) const; - virtual void DebugString(int indentation_level, std::stringstream* out) const; - -private: - struct Partition; - - /// Number of initial partitions to create. Must be a power of 2. - static const int PARTITION_FANOUT = 16; - - /// Needs to be the log(PARTITION_FANOUT). - /// We use the upper bits to pick the partition and lower bits in the HT. - /// TODO: different hash functions here too? We don't need that many bits to pick - /// the partition so this might be okay. - static const int NUM_PARTITIONING_BITS = 4; - - /// Maximum number of times we will repartition. The maximum build table we can process - /// (if we have enough scratch disk space) in case there is no skew is: - /// MEM_LIMIT * (PARTITION_FANOUT ^ MAX_PARTITION_DEPTH). - /// In the case where there is skew, repartitioning is unlikely to help (assuming a - /// reasonable hash function). - /// Note that we need to have at least as many SEED_PRIMES in PartitionedHashTableCtx. - /// TODO: we can revisit and try harder to explicitly detect skew. - static const int MAX_PARTITION_DEPTH = 16; - - /// Default initial number of buckets in a hash table. - /// TODO: rethink this ? - static const int64_t PAGG_DEFAULT_HASH_TABLE_SZ = 1024; - - /// Codegen doesn't allow for automatic Status variables because then exception - /// handling code is needed to destruct the Status, and our function call substitution - /// doesn't know how to deal with the LLVM IR 'invoke' instruction. Workaround that by - /// placing the Status here so exceptions won't need to destruct it. - /// TODO: fix IMPALA-1948 and remove this. - Status process_batch_status_; - - /// Tuple into which Update()/Merge()/Serialize() results are stored. - TupleId intermediate_tuple_id_; - TupleDescriptor* intermediate_tuple_desc_; - - /// Row with the intermediate tuple as its only tuple. - /// Construct a new row desc for preparing the build exprs because neither the child's - /// nor this node's output row desc may contain the intermediate tuple, e.g., - /// in a single-node plan with an intermediate tuple different from the output tuple. - /// Lives in the query state's obj_pool. - RowDescriptor intermediate_row_desc_; - - /// Tuple into which Finalize() results are stored. Possibly the same as - /// the intermediate tuple. - TupleId output_tuple_id_; - TupleDescriptor* output_tuple_desc_; - - /// Certain aggregates require a finalize step, which is the final step of the - /// aggregate after consuming all input rows. The finalize step converts the aggregate - /// value into its final form. This is true if this node contains aggregate that - /// requires a finalize step. - const bool needs_finalize_; - - /// True if this is first phase of a two-phase distributed aggregation for which we - /// are doing a streaming preaggregation. 
-    bool is_streaming_preagg_;
-
-    /// True if any of the evaluators require the serialize step.
-    bool needs_serialize_;
-
-    /// The list of all aggregate operations for this exec node.
-    std::vector<AggFn*> agg_fns_;
-
-    /// Evaluators for each aggregate function. If this is a grouping aggregation, these
-    /// evaluators are only used to create cloned per-partition evaluators. The cloned
-    /// evaluators are then used to evaluate the functions. If this is a non-grouping
-    /// aggregation these evaluators are used directly to evaluate the functions.
-    ///
-    /// Permanent and result allocations for these allocators are allocated from
-    /// 'expr_perm_pool_' and 'expr_results_pool_' respectively.
-    std::vector<NewAggFnEvaluator*> agg_fn_evals_;
-    std::unique_ptr<MemPool> agg_fn_pool_;
-
-    /// Exprs used to evaluate input rows
-    std::vector<Expr*> grouping_exprs_;
-
-    /// Exprs used to insert constructed aggregation tuple into the hash table.
-    /// All the exprs are simply SlotRefs for the intermediate tuple.
-    std::vector<Expr*> build_exprs_;
-
-    /// Exprs used to evaluate input rows
-    /// TODO (pengyubing) Is this variable useful?
-    std::vector<ExprContext*> grouping_expr_ctxs_;
-
-    /// Indices of grouping exprs with var-len string types in grouping_expr_ctxs_. We need
-    /// to do more work for var-len expressions when allocating and spilling rows. All
-    /// var-len grouping exprs have type string.
-    std::vector<int> string_grouping_exprs_;
-
-    RuntimeState* state_;
-    /// Allocator for hash table memory.
-    std::unique_ptr<Suballocator> ht_allocator_;
-    /// MemPool used to allocate memory for when we don't have grouping and don't initialize
-    /// the partitioning structures, or during Close() when creating new output tuples.
-    /// For non-grouping aggregations, the ownership of the pool's memory is transferred
-    /// to the output batch on eos. The pool should not be Reset() to allow amortizing
-    /// memory allocation over a series of Reset()/Open()/GetNext()* calls.
-    std::unique_ptr<MemPool> mem_pool_;
-
-    // MemPool for allocations made by copying expr results
-    std::unique_ptr<MemPool> expr_results_pool_;
-
-    /// The current partition and iterator to the next row in its hash table that we need
-    /// to return in GetNext()
-    Partition* output_partition_;
-    PartitionedHashTable::Iterator output_iterator_;
-
-    typedef Status (*ProcessBatchNoGroupingFn)(PartitionedAggregationNode*, RowBatch*);
-    /// Jitted ProcessBatchNoGrouping function pointer. Null if codegen is disabled.
-    ProcessBatchNoGroupingFn process_batch_no_grouping_fn_;
-
-    typedef Status (*ProcessBatchFn)(PartitionedAggregationNode*, RowBatch*,
-                                     PartitionedHashTableCtx*);
-    /// Jitted ProcessBatch function pointer. Null if codegen is disabled.
-    ProcessBatchFn process_batch_fn_;
-
-    typedef Status (*ProcessBatchStreamingFn)(PartitionedAggregationNode*, bool, RowBatch*,
-                                              RowBatch*, PartitionedHashTableCtx*,
-                                              int[PARTITION_FANOUT]);
-    /// Jitted ProcessBatchStreaming function pointer. Null if codegen is disabled.
-    ProcessBatchStreamingFn process_batch_streaming_fn_;
-
-    /// Time spent processing the child rows
-    RuntimeProfile::Counter* build_timer_;
-
-    /// Total time spent resizing hash tables.
-    RuntimeProfile::Counter* ht_resize_timer_;
-
-    /// Total number of hash table resizes.
-    RuntimeProfile::Counter* ht_resize_counter_;
-
-    /// Time spent returning the aggregated rows
-    RuntimeProfile::Counter* get_results_timer_;
-
-    /// Total number of hash buckets across all partitions.
-    RuntimeProfile::Counter* num_hash_buckets_;
-
-    /// Total number of filled hash buckets across all partitions.
-    RuntimeProfile::Counter* num_hash_filled_buckets_;
-
-    /// Total number of probe operations across all partitions.
-    RuntimeProfile::Counter* num_hash_probe_;
-
-    /// Total number of failed probe operations across all partitions.
-    RuntimeProfile::Counter* num_hash_failed_probe_;
-
-    /// Total travel length of probe operations across all partitions.
-    RuntimeProfile::Counter* num_hash_travel_length_;
-
-    /// Total number of hash collisions across all partitions.
-    RuntimeProfile::Counter* num_hash_collisions_;
-
-    /// Total number of partitions created.
-    RuntimeProfile::Counter* partitions_created_;
-
-    /// Level of max partition (i.e. number of repartitioning steps).
-    RuntimeProfile::HighWaterMarkCounter* max_partition_level_;
-
-    /// Number of rows that have been repartitioned.
-    RuntimeProfile::Counter* num_row_repartitioned_;
-
-    /// Number of partitions that have been repartitioned.
-    RuntimeProfile::Counter* num_repartitions_;
-
-    /// Number of partitions that have been spilled.
-    RuntimeProfile::Counter* num_spilled_partitions_;
-
-    /// The largest fraction after repartitioning. This is expected to be
-    /// 1 / PARTITION_FANOUT. A value much larger indicates skew.
-    RuntimeProfile::HighWaterMarkCounter* largest_partition_percent_;
-
-    /// Time spent in the streaming preagg algorithm.
-    RuntimeProfile::Counter* streaming_timer_;
-
-    /// num_processed_rows == num_hash_probe_; this counter exists only to make the
-    /// runtime profile easier to read.
-    /// The number of rows processed by aggregation.
-    RuntimeProfile::Counter* num_processed_rows_;
-
-    /// The number of rows passed through without aggregation.
-    RuntimeProfile::Counter* num_passthrough_rows_;
-
-    /// The estimated reduction of the preaggregation.
-    RuntimeProfile::Counter* preagg_estimated_reduction_;
-
-    /// Expose the minimum reduction factor to continue growing the hash tables.
-    RuntimeProfile::Counter* preagg_streaming_ht_min_reduction_;
-
-    /////////////////////////////////////////
-    /// BEGIN: Members that must be Reset()
-
-    /// Result of aggregation w/o GROUP BY.
-    /// Note: can be nullptr even if there is no grouping if the result tuple is 0 width
-    /// e.g. select 1 from table group by col.
-    Tuple* singleton_output_tuple_;
-    bool singleton_output_tuple_returned_;
-
-    /// Row batch used as argument to GetNext() for the child node preaggregations. Store
-    /// in node to avoid reallocating for every GetNext() call when streaming.
-    std::unique_ptr<RowBatch> child_batch_;
-
-    /// If true, no more rows to output from partitions.
-    bool partition_eos_;
-
-    /// True if no more rows to process from child.
-    bool child_eos_;
-
-    /// Used for hash-related functionality, such as evaluating rows and calculating hashes.
-    /// It also owns the evaluators for the grouping and build expressions used during hash
-    /// table insertion and probing.
-    std::unique_ptr<PartitionedHashTableCtx> ht_ctx_;
-
-    /// Object pool that holds the Partition objects in hash_partitions_.
-    std::unique_ptr<ObjectPool> partition_pool_;
-
-    /// Current partitions we are partitioning into. IMPALA-5788: For the case where we
-    /// rebuild a spilled partition that fits in memory, all pointers in this vector will
-    /// point to a single in-memory partition.
-    std::vector<Partition*> hash_partitions_;
-
-    /// Cache for hash tables in 'hash_partitions_'. IMPALA-5788: For the case where we
-    /// rebuild a spilled partition that fits in memory, all pointers in this array will
-    /// point to the hash table that is a part of a single in-memory partition.
- PartitionedHashTable* hash_tbls_[PARTITION_FANOUT]; - - /// All partitions that have been spilled and need further processing. - std::deque spilled_partitions_; - - /// All partitions that are aggregated and can just return the results in GetNext(). - /// After consuming all the input, hash_partitions_ is split into spilled_partitions_ - /// and aggregated_partitions_, depending on if it was spilled or not. - std::deque aggregated_partitions_; - - /// END: Members that must be Reset() - ///////////////////////////////////////// - - /// The hash table and streams (aggregated and unaggregated) for an individual - /// partition. The streams of each partition always (i.e. regardless of level) - /// initially use small buffers. Streaming pre-aggregations do not spill and do not - /// require an unaggregated stream. - struct Partition { - Partition(PartitionedAggregationNode* parent, int level, int idx) - : parent(parent), is_closed(false), level(level), idx(idx) {} - - ~Partition(); - - /// Initializes aggregated_row_stream and unaggregated_row_stream (if a spilling - /// aggregation), allocating one buffer for each. Spilling merge aggregations must - /// have enough reservation for the initial buffer for the stream, so this should - /// not fail due to OOM. Preaggregations do not reserve any buffers: if does not - /// have enough reservation for the initial buffer, the aggregated row stream is not - /// created and an OK status is returned. - Status InitStreams(); - - /// Initializes the hash table. 'aggregated_row_stream' must be non-nullptr. - /// Sets 'got_memory' to true if the hash table was initialised or false on OOM. - Status InitHashTable(bool* got_memory); - - /// Called in case we need to serialize aggregated rows. This step effectively does - /// a merge aggregation in this node. - Status SerializeStreamForSpilling(); - - /// Closes this partition. If finalize_rows is true, this iterates over all rows - /// in aggregated_row_stream and finalizes them (this is only used in the cancellation - /// path). - void Close(bool finalize_rows); - - /// Spill this partition. 'more_aggregate_rows' = true means that more aggregate rows - /// may be appended to the partition before appending unaggregated rows. On - /// success, one of the streams is left with a write iterator: the aggregated stream - /// if 'more_aggregate_rows' is true or the unaggregated stream otherwise. - Status Spill(bool more_aggregate_rows); - - bool is_spilled() const { return hash_tbl.get() == nullptr; } - - PartitionedAggregationNode* parent; - - /// If true, this partition is closed and there is nothing left to do. - bool is_closed; - - /// How many times rows in this partition have been repartitioned. Partitions created - /// from the node's children's input is level 0, 1 after the first repartitionining, - /// etc. - const int level; - - /// The index of this partition within 'hash_partitions_' at its level. - const int idx; - - /// Hash table for this partition. - /// Can be nullptr if this partition is no longer maintaining a hash table (i.e. - /// is spilled or we are passing through all rows for this partition). - std::unique_ptr hash_tbl; - - /// Clone of parent's agg_fn_evals_. Permanent allocations come from - /// 'agg_fn_perm_pool' and result allocations come from the ExecNode's - /// 'expr_results_pool_'. - std::vector agg_fn_evals; - std::unique_ptr agg_fn_pool; - - /// Tuple stream used to store aggregated rows. 
When the partition is not spilled, - /// (meaning the hash table is maintained), this stream is pinned and contains the - /// memory referenced by the hash table. When it is spilled, this consumes reservation - /// for a write buffer only during repartitioning of aggregated rows. - /// - /// For streaming preaggs, this may be nullptr if sufficient memory is not available. - /// In that case hash_tbl is also nullptr and all rows for the partition will be passed - /// through. - std::unique_ptr aggregated_row_stream; - - /// Unaggregated rows that are spilled. Always nullptr for streaming pre-aggregations. - /// Always unpinned. Has a write buffer allocated when the partition is spilled and - /// unaggregated rows are being processed. - std::unique_ptr unaggregated_row_stream; - }; - - /// Stream used to store serialized spilled rows. Only used if needs_serialize_ - /// is set. This stream is never pinned and only used in Partition::Spill as a - /// a temporary buffer. - std::unique_ptr serialize_stream_; - - /// Accessor for 'hash_tbls_' that verifies consistency with the partitions. - PartitionedHashTable* ALWAYS_INLINE GetHashTable(int partition_idx) { - PartitionedHashTable* ht = hash_tbls_[partition_idx]; - DCHECK_EQ(ht, hash_partitions_[partition_idx]->hash_tbl.get()); - return ht; - } - - /// Materializes 'row_batch' in either grouping or non-grouping case. - Status GetNextInternal(RuntimeState* state, RowBatch* row_batch, bool* eos); - - /// Helper function called by GetNextInternal() to ensure that string data referenced in - /// 'row_batch' will live as long as 'row_batch's tuples. 'first_row_idx' indexes the - /// first row that should be processed in 'row_batch'. - Status HandleOutputStrings(RowBatch* row_batch, int first_row_idx); - - /// Copies string data from the specified slot into 'pool', and sets the StringValues' - /// ptrs to the copied data. Copies data from all tuples in 'row_batch' from - /// 'first_row_idx' onwards. 'slot_desc' must have a var-len string type. - Status CopyStringData(const SlotDescriptor& slot_desc, RowBatch* row_batch, int first_row_idx, - MemPool* pool); - - /// Constructs singleton output tuple, allocating memory from pool. - Tuple* ConstructSingletonOutputTuple(const std::vector& agg_fn_evals, - MemPool* pool); - - /// Copies grouping values stored in 'ht_ctx_' that were computed over 'current_row_' - /// using 'grouping_expr_evals_'. Aggregation expr slots are set to their initial - /// values. Returns nullptr if there was not enough memory to allocate the tuple or errors - /// occurred. In which case, 'status' is set. Allocates tuple and var-len data for - /// grouping exprs from stream. Var-len data for aggregate exprs is allocated from the - /// FunctionContexts, so is stored outside the stream. If stream's small buffers get - /// full, it will attempt to switch to IO-buffers. - Tuple* ConstructIntermediateTuple(const std::vector& agg_fn_evals, - BufferedTupleStream3* stream, Status* status); - - /// Constructs intermediate tuple, allocating memory from pool instead of the stream. - /// Returns nullptr and sets status if there is not enough memory to allocate the tuple. - Tuple* ConstructIntermediateTuple(const std::vector& agg_fn_evals, - MemPool* pool, Status* status); - - /// Returns the number of bytes of variable-length data for the grouping values stored - /// in 'ht_ctx_'. 
- int GroupingExprsVarlenSize(); - - /// Initializes intermediate tuple by copying grouping values stored in 'ht_ctx_' that - /// that were computed over 'current_row_' using 'grouping_expr_evals_'. Writes the - /// var-len data into buffer. 'buffer' points to the start of a buffer of at least the - /// size of the variable-length data: 'varlen_size'. - void CopyGroupingValues(Tuple* intermediate_tuple, uint8_t* buffer, int varlen_size); - - /// Initializes the aggregate function slots of an intermediate tuple. - /// Any var-len data is allocated from the FunctionContexts. - void InitAggSlots(const std::vector& agg_fn_evals, - Tuple* intermediate_tuple); - - /// Updates the given aggregation intermediate tuple with aggregation values computed - /// over 'row' using 'agg_fn_evals'. Whether the agg fn evaluator calls Update() or - /// Merge() is controlled by the evaluator itself, unless enforced explicitly by passing - /// in is_merge == true. The override is needed to merge spilled and non-spilled rows - /// belonging to the same partition independent of whether the agg fn evaluators have - /// is_merge() == true. - /// This function is replaced by codegen (which is why we don't use a vector argument - /// for agg_fn_evals).. Any var-len data is allocated from the FunctionContexts. - void UpdateTuple(NewAggFnEvaluator** agg_fn_evals, Tuple* tuple, TupleRow* row, - bool is_merge = false); - - /// Called on the intermediate tuple of each group after all input rows have been - /// consumed and aggregated. Computes the final aggregate values to be returned in - /// GetNext() using the agg fn evaluators' Serialize() or Finalize(). - /// For the Finalize() case if the output tuple is different from the intermediate - /// tuple, then a new tuple is allocated from 'pool' to hold the final result. - /// Grouping values are copied into the output tuple and the output tuple holding - /// the finalized/serialized aggregate values is returned. - /// TODO: Coordinate the allocation of new tuples with the release of memory - /// so as not to make memory consumption blow up. - Tuple* GetOutputTuple(const std::vector& agg_fn_evals, Tuple* tuple, - MemPool* pool); - - /// Do the aggregation for all tuple rows in the batch when there is no grouping. - /// This function is replaced by codegen. - Status ProcessBatchNoGrouping(RowBatch* batch); - - /// Processes a batch of rows. This is the core function of the algorithm. We partition - /// the rows into hash_partitions_, spilling as necessary. - /// If AGGREGATED_ROWS is true, it means that the rows in the batch are already - /// pre-aggregated. - /// 'prefetch_mode' specifies the prefetching mode in use. If it's not PREFETCH_NONE, - /// hash table buckets will be prefetched based on the hash values computed. Note - /// that 'prefetch_mode' will be substituted with constants during codegen time. - // - /// This function is replaced by codegen. We pass in ht_ctx_.get() as an argument for - /// performance. - template - Status ProcessBatch(RowBatch* batch, PartitionedHashTableCtx* ht_ctx); - - /// Evaluates the rows in 'batch' starting at 'start_row_idx' and stores the results in - /// the expression values cache in 'ht_ctx'. The number of rows evaluated depends on - /// the capacity of the cache. 'prefetch_mode' specifies the prefetching mode in use. - /// If it's not PREFETCH_NONE, hash table buckets for the computed hashes will be - /// prefetched. Note that codegen replaces 'prefetch_mode' with a constant. 
- template - void EvalAndHashPrefetchGroup(RowBatch* batch, int start_row_idx, - PartitionedHashTableCtx* ht_ctx); - - /// This function processes each individual row in ProcessBatch(). Must be inlined into - /// ProcessBatch for codegen to substitute function calls with codegen'd versions. - /// May spill partitions if not enough memory is available. - template - Status ProcessRow(TupleRow* row, PartitionedHashTableCtx* ht_ctx); - - /// Create a new intermediate tuple in partition, initialized with row. ht_ctx is - /// the context for the partition's hash table and hash is the precomputed hash of - /// the row. The row can be an unaggregated or aggregated row depending on - /// AGGREGATED_ROWS. Spills partitions if necessary to append the new intermediate - /// tuple to the partition's stream. Must be inlined into ProcessBatch for codegen - /// to substitute function calls with codegen'd versions. insert_it is an iterator - /// for insertion returned from PartitionedHashTable::FindBuildRowBucket(). - template - Status AddIntermediateTuple(Partition* partition, TupleRow* row, uint32_t hash, - PartitionedHashTable::Iterator insert_it); - - /// Append a row to a spilled partition. May spill partitions if needed to switch to - /// I/O buffers. Selects the correct stream according to the argument. Inlined into - /// ProcessBatch(). - template - Status AppendSpilledRow(Partition* partition, TupleRow* row); - - /// Reads all the rows from input_stream and process them by calling ProcessBatch(). - template - Status ProcessStream(BufferedTupleStream3* input_stream); - - /// Output 'singleton_output_tuple_' and transfer memory to 'row_batch'. - void GetSingletonOutput(RowBatch* row_batch); - - /// Get rows for the next rowbatch from the next partition. Sets 'partition_eos_' to - /// true if all rows from all partitions have been returned or the limit is reached. - Status GetRowsFromPartition(RuntimeState* state, RowBatch* row_batch); - - /// Get output rows from child for streaming pre-aggregation. Aggregates some rows with - /// hash table and passes through other rows converted into the intermediate - /// tuple format. Sets 'child_eos_' once all rows from child have been returned. - Status GetRowsStreaming(RuntimeState* state, RowBatch* row_batch); - - /// Return true if we should keep expanding hash tables in the preagg. If false, - /// the preagg should pass through any rows it can't fit in its tables. - bool ShouldExpandPreaggHashTables() const; - - /// Streaming processing of in_batch from child. Rows from child are either aggregated - /// into the hash table or added to 'out_batch' in the intermediate tuple format. - /// 'in_batch' is processed entirely, and 'out_batch' must have enough capacity to - /// store all of the rows in 'in_batch'. - /// 'needs_serialize' is an argument so that codegen can replace it with a constant, - /// rather than using the member variable 'needs_serialize_'. - /// 'prefetch_mode' specifies the prefetching mode in use. If it's not PREFETCH_NONE, - /// hash table buckets will be prefetched based on the hash values computed. Note - /// that 'prefetch_mode' will be substituted with constants during codegen time. - /// 'remaining_capacity' is an array with PARTITION_FANOUT entries with the number of - /// additional rows that can be added to the hash table per partition. It is updated - /// by ProcessBatchStreaming() when it inserts new rows. - /// 'ht_ctx' is passed in as a way to avoid aliasing of 'this' confusing the optimiser. 
- Status ProcessBatchStreaming(bool needs_serialize, RowBatch* in_batch, RowBatch* out_batch, - PartitionedHashTableCtx* ht_ctx, - int remaining_capacity[PARTITION_FANOUT]); - - /// Tries to add intermediate to the hash table 'hash_tbl' of 'partition' for streaming - /// aggregation. The input row must have been evaluated with 'ht_ctx', with 'hash' set - /// to the corresponding hash. If the tuple already exists in the hash table, update - /// the tuple and return true. Otherwise try to create a new entry in the hash table, - /// returning true if successful or false if the table is full. 'remaining_capacity' - /// keeps track of how many more entries can be added to the hash table so we can avoid - /// retrying inserts. It is decremented if an insert succeeds and set to zero if an - /// insert fails. If an error occurs, returns false and sets 'status'. - bool TryAddToHashTable(PartitionedHashTableCtx* ht_ctx, Partition* partition, - PartitionedHashTable* hash_tbl, TupleRow* in_row, uint32_t hash, - int* remaining_capacity, Status* status); - - /// Initializes hash_partitions_. 'level' is the level for the partitions to create. - /// If 'single_partition_idx' is provided, it must be a number in range - /// [0, PARTITION_FANOUT), and only that partition is created - all others point to it. - /// Also sets ht_ctx_'s level to 'level'. - Status CreateHashPartitions(int level, int single_partition_idx = -1); - - /// Ensure that hash tables for all in-memory partitions are large enough to fit - /// 'num_rows' additional hash table entries. If there is not enough memory to - /// resize the hash tables, may spill partitions. 'aggregated_rows' is true if - /// we're currently partitioning aggregated rows. - Status CheckAndResizeHashPartitions(bool aggregated_rows, int num_rows, - const PartitionedHashTableCtx* ht_ctx); - - /// Prepares the next partition to return results from. On return, this function - /// initializes output_iterator_ and output_partition_. This either removes - /// a partition from aggregated_partitions_ (and is done) or removes the next - /// partition from aggregated_partitions_ and repartitions it. - Status NextPartition(); - - /// Tries to build the first partition in 'spilled_partitions_'. - /// If successful, set *built_partition to the partition. The caller owns the partition - /// and is responsible for closing it. If unsuccessful because the partition could not - /// fit in memory, set *built_partition to nullptr and append the spilled partition to the - /// head of 'spilled_partitions_' so it can be processed by - /// RepartitionSpilledPartition(). - Status BuildSpilledPartition(Partition** built_partition); - - /// Repartitions the first partition in 'spilled_partitions_' into PARTITION_FANOUT - /// output partitions. On success, each output partition is either: - /// * closed, if no rows were added to the partition. - /// * in 'spilled_partitions_', if the partition spilled. - /// * in 'aggregated_partitions_', if the output partition was not spilled. - Status RepartitionSpilledPartition(); - - /// Picks a partition from 'hash_partitions_' to spill. 'more_aggregate_rows' is passed - /// to Partition::Spill() when spilling the partition. See the Partition::Spill() - /// comment for further explanation. - Status SpillPartition(bool more_aggregate_rows); - - /// Moves the partitions in hash_partitions_ to aggregated_partitions_ or - /// spilled_partitions_. Partitions moved to spilled_partitions_ are unpinned. 
- /// input_rows is the number of input rows that have been repartitioned. - /// Used for diagnostics. - Status MoveHashPartitions(int64_t input_rows); - - /// Adds a partition to the front of 'spilled_partitions_' for later processing. - /// 'spilled_partitions_' uses LIFO so more finely partitioned partitions are processed - /// first). This allows us to delete pages earlier and bottom out the recursion - /// earlier and also improves time locality of access to spilled data on disk. - void PushSpilledPartition(Partition* partition); - - /// Calls Close() on every Partition in 'aggregated_partitions_', - /// 'spilled_partitions_', and 'hash_partitions_' and then resets the lists, - /// the vector and the partition pool. - void ClosePartitions(); - - /// Calls finalizes on all tuples starting at 'it'. - void CleanupHashTbl(const std::vector& agg_fn_evals, - PartitionedHashTable::Iterator it); -}; - -} // namespace doris diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc deleted file mode 100644 index 1b819a1c39..0000000000 --- a/be/src/exec/partitioned_hash_table.cc +++ /dev/null @@ -1,593 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/partitioned-hash-table.cc -// and modified by Doris - -#include - -#include -#include - -#include "exec/exec_node.h" -#include "exec/partitioned_hash_table.inline.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/raw_value.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" - -using namespace doris; -using namespace strings; - -// DEFINE_bool(enable_quadratic_probing, true, "Enable quadratic probing hash table"); - -// Random primes to multiply the seed with. -static uint32_t SEED_PRIMES[] = { - 1, // First seed must be 1, level 0 is used by other operators in the fragment. - 1431655781, 1183186591, 622729787, 472882027, 338294347, 275604541, 41161739, 29999999, - 27475109, 611603, 16313357, 11380003, 21261403, 33393119, 101, 71043403}; - -// Put a non-zero constant in the result location for nullptr. -// We don't want(nullptr, 1) to hash to the same as (0, 1). -// This needs to be as big as the biggest primitive type since the bytes -// get copied directly. 
-// TODO find a better approach, since primitives like CHAR(N) can be up
-// to 255 bytes
-static int64_t NULL_VALUE[] = {
- HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
- HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
- HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
- HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
- HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
- HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
- HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
- HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED};
-
-PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector<Expr*>& build_exprs,
- const std::vector<Expr*>& probe_exprs,
- bool stores_nulls,
- const std::vector<bool>& finds_nulls,
- int32_t initial_seed, int max_levels,
- MemPool* mem_pool, MemPool* expr_results_pool)
- : build_exprs_(build_exprs),
- probe_exprs_(probe_exprs),
- stores_nulls_(stores_nulls),
- finds_nulls_(finds_nulls),
- finds_some_nulls_(std::accumulate(finds_nulls_.begin(), finds_nulls_.end(), false,
- std::logical_or<bool>())),
- level_(0),
- scratch_row_(nullptr),
- mem_pool_(mem_pool),
- expr_results_pool_(expr_results_pool) {
- DCHECK(!finds_some_nulls_ || stores_nulls_);
- // Compute the layout and buffer size to store the evaluated expr results
- DCHECK_EQ(build_exprs_.size(), probe_exprs_.size());
- DCHECK_EQ(build_exprs_.size(), finds_nulls_.size());
- DCHECK(!build_exprs_.empty());
-
- // Populate the seeds to use for all the levels. TODO: revisit how we generate these.
- DCHECK_GE(max_levels, 0);
- DCHECK_LT(max_levels, sizeof(SEED_PRIMES) / sizeof(SEED_PRIMES[0]));
- DCHECK_NE(initial_seed, 0);
- seeds_.resize(max_levels + 1);
- seeds_[0] = initial_seed;
- for (int i = 1; i <= max_levels; ++i) {
- seeds_[i] = seeds_[i - 1] * SEED_PRIMES[i];
- }
-}
-
-Status PartitionedHashTableCtx::Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples,
- const RowDescriptor& row_desc,
- const RowDescriptor& row_desc_probe) {
- int scratch_row_size = sizeof(Tuple*) * num_build_tuples;
- scratch_row_ = reinterpret_cast<TupleRow*>(malloc(scratch_row_size));
- if (UNLIKELY(scratch_row_ == nullptr)) {
- return Status::InternalError(
- "Failed to allocate {} bytes for scratch row of "
- "PartitionedHashTableCtx.",
- scratch_row_size);
- }
-
- // TODO chenhao replace ExprContext with ScalarFnEvaluator
- for (int i = 0; i < build_exprs_.size(); i++) {
- ExprContext* context = pool->add(new ExprContext(build_exprs_[i]));
- RETURN_IF_ERROR(context->prepare(state, row_desc));
- build_expr_evals_.push_back(context);
- }
- DCHECK_EQ(build_exprs_.size(), build_expr_evals_.size());
-
- for (int i = 0; i < probe_exprs_.size(); i++) {
- ExprContext* context = pool->add(new ExprContext(probe_exprs_[i]));
- RETURN_IF_ERROR(context->prepare(state, row_desc_probe));
- probe_expr_evals_.push_back(context);
- }
- DCHECK_EQ(probe_exprs_.size(), probe_expr_evals_.size());
- return expr_values_cache_.Init(state, build_exprs_);
-}
-
-Status PartitionedHashTableCtx::Create(ObjectPool* pool, RuntimeState* state,
- const std::vector<Expr*>& build_exprs,
- const std::vector<Expr*>& probe_exprs, bool stores_nulls,
- const std::vector<bool>& finds_nulls, int32_t initial_seed,
- int max_levels, int num_build_tuples, MemPool* mem_pool,
- MemPool* expr_results_pool, const RowDescriptor& row_desc,
- const RowDescriptor&
row_desc_probe,
- std::unique_ptr<PartitionedHashTableCtx>* ht_ctx) {
- ht_ctx->reset(new PartitionedHashTableCtx(build_exprs, probe_exprs, stores_nulls, finds_nulls,
- initial_seed, max_levels, mem_pool,
- expr_results_pool));
- return (*ht_ctx)->Init(pool, state, num_build_tuples, row_desc, row_desc_probe);
-}
-
-Status PartitionedHashTableCtx::Open(RuntimeState* state) {
- // TODO chenhao replace ExprContext with ScalarFnEvaluator
- for (int i = 0; i < build_expr_evals_.size(); i++) {
- RETURN_IF_ERROR(build_expr_evals_[i]->open(state));
- }
- for (int i = 0; i < probe_expr_evals_.size(); i++) {
- RETURN_IF_ERROR(probe_expr_evals_[i]->open(state));
- }
- return Status::OK();
-}
-
-void PartitionedHashTableCtx::Close(RuntimeState* state) {
- free(scratch_row_);
- scratch_row_ = nullptr;
- expr_values_cache_.Close();
- for (int i = 0; i < build_expr_evals_.size(); i++) {
- build_expr_evals_[i]->close(state);
- }
-
- for (int i = 0; i < probe_expr_evals_.size(); i++) {
- probe_expr_evals_[i]->close(state);
- }
-
- // TODO chenhao release new expr in Init, remove this after merging
- // ScalarFnEvaluator.
- build_expr_evals_.clear();
- probe_expr_evals_.clear();
-}
-
-void PartitionedHashTableCtx::FreeBuildLocalAllocations() {
- //ExprContext::FreeLocalAllocations(build_expr_evals_);
-}
-
-void PartitionedHashTableCtx::FreeProbeLocalAllocations() {
- //ExprContext::FreeLocalAllocations(probe_expr_evals_);
-}
-
-void PartitionedHashTableCtx::FreeLocalAllocations() {
- FreeBuildLocalAllocations();
- FreeProbeLocalAllocations();
-}
-
-uint32_t PartitionedHashTableCtx::Hash(const void* input, int len, uint32_t hash) const {
- /// Use CRC hash at first level for better performance. Switch to murmur hash at
- /// subsequent levels since CRC doesn't randomize well with different seed inputs.
- if (level_ == 0) return HashUtil::hash(input, len, hash);
- return HashUtil::murmur_hash2_64(input, len, hash);
-}
-
-uint32_t PartitionedHashTableCtx::HashRow(const uint8_t* expr_values,
- const uint8_t* expr_values_null) const noexcept {
- DCHECK_LT(level_, seeds_.size());
- if (expr_values_cache_.var_result_offset() == -1) {
- /// This handles NULLs implicitly since a constant seed value was put
- /// into results buffer for nulls.
- return Hash(expr_values, expr_values_cache_.expr_values_bytes_per_row(), seeds_[level_]);
- } else {
- return PartitionedHashTableCtx::HashVariableLenRow(expr_values, expr_values_null);
- }
-}
-
-bool PartitionedHashTableCtx::EvalRow(TupleRow* row, const std::vector<ExprContext*>& ctxs,
- uint8_t* expr_values, uint8_t* expr_values_null) noexcept {
- bool has_null = false;
- for (int i = 0; i < ctxs.size(); ++i) {
- void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
- void* val = ctxs[i]->get_value(row);
- if (val == nullptr) {
- // If the table doesn't store nulls, no reason to keep evaluating
- if (!stores_nulls_) return true;
- expr_values_null[i] = true;
- val = reinterpret_cast<void*>(&NULL_VALUE);
- has_null = true;
- DCHECK_LE(build_exprs_[i]->type().get_slot_size(), sizeof(NULL_VALUE));
- RawValue::write(val, loc, build_exprs_[i]->type(), nullptr);
- } else {
- expr_values_null[i] = false;
- DCHECK_LE(build_exprs_[i]->type().get_slot_size(), sizeof(NULL_VALUE));
- RawValue::write(val, loc, build_exprs_[i]->type(), expr_results_pool_);
- }
- }
- return has_null;
-}
-
-uint32_t PartitionedHashTableCtx::HashVariableLenRow(const uint8_t* expr_values,
- const uint8_t* expr_values_null) const {
- uint32_t hash = seeds_[level_];
- int var_result_offset = expr_values_cache_.var_result_offset();
- // Hash the non-var length portions (if there are any)
- if (var_result_offset != 0) {
- hash = Hash(expr_values, var_result_offset, hash);
- }
-
- for (int i = 0; i < build_exprs_.size(); ++i) {
- // non-string and null slots are already part of 'expr_values'.
- // if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING
- PrimitiveType type = build_exprs_[i]->type().type;
- if (type != TYPE_CHAR && type != TYPE_VARCHAR && type != TYPE_STRING) {
- continue;
- }
-
- const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
- if (expr_values_null[i]) {
- // Hash the null random seed values at 'loc'
- hash = Hash(loc, sizeof(StringValue), hash);
- } else {
- // Hash the string
- // TODO: when using CRC hash on empty string, this only swaps bytes.
- const StringValue* str = reinterpret_cast<const StringValue*>(loc);
- hash = Hash(str->ptr, str->len, hash);
- }
- }
- return hash;
-}
-
-template <bool FORCE_NULL_EQUALITY>
-bool PartitionedHashTableCtx::Equals(TupleRow* build_row, const uint8_t* expr_values,
- const uint8_t* expr_values_null) const noexcept {
- for (int i = 0; i < build_expr_evals_.size(); ++i) {
- void* val = build_expr_evals_[i]->get_value(build_row);
- if (val == nullptr) {
- if (!(FORCE_NULL_EQUALITY || finds_nulls_[i])) return false;
- if (!expr_values_null[i]) return false;
- continue;
- } else {
- if (expr_values_null[i]) return false;
- }
-
- const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
- if (!RawValue::eq(loc, val, build_exprs_[i]->type())) {
- return false;
- }
- }
- return true;
-}
-
-template bool PartitionedHashTableCtx::Equals<true>(TupleRow* build_row, const uint8_t* expr_values,
- const uint8_t* expr_values_null) const;
-template bool PartitionedHashTableCtx::Equals<false>(TupleRow* build_row,
- const uint8_t* expr_values,
- const uint8_t* expr_values_null) const;
-
-PartitionedHashTableCtx::ExprValuesCache::ExprValuesCache()
- : capacity_(0),
- cur_expr_values_(nullptr),
- cur_expr_values_null_(nullptr),
- cur_expr_values_hash_(nullptr),
- cur_expr_values_hash_end_(nullptr),
- expr_values_array_(nullptr),
- expr_values_null_array_(nullptr),
- expr_values_hash_array_(nullptr),
- null_bitmap_(0) {}
-
-Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state,
- const std::vector<Expr*>& build_exprs) {
- // Initialize the number of expressions.
- num_exprs_ = build_exprs.size();
- // Compute the layout of evaluated values of a row.
- expr_values_bytes_per_row_ =
- Expr::compute_results_layout(build_exprs, &expr_values_offsets_, &var_result_offset_);
- if (expr_values_bytes_per_row_ == 0) {
- DCHECK_EQ(num_exprs_, 0);
- return Status::OK();
- }
- DCHECK_GT(expr_values_bytes_per_row_, 0);
- // Compute the maximum number of cached rows which can fit in the memory budget.
- // TODO: Find the optimal prefetch batch size. This may be something
- // processor dependent so we may need calibration at Impala startup time.
- capacity_ = std::max(1, std::min(state->batch_size(), - MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_)); - - int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - if (UNLIKELY(!thread_context()->thread_mem_tracker()->check_limit(mem_usage))) { - capacity_ = 0; - string details = Substitute( - "PartitionedHashTableCtx::ExprValuesCache failed to allocate $0 bytes", mem_usage); - RETURN_LIMIT_EXCEEDED(state, details, mem_usage); - } - - int expr_values_size = expr_values_bytes_per_row_ * capacity_; - expr_values_array_.reset(new uint8_t[expr_values_size]); - cur_expr_values_ = expr_values_array_.get(); - memset(cur_expr_values_, 0, expr_values_size); - - int expr_values_null_size = num_exprs_ * capacity_; - expr_values_null_array_.reset(new uint8_t[expr_values_null_size]); - cur_expr_values_null_ = expr_values_null_array_.get(); - memset(cur_expr_values_null_, 0, expr_values_null_size); - - expr_values_hash_array_.reset(new uint32_t[capacity_]); - cur_expr_values_hash_ = expr_values_hash_array_.get(); - cur_expr_values_hash_end_ = cur_expr_values_hash_; - memset(cur_expr_values_hash_, 0, sizeof(uint32) * capacity_); - - null_bitmap_.Reset(capacity_); - return Status::OK(); -} - -void PartitionedHashTableCtx::ExprValuesCache::Close() { - if (capacity_ == 0) return; - cur_expr_values_ = nullptr; - cur_expr_values_null_ = nullptr; - cur_expr_values_hash_ = nullptr; - cur_expr_values_hash_end_ = nullptr; - expr_values_array_.reset(); - expr_values_null_array_.reset(); - expr_values_hash_array_.reset(); - null_bitmap_.Reset(0); -} - -int PartitionedHashTableCtx::ExprValuesCache::MemUsage(int capacity, int expr_values_bytes_per_row, - int num_exprs) { - return expr_values_bytes_per_row * capacity + // expr_values_array_ - num_exprs * capacity + // expr_values_null_array_ - sizeof(uint32) * capacity + // expr_values_hash_array_ - Bitmap::MemUsage(capacity); // null_bitmap_ -} - -void PartitionedHashTableCtx::ExprValuesCache::ResetIterators() { - cur_expr_values_ = expr_values_array_.get(); - cur_expr_values_null_ = expr_values_null_array_.get(); - cur_expr_values_hash_ = expr_values_hash_array_.get(); -} - -void PartitionedHashTableCtx::ExprValuesCache::Reset() noexcept { - ResetIterators(); - // Set the end pointer after resetting the other pointers so they point to - // the same location. - cur_expr_values_hash_end_ = cur_expr_values_hash_; - null_bitmap_.SetAllBits(false); -} - -void PartitionedHashTableCtx::ExprValuesCache::ResetForRead() { - // Record the end of hash values iterator to be used in AtEnd(). - // Do it before resetting the pointers. 
- cur_expr_values_hash_end_ = cur_expr_values_hash_; - ResetIterators(); -} - -constexpr double PartitionedHashTable::MAX_FILL_FACTOR; -constexpr int64_t PartitionedHashTable::DATA_PAGE_SIZE; - -PartitionedHashTable* PartitionedHashTable::Create(Suballocator* allocator, bool stores_duplicates, - int num_build_tuples, - BufferedTupleStream3* tuple_stream, - int64_t max_num_buckets, - int64_t initial_num_buckets) { - return new PartitionedHashTable(config::enable_quadratic_probing, allocator, stores_duplicates, - num_build_tuples, tuple_stream, max_num_buckets, - initial_num_buckets); -} - -PartitionedHashTable::PartitionedHashTable(bool quadratic_probing, Suballocator* allocator, - bool stores_duplicates, int num_build_tuples, - BufferedTupleStream3* stream, int64_t max_num_buckets, - int64_t num_buckets) - : allocator_(allocator), - tuple_stream_(stream), - stores_tuples_(num_build_tuples == 1), - stores_duplicates_(stores_duplicates), - quadratic_probing_(quadratic_probing), - total_data_page_size_(0), - next_node_(nullptr), - node_remaining_current_page_(0), - num_duplicate_nodes_(0), - max_num_buckets_(max_num_buckets), - buckets_(nullptr), - num_buckets_(num_buckets), - num_filled_buckets_(0), - num_buckets_with_duplicates_(0), - num_build_tuples_(num_build_tuples), - has_matches_(false), - num_probes_(0), - num_failed_probes_(0), - travel_length_(0), - num_hash_collisions_(0), - num_resizes_(0) { - DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2"; - DCHECK_GT(num_buckets, 0) << "num_buckets must be larger than 0"; - DCHECK(stores_tuples_ || stream != nullptr); -} - -Status PartitionedHashTable::Init(bool* got_memory) { - int64_t buckets_byte_size = num_buckets_ * sizeof(Bucket); - RETURN_IF_ERROR(allocator_->Allocate(buckets_byte_size, &bucket_allocation_)); - if (bucket_allocation_ == nullptr) { - num_buckets_ = 0; - *got_memory = false; - return Status::OK(); - } - buckets_ = reinterpret_cast(bucket_allocation_->data()); - memset(buckets_, 0, buckets_byte_size); - *got_memory = true; - return Status::OK(); -} - -void PartitionedHashTable::Close() { - // Print statistics only for the large or heavily used hash tables. - // TODO: Tweak these numbers/conditions, or print them always? - const int64_t LARGE_HT = 128 * 1024; - const int64_t HEAVILY_USED = 1024 * 1024; - // TODO: These statistics should go to the runtime profile as well. 
- if ((num_buckets_ > LARGE_HT) || (num_probes_ > HEAVILY_USED)) VLOG_CRITICAL << PrintStats(); - for (auto& data_page : data_pages_) allocator_->Free(std::move(data_page)); - data_pages_.clear(); - if (bucket_allocation_ != nullptr) allocator_->Free(std::move(bucket_allocation_)); -} - -Status PartitionedHashTable::CheckAndResize(uint64_t buckets_to_fill, - const PartitionedHashTableCtx* ht_ctx, - bool* got_memory) { - uint64_t shift = 0; - while (num_filled_buckets_ + buckets_to_fill > (num_buckets_ << shift) * MAX_FILL_FACTOR) { - ++shift; - } - if (shift > 0) return ResizeBuckets(num_buckets_ << shift, ht_ctx, got_memory); - *got_memory = true; - return Status::OK(); -} - -Status PartitionedHashTable::ResizeBuckets(int64_t num_buckets, - const PartitionedHashTableCtx* ht_ctx, - bool* got_memory) { - DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) - << "num_buckets=" << num_buckets << " must be a power of 2"; - DCHECK_GT(num_buckets, num_filled_buckets_) - << "Cannot shrink the hash table to smaller number of buckets than the number of " - << "filled buckets."; - VLOG_CRITICAL << "Resizing hash table from " << num_buckets_ << " to " << num_buckets - << " buckets."; - if (max_num_buckets_ != -1 && num_buckets > max_num_buckets_) { - *got_memory = false; - return Status::OK(); - } - ++num_resizes_; - - // All memory that can grow proportional to the input should come from the block mgrs - // mem tracker. - // Note that while we copying over the contents of the old hash table, we need to have - // allocated both the old and the new hash table. Once we finish, we return the memory - // of the old hash table. - // int64_t old_size = num_buckets_ * sizeof(Bucket); - int64_t new_size = num_buckets * sizeof(Bucket); - - std::unique_ptr new_allocation; - RETURN_IF_ERROR(allocator_->Allocate(new_size, &new_allocation)); - if (new_allocation == nullptr) { - *got_memory = false; - return Status::OK(); - } - Bucket* new_buckets = reinterpret_cast(new_allocation->data()); - memset(new_buckets, 0, new_size); - - // Walk the old table and copy all the filled buckets to the new (resized) table. - // We do not have to do anything with the duplicate nodes. This operation is expected - // to succeed. - for (PartitionedHashTable::Iterator iter = Begin(ht_ctx); !iter.AtEnd(); - NextFilledBucket(&iter.bucket_idx_, &iter.node_)) { - Bucket* bucket_to_copy = &buckets_[iter.bucket_idx_]; - bool found = false; - int64_t bucket_idx = - Probe(new_buckets, num_buckets, nullptr, bucket_to_copy->hash, &found); - DCHECK(!found); - DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND) - << " Probe failed even though " - " there are free buckets. 
" - << num_buckets << " " << num_filled_buckets_; - Bucket* dst_bucket = &new_buckets[bucket_idx]; - *dst_bucket = *bucket_to_copy; - } - - num_buckets_ = num_buckets; - allocator_->Free(std::move(bucket_allocation_)); - bucket_allocation_ = std::move(new_allocation); - buckets_ = reinterpret_cast(bucket_allocation_->data()); - *got_memory = true; - return Status::OK(); -} - -bool PartitionedHashTable::GrowNodeArray(Status* status) { - std::unique_ptr allocation; - *status = allocator_->Allocate(DATA_PAGE_SIZE, &allocation); - if (!status->ok() || allocation == nullptr) return false; - next_node_ = reinterpret_cast(allocation->data()); - data_pages_.push_back(std::move(allocation)); - node_remaining_current_page_ = DATA_PAGE_SIZE / sizeof(DuplicateNode); - total_data_page_size_ += DATA_PAGE_SIZE; - return true; -} - -void PartitionedHashTable::DebugStringTuple(std::stringstream& ss, HtData& htdata, - const RowDescriptor* desc) { - if (stores_tuples_) { - ss << "(" << htdata.tuple << ")"; - } else { - ss << "(" << htdata.flat_row << ")"; - } - if (desc != nullptr) { - Tuple* row[num_build_tuples_]; - ss << " " << GetRow(htdata, reinterpret_cast(row))->to_string(*desc); - } -} - -string PartitionedHashTable::DebugString(bool skip_empty, bool show_match, - const RowDescriptor* desc) { - std::stringstream ss; - ss << std::endl; - for (int i = 0; i < num_buckets_; ++i) { - if (skip_empty && !buckets_[i].filled) continue; - ss << i << ": "; - if (show_match) { - if (buckets_[i].matched) { - ss << " [M]"; - } else { - ss << " [U]"; - } - } - if (buckets_[i].hasDuplicates) { - DuplicateNode* node = buckets_[i].bucketData.duplicates; - bool first = true; - ss << " [D] "; - while (node != nullptr) { - if (!first) ss << ","; - DebugStringTuple(ss, node->htdata, desc); - node = node->next; - first = false; - } - } else { - ss << " [B] "; - if (buckets_[i].filled) { - DebugStringTuple(ss, buckets_[i].bucketData.htdata, desc); - } else { - ss << " - "; - } - } - ss << std::endl; - } - return ss.str(); -} - -string PartitionedHashTable::PrintStats() const { - double curr_fill_factor = (double)num_filled_buckets_ / (double)num_buckets_; - double avg_travel = (double)travel_length_ / (double)num_probes_; - double avg_collisions = (double)num_hash_collisions_ / (double)num_filled_buckets_; - std::stringstream ss; - ss << "Buckets: " << num_buckets_ << " " << num_filled_buckets_ << " " << curr_fill_factor - << std::endl; - ss << "Duplicates: " << num_buckets_with_duplicates_ << " buckets " << num_duplicate_nodes_ - << " nodes" << std::endl; - ss << "Probes: " << num_probes_ << std::endl; - ss << "FailedProbes: " << num_failed_probes_ << std::endl; - ss << "Travel: " << travel_length_ << " " << avg_travel << std::endl; - ss << "HashCollisions: " << num_hash_collisions_ << " " << avg_collisions << std::endl; - ss << "Resizes: " << num_resizes_ << std::endl; - return ss.str(); -} diff --git a/be/src/exec/partitioned_hash_table.h b/be/src/exec/partitioned_hash_table.h deleted file mode 100644 index c531b8da0a..0000000000 --- a/be/src/exec/partitioned_hash_table.h +++ /dev/null @@ -1,956 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/partitioned-hash-table.h
-// and modified by Doris
-
-#pragma once
-
-#include <memory>
-#include <vector>
-
-#include "common/compiler_util.h"
-#include "runtime/buffered_tuple_stream3.h"
-#include "runtime/bufferpool/suballocator.h"
-#include "runtime/tuple_row.h"
-#include "util/bitmap.h"
-
-namespace doris {
-
-class Expr;
-class ExprContext;
-class PartitionedHashTable;
-class RowDescriptor;
-class RuntimeState;
-class Tuple;
-class TupleRow;
-
-/// Linear or quadratic probing hash table implementation tailored to the usage pattern
-/// for partitioned hash aggregation and hash joins. The hash table stores TupleRows and
-/// allows for different exprs for insertions and finds. This is the pattern we use for
-/// joins and aggregation where the input/build tuple row descriptor is different from the
-/// find/probe descriptor. The implementation is designed to allow codegen for some paths.
-//
-/// In addition to the hash table there is also an accompanying hash table context that is
-/// used for insertions and probes. For example, the hash table context stores evaluated
-/// expr results for the current row being processed when possible into a contiguous
-/// memory buffer. This allows for efficient hash computation.
-//
-/// The hash table does not support removes. The hash table is not thread safe.
-/// The table is optimized for the partition hash aggregation and hash joins and is not
-/// intended to be a generic hash table implementation. The API loosely mimics the
-/// std::hashset API.
-//
-/// The data (rows) are stored in a BufferedTupleStream3. The basic data structure of this
-/// hash table is a vector of buckets. The buckets (indexed by the mod of the hash)
-/// contain a pointer to either the slot in the tuple-stream or in case of duplicate
-/// values, to the head of a linked list of nodes that in turn contain a pointer to
-/// tuple-stream slots. When inserting an entry we start at the bucket at position
-/// (hash % size) and search for either a bucket with the same hash or for an empty
-/// bucket. If a bucket with the same hash is found, we then compare for row equality and
-/// either insert a duplicate node if the equality is true, or continue the search if the
-/// row equality is false. Similarly, when probing we start from the bucket at position
-/// (hash % size) and search for an entry with the same hash or for an empty bucket.
-/// In the former case, we then check for row equality and continue the search if the row
-/// equality is false. In the latter case, the probe is not successful. When growing the
-/// hash table, the number of buckets is doubled. We trigger a resize when the fill
-/// factor is approx 75%. Due to the doubling nature of the buckets, we require that the
-/// number of buckets is a power of 2. This allows us to perform a modulo of the hash
-/// using a bitmask.
-///
-/// We choose to use linear or quadratic probing because they exhibit good (predictable)
-/// cache behavior.
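The comment above pins down the probing contract: the bucket count is a power of two, so the home bucket is hash & (num_buckets - 1), and a collision walks forward either one bucket at a time (linear) or by a growing step (quadratic) until an empty bucket or one with a matching hash is found. As a rough, freestanding illustration of that walk (hypothetical names; a sketch, not the deleted Doris/Impala code):

```cpp
#include <cstdint>
#include <vector>

struct Slot {
    bool filled = false;
    uint32_t hash = 0;
};

// Walks a power-of-two bucket array from the hash's home position and returns
// the first bucket that is empty or carries the same hash, or -1 if
// 'max_travel' steps run out (the BUCKET_NOT_FOUND case in the deleted code).
int64_t Probe(const std::vector<Slot>& slots, uint32_t hash, bool quadratic,
              int64_t max_travel) {
    const uint64_t mask = slots.size() - 1; // valid because size is a power of two
    uint64_t idx = hash & mask;
    for (int64_t step = 1; step <= max_travel; ++step) {
        const Slot& s = slots[idx];
        if (!s.filled || s.hash == hash) return static_cast<int64_t>(idx);
        // Linear probing always advances by one bucket; quadratic probing
        // advances by a growing step, landing 1, 3, 6, ... past the home bucket.
        idx = (idx + (quadratic ? static_cast<uint64_t>(step) : 1)) & mask;
    }
    return -1;
}

int main() {
    std::vector<Slot> slots(8);
    slots[2] = {true, 5}; // bucket 2 already taken by a different key
    // Hash 10 also maps to bucket 2 (10 & 7), so the probe must step once.
    return Probe(slots, /*hash=*/10, /*quadratic=*/false, /*max_travel=*/8) == 3 ? 0 : 1;
}
```

With the quadratic option the probe visits triangular offsets from the home bucket, which breaks up clusters while keeping the first few probes cache-local, matching the predictable-cache-behavior rationale given above.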
-/// -/// The first NUM_SMALL_BLOCKS of nodes_ are made of blocks less than the IO size (of 8MB) -/// to reduce the memory footprint of small queries. -/// -/// TODO: Compare linear and quadratic probing and remove the loser. -/// TODO: We currently use 32-bit hashes. There is room in the bucket structure for at -/// least 48-bits. We should exploit this space. -/// TODO: Consider capping the probes with a threshold value. If an insert reaches -/// that threshold it is inserted to another linked list of overflow entries. -/// TODO: Smarter resizes, and perhaps avoid using powers of 2 as the hash table size. -/// TODO: this is not a fancy hash table in terms of memory access patterns -/// (cuckoo-hashing or something that spills to disk). We will likely want to invest -/// more time into this. -/// TODO: hash-join and aggregation have very different access patterns. Joins insert all -/// the rows and then calls scan to find them. Aggregation interleaves FindProbeRow() and -/// Inserts(). We may want to optimize joins more heavily for Inserts() (in particular -/// growing). -/// TODO: Batched interface for inserts and finds. -/// TODO: Do we need to check mem limit exceeded so often. Check once per batch? -/// TODO: as an optimization, compute variable-length data size for the agg node. - -/// Control block for a hash table. This class contains the logic as well as the variables -/// needed by a thread to operate on a hash table. -class PartitionedHashTableCtx { -public: - /// Create a hash table context with the specified parameters, invoke Init() to - /// initialize the new hash table context and return it in 'ht_ctx'. Expression - /// evaluators for the build and probe expressions will also be allocated. - /// Please see the comments of HashTableCtx constructor and Init() for details - /// of other parameters. - static Status Create(ObjectPool* pool, RuntimeState* state, - const std::vector& build_exprs, - const std::vector& probe_exprs, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, int max_levels, - int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, - const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe, - std::unique_ptr* ht_ctx); - - /// Initialize the build and probe expression evaluators. - Status Open(RuntimeState* state); - - /// Call to cleanup any resources allocated by the expression evaluators. - void Close(RuntimeState* state); - - /// Free local allocations made by build and probe expression evaluators respectively. - void FreeBuildLocalAllocations(); - void FreeProbeLocalAllocations(); - - /// Free local allocations of both build and probe expression evaluators. - void FreeLocalAllocations(); - - void set_level(int level); - - int ALWAYS_INLINE level() const { return level_; } - - uint32_t ALWAYS_INLINE seed(int level) { return seeds_.at(level); } - - TupleRow* ALWAYS_INLINE scratch_row() const { return scratch_row_; } - - /// Returns the results of the expression at 'expr_idx' evaluated at the current row. - /// This value is invalid if the expr evaluated to nullptr. - /// TODO: this is an awkward abstraction but aggregation node can take advantage of - /// it and save some expr evaluation calls. - void* ALWAYS_INLINE ExprValue(int expr_idx) const { - return expr_values_cache_.ExprValuePtr(expr_values_cache_.cur_expr_values(), expr_idx); - } - - /// Returns if the expression at 'expr_idx' is evaluated to nullptr for the current row. 
- bool ALWAYS_INLINE ExprValueNull(int expr_idx) const { - return static_cast(*(expr_values_cache_.cur_expr_values_null() + expr_idx)); - } - - /// Evaluate and hash the build/probe row, saving the evaluation to the current row of - /// the ExprValuesCache in this hash table context: the results are saved in - /// 'cur_expr_values_', the nullness of expressions values in 'cur_expr_values_null_', - /// and the hashed expression values in 'cur_expr_values_hash_'. Returns false if this - /// row should be rejected (doesn't need to be processed further) because it contains - /// nullptr. These need to be inlined in the IR module so we can find and replace the - /// calls to EvalBuildRow()/EvalProbeRow(). - bool EvalAndHashBuild(TupleRow* row); - bool EvalAndHashProbe(TupleRow* row); - - /// Struct that returns the number of constants replaced by ReplaceConstants(). - struct HashTableReplacedConstants { - int stores_nulls; - int finds_some_nulls; - int stores_tuples; - int stores_duplicates; - int quadratic_probing; - }; - - /// To enable prefetching, the hash table building and probing are pipelined by the - /// exec nodes. A set of rows in a row batch will be evaluated and hashed first and - /// the corresponding hash table buckets are prefetched before they are probed against - /// the hash table. ExprValuesCache is a container for caching the results of - /// expressions evaluations for the rows in a prefetch set to avoid re-evaluating the - /// rows again during probing. Expressions evaluation can be very expensive. - /// - /// The expression evaluation results are cached in the following data structures: - /// - /// - 'expr_values_array_' is an array caching the results of the rows - /// evaluated against either the build or probe expressions. 'cur_expr_values_' - /// is a pointer into this array. - /// - 'expr_values_null_array_' is an array caching the nullness of each evaluated - /// expression in each row. 'cur_expr_values_null_' is a pointer into this array. - /// - 'expr_values_hash_array_' is an array of cached hash values of the rows. - /// 'cur_expr_values_hash_' is a pointer into this array. - /// - 'null_bitmap_' is a bitmap which indicates rows evaluated to nullptr. - /// - /// ExprValuesCache provides an iterator like interface for performing a write pass - /// followed by a read pass. We refrain from providing an interface for random accesses - /// as there isn't a use case for it now and we want to avoid expensive multiplication - /// as the buffer size of each row is not necessarily power of two: - /// - Reset(), ResetForRead(): reset the iterators before writing / reading cached values. - /// - NextRow(): moves the iterators to point to the next row of cached values. - /// - AtEnd(): returns true if all cached rows have been read. Valid in read mode only. - /// - /// Various metadata information such as layout of results buffer is also stored in - /// this class. Note that the result buffer doesn't store variable length data. It only - /// contains pointers to the variable length data (e.g. if an expression value is a - /// StringValue). - /// - class ExprValuesCache { - public: - ExprValuesCache(); - - /// Allocates memory and initializes various data structures. Return error status - /// if memory allocation leads to the memory limits of the exec node to be exceeded. - Status Init(RuntimeState* state, const std::vector& build_exprs); - - /// Frees up various resources. - void Close(); - - /// Resets the cache states (iterators, end pointers etc) before writing. 
- void Reset() noexcept; - - /// Resets the iterators to the start before reading. Will record the current position - /// of the iterators in end pointer before resetting so AtEnd() can determine if all - /// cached values have been read. - void ResetForRead(); - - /// Advances the iterators to the next row by moving to the next entries in the - /// arrays of cached values. - void ALWAYS_INLINE NextRow(); - - /// Compute the total memory usage of this ExprValuesCache. - static int MemUsage(int capacity, int results_buffer_size, int num_build_exprs); - - /// Returns the maximum number rows of expression values states which can be cached. - int ALWAYS_INLINE capacity() const { return capacity_; } - - /// Returns the total size in bytes of a row of evaluated expressions' values. - int ALWAYS_INLINE expr_values_bytes_per_row() const { return expr_values_bytes_per_row_; } - - /// Returns the offset into the result buffer of the first variable length - /// data results. - int ALWAYS_INLINE var_result_offset() const { return var_result_offset_; } - - /// Returns true if the current read pass is complete, meaning all cached values - /// have been read. - bool ALWAYS_INLINE AtEnd() const { - return cur_expr_values_hash_ == cur_expr_values_hash_end_; - } - - /// Returns true if the current row is null but nulls are not considered in the current - /// phase (build or probe). - bool ALWAYS_INLINE IsRowNull() const { return null_bitmap_.Get(CurIdx()); } - - /// Record in a bitmap that the current row is null but nulls are not considered in - /// the current phase (build or probe). - void ALWAYS_INLINE SetRowNull() { null_bitmap_.Set(CurIdx(), true); } - - /// Returns the hash values of the current row. - uint32_t ALWAYS_INLINE CurExprValuesHash() const { return *cur_expr_values_hash_; } - - /// Sets the hash values for the current row. - void ALWAYS_INLINE SetCurExprValuesHash(uint32_t hash) { *cur_expr_values_hash_ = hash; } - - /// Returns a pointer to the expression value at 'expr_idx' in 'expr_values'. - template - T ExprValuePtr(T expr_values, int expr_idx) const { - return expr_values + expr_values_offsets_[expr_idx]; - }; - - /// Returns the current row's expression buffer. The expression values in the buffer - /// are accessed using ExprValuePtr(). - uint8_t* ALWAYS_INLINE cur_expr_values() const { return cur_expr_values_; } - - /// Returns null indicator bytes for the current row, one per expression. Non-zero - /// bytes mean nullptr, zero bytes mean non-nullptr. Indexed by the expression index. - /// These are uint8_t instead of bool to simplify codegen with IRBuilder. - /// TODO: is there actually a valid reason why this is necessary for codegen? - uint8_t* ALWAYS_INLINE cur_expr_values_null() const { return cur_expr_values_null_; } - - /// Returns the offset into the results buffer of the expression value at 'expr_idx'. - int ALWAYS_INLINE expr_values_offsets(int expr_idx) const { - return expr_values_offsets_[expr_idx]; - } - - private: - friend class PartitionedHashTableCtx; - - /// Resets the iterators to the beginning of the cache values' arrays. - void ResetIterators(); - - /// Returns the offset in number of rows into the cached values' buffer. - int ALWAYS_INLINE CurIdx() const { - return cur_expr_values_hash_ - expr_values_hash_array_.get(); - } - - /// Max amount of memory in bytes for caching evaluated expression values. 
- static const int MAX_EXPR_VALUES_ARRAY_SIZE = 256 << 10; - - /// Maximum number of rows of expressions evaluation states which this - /// ExprValuesCache can cache. - int capacity_; - - /// Byte size of a row of evaluated expression values. Never changes once set, - /// can be used for constant substitution during codegen. - int expr_values_bytes_per_row_; - - /// Number of build/probe expressions. - int num_exprs_; - - /// Pointer into 'expr_values_array_' for the current row's expression values. - uint8_t* cur_expr_values_; - - /// Pointer into 'expr_values_null_array_' for the current row's nullness of each - /// expression value. - uint8_t* cur_expr_values_null_; - - /// Pointer into 'expr_hash_value_array_' for the hash value of current row's - /// expression values. - uint32_t* cur_expr_values_hash_; - - /// Pointer to the buffer one beyond the end of the last entry of cached expressions' - /// hash values. - uint32_t* cur_expr_values_hash_end_; - - /// Array for caching up to 'capacity_' number of rows worth of evaluated expression - /// values. Each row consumes 'expr_values_bytes_per_row_' number of bytes. - std::unique_ptr expr_values_array_; - - /// Array for caching up to 'capacity_' number of rows worth of null booleans. - /// Each row contains 'num_exprs_' booleans to indicate nullness of expression values. - /// Used when the hash table supports nullptr. Use 'uint8_t' to guarantee each entry is 1 - /// byte as sizeof(bool) is implementation dependent. The IR depends on this - /// assumption. - std::unique_ptr expr_values_null_array_; - - /// Array for caching up to 'capacity_' number of rows worth of hashed values. - std::unique_ptr expr_values_hash_array_; - - /// One bit for each row. A bit is set if that row is not hashed as it's evaluated - /// to nullptr but the hash table doesn't support nullptr. Such rows may still be included - /// in outputs for certain join types (e.g. left anti joins). - Bitmap null_bitmap_; - - /// Maps from expression index to the byte offset into a row of expression values. - /// One entry per build/probe expression. - std::vector expr_values_offsets_; - - /// Byte offset into 'cur_expr_values_' that begins the variable length results for - /// a row. If -1, there are no variable length slots. Never changes once set, can be - /// constant substituted with codegen. - int var_result_offset_; - }; - - ExprValuesCache* ALWAYS_INLINE expr_values_cache() { return &expr_values_cache_; } - -private: - friend class PartitionedAggregationNode; - friend class PartitionedHashTable; - friend class HashTableTest_HashEmpty_Test; - - /// Construct a hash table context. - /// - build_exprs are the exprs that should be used to evaluate rows during Insert(). - /// - probe_exprs are used during FindProbeRow() - /// - stores_nulls: if false, TupleRows with nulls are ignored during Insert - /// - finds_nulls: if finds_nulls[i] is false, FindProbeRow() returns End() for - /// TupleRows with nulls in position i even if stores_nulls is true. - /// - initial_seed: initial seed value to use when computing hashes for rows with - /// level 0. Other levels have their seeds derived from this seed. - /// - max_levels: the max lhashevels we will hash with. - /// - mem_pool: the MemPool which the expression evaluators allocate from. Owned by the - /// exec node which owns this hash table context. Memory usage of the expression - /// value cache is charged against its MemTracker. 
- /// - /// TODO: stores_nulls is too coarse: for a hash table in which some columns are joined - /// with '<=>' and others with '=', stores_nulls could distinguish between columns - /// in which nulls are stored and columns in which they are not, which could save - /// space by not storing some rows we know will never match. - PartitionedHashTableCtx(const std::vector& build_exprs, - const std::vector& probe_exprs, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, - int max_levels, MemPool* mem_pool, MemPool* expr_results_pool); - - /// Allocate various buffers for storing expression evaluation results, hash values, - /// null bits etc. Also allocate evaluators for the build and probe expressions and - /// store them in 'pool'. Returns error if allocation causes query memory limit to - /// be exceeded or the evaluators fail to initialize. 'num_build_tuples' is the number - /// of tuples of a row in the build side, used for computing the size of a scratch row. - Status Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples, - const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe); - - /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null'. - /// This will be replaced by codegen. We don't want this inlined for replacing - /// with codegen'd functions so the function name does not change. - uint32_t HashRow(const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept; - - /// Wrapper function for calling correct HashUtil function in non-codegen'd case. - uint32_t Hash(const void* input, int len, uint32_t hash) const; - - /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness into - /// 'expr_values_null'. This will be replaced by codegen. We do not want this function - /// inlined when cross compiled because we need to be able to differentiate between - /// EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked into the - /// codegen'd function. - bool EvalBuildRow(TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept { - return EvalRow(row, build_expr_evals_, expr_values, expr_values_null); - } - - /// Evaluate 'row' over probe exprs, storing the values into 'expr_values' and nullness - /// into 'expr_values_null'. This will be replaced by codegen. - bool EvalProbeRow(TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept { - return EvalRow(row, probe_expr_evals_, expr_values, expr_values_null); - } - - /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null' - /// for a row with variable length fields (e.g. strings). - uint32_t HashVariableLenRow(const uint8_t* expr_values, const uint8_t* expr_values_null) const; - - /// Evaluate the exprs over row, storing the values into 'expr_values' and nullness into - /// 'expr_values_null'. Returns whether any expr evaluated to nullptr. This will be - /// replaced by codegen. - bool EvalRow(TupleRow* row, const std::vector& ctxs, uint8_t* expr_values, - uint8_t* expr_values_null) noexcept; - - /// Returns true if the values of build_exprs evaluated over 'build_row' equal the - /// values in 'expr_values' with nullness 'expr_values_null'. FORCE_NULL_EQUALITY is - /// true if all nulls should be treated as equal, regardless of the values of - /// 'finds_nulls_'. This will be replaced by codegen. 
- template - bool Equals(TupleRow* build_row, const uint8_t* expr_values, - const uint8_t* expr_values_null) const noexcept; - - /// Helper function that calls Equals() with the current row. Always inlined so that - /// it does not appear in cross-compiled IR. - template - bool ALWAYS_INLINE Equals(TupleRow* build_row) const { - return Equals(build_row, expr_values_cache_.cur_expr_values(), - expr_values_cache_.cur_expr_values_null()); - } - - /// Cross-compiled function to access member variables used in CodegenHashRow(). - uint32_t GetHashSeed() const; - - /// Functions to be replaced by codegen to specialize the hash table. - bool stores_nulls() const { return stores_nulls_; } - bool finds_some_nulls() const { return finds_some_nulls_; } - - const std::vector& build_exprs_; - std::vector build_expr_evals_; - - const std::vector& probe_exprs_; - std::vector probe_expr_evals_; - - /// Constants on how the hash table should behave. Joins and aggs have slightly - /// different behavior. - const bool stores_nulls_; - const std::vector finds_nulls_; - - /// finds_some_nulls_ is just the logical OR of finds_nulls_. - const bool finds_some_nulls_; - - /// The current level this context is working on. Each level needs to use a - /// different seed. - int level_; - - /// The seeds to use for hashing. Indexed by the level. - std::vector seeds_; - - /// The ExprValuesCache for caching expression evaluation results, null bytes and hash - /// values for rows. Used to store results of batch evaluations of rows. - ExprValuesCache expr_values_cache_; - - /// Scratch buffer to generate rows on the fly. - TupleRow* scratch_row_; - - /// MemPool for 'build_expr_evals_' and 'probe_expr_evals_' to allocate expr-managed - /// memory from. Not owned. - MemPool* mem_pool_; - - // MemPool for allocations by made EvalRow to copy expr's StringVal result. Not owned - MemPool* expr_results_pool_; -}; - -/// The hash table consists of a contiguous array of buckets that contain a pointer to the -/// data, the hash value and three flags: whether this bucket is filled, whether this -/// entry has been matched (used in right and full joins) and whether this entry has -/// duplicates. If there are duplicates, then the data is pointing to the head of a -/// linked list of duplicate nodes that point to the actual data. Note that the duplicate -/// nodes do not contain the hash value, because all the linked nodes have the same hash -/// value, the one in the bucket. The data is either a tuple stream index or a Tuple*. -/// This array of buckets is sparse, we are shooting for up to 3/4 fill factor (75%). The -/// data allocated by the hash table comes from the BufferPool. -class PartitionedHashTable { -private: - /// Rows are represented as pointers into the BufferedTupleStream data with one - /// of two formats, depending on the number of tuples in the row. - union HtData { - // For rows with multiple tuples per row, a pointer to the flattened TupleRow. - BufferedTupleStream3::FlatRowPtr flat_row; - Tuple* tuple; - }; - - /// Linked list of entries used for duplicates. - struct DuplicateNode { - /// Used for full outer and right {outer, anti, semi} joins. Indicates whether the - /// row in the DuplicateNode has been matched. - /// From an abstraction point of view, this is an awkward place to store this - /// information. - /// TODO: Fold this flag in the next pointer below. - bool matched; - - /// Chain to next duplicate node, nullptr when end of list. 
- DuplicateNode* next; - HtData htdata; - }; - - struct Bucket { - /// Whether this bucket contains a valid entry, or it is empty. - bool filled; - - /// Used for full outer and right {outer, anti, semi} joins. Indicates whether the - /// row in the bucket has been matched. - /// From an abstraction point of view, this is an awkward place to store this - /// information but it is efficient. This space is otherwise unused. - bool matched; - - /// Used in case of duplicates. If true, then the bucketData union should be used as - /// 'duplicates'. - bool hasDuplicates; - - /// Cache of the hash for data. - /// TODO: Do we even have to cache the hash value? - uint32_t hash; - - /// Either the data for this bucket or the linked list of duplicates. - union { - HtData htdata; - DuplicateNode* duplicates; - } bucketData; - }; - -public: - class Iterator; - - /// Returns a newly allocated HashTable. The probing algorithm is set by the - /// FLAG_enable_quadratic_probing. - /// - allocator: allocator to allocate bucket directory and data pages from. - /// - stores_duplicates: true if rows with duplicate keys may be inserted into the - /// hash table. - /// - num_build_tuples: number of Tuples in the build tuple row. - /// - tuple_stream: the tuple stream which contains the tuple rows index by the - /// hash table. Can be nullptr if the rows contain only a single tuple, in which - /// case the 'tuple_stream' is unused. - /// - max_num_buckets: the maximum number of buckets that can be stored. If we - /// try to grow the number of buckets to a larger number, the inserts will fail. - /// -1, if it unlimited. - /// - initial_num_buckets: number of buckets that the hash table should be initialized - /// with. - static PartitionedHashTable* Create(Suballocator* allocator, bool stores_duplicates, - int num_build_tuples, BufferedTupleStream3* tuple_stream, - int64_t max_num_buckets, int64_t initial_num_buckets); - - /// Allocates the initial bucket structure. Returns a non-OK status if an error is - /// encountered. If an OK status is returned , 'got_memory' is set to indicate whether - /// enough memory for the initial buckets was allocated from the Suballocator. - Status Init(bool* got_memory); - - /// Call to cleanup any resources. Must be called once. - void Close(); - - /// Inserts the row to the hash table. The caller is responsible for ensuring that the - /// table has free buckets. Returns true if the insertion was successful. Always - /// returns true if the table has free buckets and the key is not a duplicate. If the - /// key was a duplicate and memory could not be allocated for the new duplicate node, - /// returns false. If an error is encountered while creating a duplicate node, returns - /// false and sets 'status' to the error. - /// - /// 'flat_row' is a pointer to the flattened row in 'tuple_stream_' If the row contains - /// only one tuple, a pointer to that tuple is stored. Otherwise the 'flat_row' pointer - /// is stored. The 'row' is not copied by the hash table and the caller must guarantee - /// it stays in memory. This will not grow the hash table. - bool Insert(PartitionedHashTableCtx* ht_ctx, BufferedTupleStream3::FlatRowPtr flat_row, - TupleRow* row, Status* status); - - /// Prefetch the hash table bucket which the given hash value 'hash' maps to. - template - void PrefetchBucket(uint32_t hash); - - /// Returns an iterator to the bucket that matches the probe expression results that - /// are cached at the current position of the ExprValuesCache in 'ht_ctx'. 
Assumes that
- /// the ExprValuesCache was filled using EvalAndHashProbe(). Returns HashTable::End()
- /// if no match is found. The iterator can be iterated until HashTable::End() to find
- /// all the matching rows. Advancing the returned iterator will go to the next matching
- /// row. The matching rows do not need to be evaluated since all the nodes of a bucket
- /// are duplicates. One scan can be in progress for each 'ht_ctx'. Used in the probe
- /// phase of hash joins.
- Iterator FindProbeRow(PartitionedHashTableCtx* ht_ctx);
-
- /// If a match is found in the table, return an iterator as in FindProbeRow(). If a
- /// match was not present, return an iterator pointing to the empty bucket where the key
- /// should be inserted. Returns End() if the table is full. The caller can set the data
- /// in the bucket using a Set*() method on the iterator.
- Iterator FindBuildRowBucket(PartitionedHashTableCtx* ht_ctx, bool* found);
-
- /// Returns the number of elements inserted in the hash table
- int64_t size() const {
- return num_filled_buckets_ - num_buckets_with_duplicates_ + num_duplicate_nodes_;
- }
-
- /// Returns the number of empty buckets.
- int64_t EmptyBuckets() const { return num_buckets_ - num_filled_buckets_; }
-
- /// Returns the number of buckets
- int64_t num_buckets() const { return num_buckets_; }
-
- /// Returns the number of filled buckets
- int64_t num_filled_buckets() const { return num_filled_buckets_; }
-
- /// Returns the number of times the hash table has resized
- int64_t num_resize() const { return num_resizes_; }
-
- /// Returns the number of buckets with duplicates
- int64_t num_buckets_with_duplicates() const { return num_buckets_with_duplicates_; }
-
- /// Returns the number of duplicate nodes
- int64_t num_duplicates_nodes() const { return num_duplicate_nodes_; }
-
- /// Returns the number of probe operations
- int64_t num_probe() const { return num_probes_; }
-
- /// Returns the number of failed probe operations
- int64_t num_failed_probe() const { return num_failed_probes_; }
-
- /// Returns the total travel length of probe operations
- int64_t travel_length() const { return travel_length_; }
-
- /// Returns the load factor (the fraction of filled buckets)
- double load_factor() const { return static_cast<double>(num_filled_buckets_) / num_buckets_; }
-
- /// Return an estimate of the number of bytes needed to build the hash table
- /// structure for 'num_rows'. To do that, it estimates the number of buckets,
- /// rounded up to a power of two, and also assumes that there are no duplicates.
- static int64_t EstimateNumBuckets(int64_t num_rows) {
- /// Assume max 66% fill factor and no duplicates.
- return BitUtil::next_power_of_two(3 * num_rows / 2);
- }
- static int64_t EstimateSize(int64_t num_rows) {
- int64_t num_buckets = EstimateNumBuckets(num_rows);
- return num_buckets * sizeof(Bucket);
- }
-
- /// Return the size of a hash table bucket in bytes.
- static int64_t BucketSize() { return sizeof(Bucket); }
-
- /// Returns the memory occupied by the hash table, takes into account the number of
- /// duplicates.
- int64_t CurrentMemSize() const;
-
- /// Returns the number of inserts that can be performed before resizing the table.
- int64_t NumInsertsBeforeResize() const;
-
- /// Calculates the fill factor if 'buckets_to_fill' additional buckets were to be
- /// filled and resizes the hash table so that the projected fill factor is below the
- /// max fill factor.
- /// If 'got_memory' is true, then it is guaranteed at least 'rows_to_add' rows can be - /// inserted without need to resize. If there is not enough memory available to - /// resize the hash table, Status::OK()() is returned and 'got_memory' is false. If a - /// another error occurs, an error status may be returned. - Status CheckAndResize(uint64_t buckets_to_fill, const PartitionedHashTableCtx* ht_ctx, - bool* got_memory); - - /// Returns the number of bytes allocated to the hash table from the block manager. - int64_t ByteSize() const { return num_buckets_ * sizeof(Bucket) + total_data_page_size_; } - - /// Returns an iterator at the beginning of the hash table. Advancing this iterator - /// will traverse all elements. - Iterator Begin(const PartitionedHashTableCtx* ht_ctx); - - /// Return an iterator pointing to the first element (Bucket or DuplicateNode, if the - /// bucket has duplicates) in the hash table that does not have its matched flag set. - /// Used in right joins and full-outer joins. - Iterator FirstUnmatched(PartitionedHashTableCtx* ctx); - - /// Return true if there was a least one match. - bool HasMatches() const { return has_matches_; } - - /// Return end marker. - Iterator End() { return Iterator(); } - - /// Dump out the entire hash table to string. If 'skip_empty', empty buckets are - /// skipped. If 'show_match', it also prints the matched flag of each node. If - /// 'build_desc' is non-null, the build rows will be printed. Otherwise, only the - /// the addresses of the build rows will be printed. - std::string DebugString(bool skip_empty, bool show_match, const RowDescriptor* build_desc); - - /// Print the content of a bucket or node. - void DebugStringTuple(std::stringstream& ss, HtData& htdata, const RowDescriptor* desc); - - /// Update and print some statistics that can be used for performance debugging. - std::string PrintStats() const; - - /// Number of hash collisions so far in the lifetime of this object - int64_t NumHashCollisions() const { return num_hash_collisions_; } - - /// stl-like iterator interface. - class Iterator { - private: - /// Bucket index value when probe is not successful. - static const int64_t BUCKET_NOT_FOUND = -1; - - public: - Iterator() - : table_(nullptr), - scratch_row_(nullptr), - bucket_idx_(BUCKET_NOT_FOUND), - node_(nullptr) {} - - /// Iterates to the next element. It should be called only if !AtEnd(). - void Next(); - - /// Iterates to the next duplicate node. If the bucket does not have duplicates or - /// when it reaches the last duplicate node, then it moves the Iterator to AtEnd(). - /// Used when we want to iterate over all the duplicate nodes bypassing the Next() - /// interface (e.g. in semi/outer joins without other_join_conjuncts, in order to - /// iterate over all nodes of an unmatched bucket). - void NextDuplicate(); - - /// Iterates to the next element that does not have its matched flag set. Used in - /// right-outer and full-outer joins. - void NextUnmatched(); - - /// Return the current row or tuple. Callers must check the iterator is not AtEnd() - /// before calling them. The returned row is owned by the iterator and valid until - /// the next call to GetRow(). It is safe to advance the iterator. - TupleRow* GetRow() const; - Tuple* GetTuple() const; - - /// Set the current tuple for an empty bucket. Designed to be used with the iterator - /// returned from FindBuildRowBucket() in the case when the value is not found. It is - /// not valid to call this function if the bucket already has an entry. 
- void SetTuple(Tuple* tuple, uint32_t hash); - - /// Sets as matched the Bucket or DuplicateNode currently pointed by the iterator, - /// depending on whether the bucket has duplicates or not. The iterator cannot be - /// AtEnd(). - void SetMatched(); - - /// Returns the 'matched' flag of the current Bucket or DuplicateNode, depending on - /// whether the bucket has duplicates or not. It should be called only if !AtEnd(). - bool IsMatched() const; - - /// Resets everything but the pointer to the hash table. - void SetAtEnd(); - - /// Returns true if this iterator is at the end, i.e. GetRow() cannot be called. - bool ALWAYS_INLINE AtEnd() const { return bucket_idx_ == BUCKET_NOT_FOUND; } - - /// Prefetch the hash table bucket which the iterator is pointing to now. - template - void PrefetchBucket(); - - private: - friend class PartitionedHashTable; - - ALWAYS_INLINE - Iterator(PartitionedHashTable* table, TupleRow* row, int bucket_idx, DuplicateNode* node) - : table_(table), scratch_row_(row), bucket_idx_(bucket_idx), node_(node) {} - - PartitionedHashTable* table_; - - /// Scratch buffer to hold generated rows. Not owned. - TupleRow* scratch_row_; - - /// Current bucket idx. - int64_t bucket_idx_; - - /// Pointer to the current duplicate node. - DuplicateNode* node_; - }; - -private: - friend class Iterator; - friend class HashTableTest; - - /// Hash table constructor. Private because Create() should be used, instead - /// of calling this constructor directly. - /// - quadratic_probing: set to true when the probing algorithm is quadratic, as - /// opposed to linear. - PartitionedHashTable(bool quadratic_probing, Suballocator* allocator, bool stores_duplicates, - int num_build_tuples, BufferedTupleStream3* tuple_stream, - int64_t max_num_buckets, int64_t initial_num_buckets); - - /// Performs the probing operation according to the probing algorithm (linear or - /// quadratic. Returns one of the following: - /// (a) the index of the bucket that contains the entry that matches with the last row - /// evaluated in 'ht_ctx'. If 'ht_ctx' is nullptr then it does not check for row - /// equality and returns the index of the first empty bucket. - /// (b) the index of the first empty bucket according to the probing algorithm (linear - /// or quadratic), if the entry is not in the hash table or 'ht_ctx' is nullptr. - /// (c) Iterator::BUCKET_NOT_FOUND if the probe was not successful, i.e. the maximum - /// distance was traveled without finding either an empty or a matching bucket. - /// Using the returned index value, the caller can create an iterator that can be - /// iterated until End() to find all the matching rows. - /// - /// EvalAndHashBuild() or EvalAndHashProbe() must have been called before calling - /// this function. The values of the expression values cache in 'ht_ctx' will be - /// used to probe the hash table. - /// - /// 'FORCE_NULL_EQUALITY' is true if NULLs should always be considered equal when - /// comparing two rows. - /// - /// 'hash' is the hash computed by EvalAndHashBuild() or EvalAndHashProbe(). - /// 'found' indicates that a bucket that contains an equal row is found. - /// - /// There are wrappers of this function that perform the Find and Insert logic. - template - int64_t Probe(Bucket* buckets, int64_t num_buckets, PartitionedHashTableCtx* ht_ctx, - uint32_t hash, bool* found); - - /// Performs the insert logic. Returns the HtData* of the bucket or duplicate node - /// where the data should be inserted. 
-    /// Returns nullptr if the insert was not successful, and either sets 'status' to OK
-    /// if it failed because not enough reservation was available, or to the error that
-    /// was encountered.
-    HtData* InsertInternal(PartitionedHashTableCtx* ht_ctx, Status* status);
-
-    /// Updates 'bucket_idx' to the index of the next non-empty bucket. If the bucket has
-    /// duplicates, 'node' will be pointing to the head of the linked list of duplicates.
-    /// Otherwise, 'node' should not be used. If there are no more buckets, sets
-    /// 'bucket_idx' to BUCKET_NOT_FOUND.
-    void NextFilledBucket(int64_t* bucket_idx, DuplicateNode** node);
-
-    /// Resize the hash table to 'num_buckets'. 'got_memory' is false on OOM.
-    Status ResizeBuckets(int64_t num_buckets, const PartitionedHashTableCtx* ht_ctx,
-                         bool* got_memory);
-
-    /// Appends the DuplicateNode pointed to by next_node_ to 'bucket' and moves the
-    /// next_node_ pointer to the next DuplicateNode in the page, updating the remaining
-    /// node counter.
-    DuplicateNode* AppendNextNode(Bucket* bucket);
-
-    /// Creates a new DuplicateNode for an entry and chains it to the bucket with index
-    /// 'bucket_idx'. The duplicate nodes of a bucket are chained as a linked list.
-    /// This places the new duplicate node at the beginning of the list. If this is the
-    /// first duplicate entry inserted in this bucket, then the entry already contained by
-    /// the bucket is converted to a DuplicateNode. That is, the contents of 'data' of the
-    /// bucket are copied to a DuplicateNode and 'data' is updated to point to a
-    /// DuplicateNode.
-    /// Returns nullptr and sets 'status' to OK if the node array could not grow, i.e. there
-    /// was not enough memory to allocate a new DuplicateNode. Returns nullptr and sets
-    /// 'status' to an error if another error was encountered.
-    DuplicateNode* InsertDuplicateNode(int64_t bucket_idx, Status* status);
-
-    /// Resets the contents of the empty bucket with index 'bucket_idx', in preparation for
-    /// an insert. Sets all the fields of the bucket other than 'data'.
-    void PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash);
-
-    /// Return the TupleRow pointed to by 'htdata'.
-    TupleRow* GetRow(HtData& htdata, TupleRow* row) const;
-
-    /// Returns the TupleRow of the given 'bucket'. In case of duplicates, it
-    /// returns the content of the first chained duplicate node of the bucket.
-    TupleRow* GetRow(Bucket* bucket, TupleRow* row) const;
-
-    /// Grow the node array. Returns true and sets 'status' to OK on success. Returns false
-    /// and sets 'status' to OK if we can't get sufficient reservation to allocate the next
-    /// data page. Returns false and sets 'status' if another error is encountered.
-    bool GrowNodeArray(Status* status);
-
-    /// Functions to be replaced by codegen to specialize the hash table.
-    bool stores_tuples() const { return stores_tuples_; }
-    bool stores_duplicates() const { return stores_duplicates_; }
-    bool quadratic_probing() const { return quadratic_probing_; }
-
-    /// Load factor that will trigger growing the hash table on insert. This is
-    /// defined as the number of non-empty buckets / total_buckets.
-    static constexpr double MAX_FILL_FACTOR = 0.75;
-
-    /// The size in bytes of each page of duplicate nodes. Should be large enough to fit
-    /// enough DuplicateNodes to amortise the overhead of allocating each page and low
-    /// enough to not waste excessive memory to internal fragmentation.
-    static constexpr int64_t DATA_PAGE_SIZE = 64L * 1024;
-
-    RuntimeState* state_;
-
-    /// Suballocator to allocate data pages and hash table buckets with.
-    Suballocator* allocator_;
-
-    /// Stream contains the rows referenced by the hash table. Can be nullptr if the
-    /// row only contains a single tuple, in which case the TupleRow indirection
-    /// is removed by the hash table.
-    BufferedTupleStream3* tuple_stream_;
-
-    /// Constants on how the hash table should behave.
-
-    /// True if the HtData uses the Tuple* representation, or false if it uses FlatRowPtr.
-    const bool stores_tuples_;
-
-    /// True if duplicates may be inserted into hash table.
-    const bool stores_duplicates_;
-
-    /// Quadratic probing enabled (as opposed to linear).
-    const bool quadratic_probing_;
-
-    /// Data pages for all nodes. Allocated from suballocator to reduce memory
-    /// consumption of small tables.
-    std::vector<std::unique_ptr<Suballocation>> data_pages_;
-
-    /// Byte size of all buffers in data_pages_.
-    int64_t total_data_page_size_;
-
-    /// Next duplicate node to insert. Valid when node_remaining_current_page_ > 0.
-    DuplicateNode* next_node_;
-
-    /// Number of nodes left in the current page.
-    int node_remaining_current_page_;
-
-    /// Number of duplicate nodes.
-    int64_t num_duplicate_nodes_;
-
-    const int64_t max_num_buckets_;
-
-    /// Allocation containing all buckets.
-    std::unique_ptr<Suballocation> bucket_allocation_;
-
-    /// Pointer to the 'buckets_' array from 'bucket_allocation_'.
-    Bucket* buckets_;
-
-    /// Total number of buckets (filled and empty).
-    int64_t num_buckets_;
-
-    /// Number of non-empty buckets. Used to determine when to resize.
-    int64_t num_filled_buckets_;
-
-    /// Number of (non-empty) buckets with duplicates. These buckets do not point to slots
-    /// in the tuple stream, but rather to a linked list of DuplicateNodes.
-    int64_t num_buckets_with_duplicates_;
-
-    /// Number of build tuples, used for constructing temp row* for probes.
-    const int num_build_tuples_;
-
-    /// Flag used to check that we don't lose stored matches when spilling hash tables
-    /// (IMPALA-1488).
-    bool has_matches_;
-
-    /// The stats below can be used for debugging perf.
-    /// TODO: Should we make these statistics atomic?
-    /// Number of FindProbeRow(), Insert(), or FindBuildRowBucket() calls that probe the
-    /// hash table.
-    int64_t num_probes_;
-
-    /// Number of probes that failed and had to fall back to linear probing without cap.
-    int64_t num_failed_probes_;
-
-    /// Total distance traveled for each probe. That is the sum of the diff between the end
-    /// position of a probe (find/insert) and its start position
-    /// (hash & (num_buckets_ - 1)).
-    int64_t travel_length_;
-
-    /// The number of cases where we had to compare buckets with the same hash value, but
-    /// the row equality failed.
-    int64_t num_hash_collisions_;
-
-    /// How many times this table has resized so far.
-    int64_t num_resizes_;
-};
-
-} // namespace doris
diff --git a/be/src/exec/partitioned_hash_table.inline.h b/be/src/exec/partitioned_hash_table.inline.h
deleted file mode 100644
index 1f789ce821..0000000000
--- a/be/src/exec/partitioned_hash_table.inline.h
+++ /dev/null
@@ -1,406 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/partitioned-hash-table.inline.h
-// and modified by Doris
-
-#pragma once
-
-#include "exec/partitioned_hash_table.h"
-
-namespace doris {
-
-inline bool PartitionedHashTableCtx::EvalAndHashBuild(TupleRow* row) {
-    uint8_t* expr_values = expr_values_cache_.cur_expr_values();
-    uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null();
-    bool has_null = EvalBuildRow(row, expr_values, expr_values_null);
-    if (!stores_nulls() && has_null) return false;
-    expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values, expr_values_null));
-    return true;
-}
-
-inline bool PartitionedHashTableCtx::EvalAndHashProbe(TupleRow* row) {
-    uint8_t* expr_values = expr_values_cache_.cur_expr_values();
-    uint8_t* expr_values_null = expr_values_cache_.cur_expr_values_null();
-    bool has_null = EvalProbeRow(row, expr_values, expr_values_null);
-    if (has_null && !(stores_nulls() && finds_some_nulls())) return false;
-    expr_values_cache_.SetCurExprValuesHash(HashRow(expr_values, expr_values_null));
-    return true;
-}
-
-inline void PartitionedHashTableCtx::ExprValuesCache::NextRow() {
-    cur_expr_values_ += expr_values_bytes_per_row_;
-    cur_expr_values_null_ += num_exprs_;
-    ++cur_expr_values_hash_;
-    DCHECK_LE(cur_expr_values_hash_ - expr_values_hash_array_.get(), capacity_);
-}
-
-template <bool FORCE_NULL_EQUALITY>
-inline int64_t PartitionedHashTable::Probe(Bucket* buckets, int64_t num_buckets,
-                                           PartitionedHashTableCtx* ht_ctx, uint32_t hash,
-                                           bool* found) {
-    DCHECK(buckets != nullptr);
-    DCHECK_GT(num_buckets, 0);
-    *found = false;
-    int64_t bucket_idx = hash & (num_buckets - 1);
-
-    // In case of linear probing it counts the total number of steps for statistics and
-    // for knowing when to exit the loop (e.g. by capping the total travel length). In case
-    // of quadratic probing it is also used for calculating the length of the next jump.
-    int64_t step = 0;
-    do {
-        Bucket* bucket = &buckets[bucket_idx];
-        if (LIKELY(!bucket->filled)) return bucket_idx;
-        if (hash == bucket->hash) {
-            if (ht_ctx != nullptr &&
-                ht_ctx->Equals<FORCE_NULL_EQUALITY>(GetRow(bucket, ht_ctx->scratch_row_))) {
-                *found = true;
-                return bucket_idx;
-            }
-            // Row equality failed, or not performed. This is a hash collision. Continue
-            // searching.
-            ++num_hash_collisions_;
-        }
-        // Move to the next bucket.
-        ++step;
-        ++travel_length_;
-        if (quadratic_probing()) {
-            // The i-th probe location is idx = (hash + (step * (step + 1)) / 2) mod num_buckets.
-            // This gives num_buckets unique idxs (between 0 and N-1) when num_buckets is a power
-            // of 2.
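            // (Why this works: adding 'step' on each iteration makes the cumulative
            // offset after the step-th probe equal to 1 + 2 + ... + step =
            // step * (step + 1) / 2, i.e. exactly the triangular numbers of the formula
            // above, and triangular numbers taken modulo a power of two form a
            // permutation of 0..num_buckets-1. The 'step < num_buckets' cap on the
            // do-while below is therefore enough to visit every bucket exactly once
            // before the probe gives up.)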
- bucket_idx = (bucket_idx + step) & (num_buckets - 1); - } else { - bucket_idx = (bucket_idx + 1) & (num_buckets - 1); - } - } while (LIKELY(step < num_buckets)); - DCHECK_EQ(num_filled_buckets_, num_buckets) << "Probing of a non-full table " - << "failed: " << quadratic_probing() << " " << hash; - return Iterator::BUCKET_NOT_FOUND; -} - -inline PartitionedHashTable::HtData* PartitionedHashTable::InsertInternal( - PartitionedHashTableCtx* ht_ctx, Status* status) { - ++num_probes_; - bool found = false; - uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); - int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, &found); - DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND); - if (found) { - // We need to insert a duplicate node, note that this may fail to allocate memory. - DuplicateNode* new_node = InsertDuplicateNode(bucket_idx, status); - if (UNLIKELY(new_node == nullptr)) return nullptr; - return &new_node->htdata; - } else { - PrepareBucketForInsert(bucket_idx, hash); - return &buckets_[bucket_idx].bucketData.htdata; - } -} - -inline bool PartitionedHashTable::Insert(PartitionedHashTableCtx* ht_ctx, - BufferedTupleStream3::FlatRowPtr flat_row, TupleRow* row, - Status* status) { - HtData* htdata = InsertInternal(ht_ctx, status); - // If successful insert, update the contents of the newly inserted entry with 'idx'. - if (LIKELY(htdata != nullptr)) { - if (stores_tuples()) { - htdata->tuple = row->get_tuple(0); - } else { - htdata->flat_row = flat_row; - } - return true; - } - return false; -} - -template -inline void PartitionedHashTable::PrefetchBucket(uint32_t hash) { - int64_t bucket_idx = hash & (num_buckets_ - 1); - // Two optional arguments: - // 'rw': 1 means the memory access is write - // 'locality': 0-3. 0 means no temporal locality. 3 means high temporal locality. - // On x86, they map to instructions prefetchnta and prefetch{2-0} respectively. - // TODO: Reconsider the locality level with smaller prefetch batch size. - __builtin_prefetch(&buckets_[bucket_idx], READ ? 0 : 1, 1); -} - -inline PartitionedHashTable::Iterator PartitionedHashTable::FindProbeRow( - PartitionedHashTableCtx* ht_ctx) { - ++num_probes_; - bool found = false; - uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); - int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, &found); - if (found) { - return Iterator(this, ht_ctx->scratch_row(), bucket_idx, - stores_duplicates() ? buckets_[bucket_idx].bucketData.duplicates : nullptr); - } - return End(); -} - -// TODO: support lazy evaluation like HashTable::Insert(). 
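The Insert/Find wrappers above are typically driven in two phases: the build side hashes each row once through the context and inserts it, and the probe side hashes a row and then walks the duplicate chain of the matching bucket. A hedged sketch assembled from the signatures in this file (BuildSide and ProbeSide are illustrative names, not functions from the original tree):

Status BuildSide(PartitionedHashTable* ht, PartitionedHashTableCtx* ht_ctx,
                 BufferedTupleStream3::FlatRowPtr flat_row, TupleRow* row) {
    Status status;
    // Rows whose key contains NULL are skipped when the table doesn't store NULLs.
    if (!ht_ctx->EvalAndHashBuild(row)) return Status::OK();
    // Insert() returns false with 'status' set: OK means reservation shortfall,
    // anything else is a real error.
    if (!ht->Insert(ht_ctx, flat_row, row, &status)) return status;
    return Status::OK();
}

void ProbeSide(PartitionedHashTable* ht, PartitionedHashTableCtx* ht_ctx, TupleRow* row) {
    if (!ht_ctx->EvalAndHashProbe(row)) return; // NULL on a non-null-matching key
    // FindProbeRow() positions the iterator on the matching bucket; NextDuplicate()
    // walks its chain of duplicate nodes (and ends after one row if there are none).
    for (PartitionedHashTable::Iterator it = ht->FindProbeRow(ht_ctx); !it.AtEnd();
         it.NextDuplicate()) {
        TupleRow* matched = it.GetRow();
        // ... evaluate the remaining join conjuncts against 'matched' ...
        (void)matched;
    }
}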
-inline PartitionedHashTable::Iterator PartitionedHashTable::FindBuildRowBucket( - PartitionedHashTableCtx* ht_ctx, bool* found) { - ++num_probes_; - uint32_t hash = ht_ctx->expr_values_cache()->CurExprValuesHash(); - int64_t bucket_idx = Probe(buckets_, num_buckets_, ht_ctx, hash, found); - DuplicateNode* duplicates = nullptr; - if (stores_duplicates() && LIKELY(bucket_idx != Iterator::BUCKET_NOT_FOUND)) { - duplicates = buckets_[bucket_idx].bucketData.duplicates; - } - return Iterator(this, ht_ctx->scratch_row(), bucket_idx, duplicates); -} - -inline PartitionedHashTable::Iterator PartitionedHashTable::Begin( - const PartitionedHashTableCtx* ctx) { - int64_t bucket_idx = Iterator::BUCKET_NOT_FOUND; - DuplicateNode* node = nullptr; - NextFilledBucket(&bucket_idx, &node); - return Iterator(this, ctx->scratch_row(), bucket_idx, node); -} - -inline PartitionedHashTable::Iterator PartitionedHashTable::FirstUnmatched( - PartitionedHashTableCtx* ctx) { - int64_t bucket_idx = Iterator::BUCKET_NOT_FOUND; - DuplicateNode* node = nullptr; - NextFilledBucket(&bucket_idx, &node); - Iterator it(this, ctx->scratch_row(), bucket_idx, node); - // Check whether the bucket, or its first duplicate node, is matched. If it is not - // matched, then return. Otherwise, move to the first unmatched entry (node or bucket). - Bucket* bucket = &buckets_[bucket_idx]; - bool has_duplicates = stores_duplicates() && bucket->hasDuplicates; - if ((!has_duplicates && bucket->matched) || (has_duplicates && node->matched)) { - it.NextUnmatched(); - } - return it; -} - -inline void PartitionedHashTable::NextFilledBucket(int64_t* bucket_idx, DuplicateNode** node) { - ++*bucket_idx; - for (; *bucket_idx < num_buckets_; ++*bucket_idx) { - if (buckets_[*bucket_idx].filled) { - *node = stores_duplicates() ? buckets_[*bucket_idx].bucketData.duplicates : nullptr; - return; - } - } - // Reached the end of the hash table. - *bucket_idx = Iterator::BUCKET_NOT_FOUND; - *node = nullptr; -} - -inline void PartitionedHashTable::PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash) { - DCHECK_GE(bucket_idx, 0); - DCHECK_LT(bucket_idx, num_buckets_); - Bucket* bucket = &buckets_[bucket_idx]; - DCHECK(!bucket->filled); - ++num_filled_buckets_; - bucket->filled = true; - bucket->matched = false; - bucket->hasDuplicates = false; - bucket->hash = hash; -} - -inline PartitionedHashTable::DuplicateNode* PartitionedHashTable::AppendNextNode(Bucket* bucket) { - DCHECK_GT(node_remaining_current_page_, 0); - bucket->bucketData.duplicates = next_node_; - ++num_duplicate_nodes_; - --node_remaining_current_page_; - return next_node_++; -} - -inline PartitionedHashTable::DuplicateNode* PartitionedHashTable::InsertDuplicateNode( - int64_t bucket_idx, Status* status) { - DCHECK_GE(bucket_idx, 0); - DCHECK_LT(bucket_idx, num_buckets_); - Bucket* bucket = &buckets_[bucket_idx]; - DCHECK(bucket->filled); - DCHECK(stores_duplicates()); - // Allocate one duplicate node for the new data and one for the preexisting data, - // if needed. - while (node_remaining_current_page_ < 1 + !bucket->hasDuplicates) { - if (UNLIKELY(!GrowNodeArray(status))) return nullptr; - } - if (!bucket->hasDuplicates) { - // This is the first duplicate in this bucket. It means that we need to convert - // the current entry in the bucket to a node and link it from the bucket. 
- next_node_->htdata.flat_row = bucket->bucketData.htdata.flat_row; - DCHECK(!bucket->matched); - next_node_->matched = false; - next_node_->next = nullptr; - AppendNextNode(bucket); - bucket->hasDuplicates = true; - ++num_buckets_with_duplicates_; - } - // Link a new node. - next_node_->next = bucket->bucketData.duplicates; - next_node_->matched = false; - return AppendNextNode(bucket); -} - -inline TupleRow* PartitionedHashTable::GetRow(HtData& htdata, TupleRow* row) const { - if (stores_tuples()) { - return reinterpret_cast(&htdata.tuple); - } else { - // TODO: GetTupleRow() has interpreted code that iterates over the row's descriptor. - tuple_stream_->GetTupleRow(htdata.flat_row, row); - return row; - } -} - -inline TupleRow* PartitionedHashTable::GetRow(Bucket* bucket, TupleRow* row) const { - DCHECK(bucket != nullptr); - if (UNLIKELY(stores_duplicates() && bucket->hasDuplicates)) { - DuplicateNode* duplicate = bucket->bucketData.duplicates; - DCHECK(duplicate != nullptr); - return GetRow(duplicate->htdata, row); - } else { - return GetRow(bucket->bucketData.htdata, row); - } -} - -inline TupleRow* PartitionedHashTable::Iterator::GetRow() const { - DCHECK(!AtEnd()); - DCHECK(table_ != nullptr); - DCHECK(scratch_row_ != nullptr); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - if (UNLIKELY(table_->stores_duplicates() && bucket->hasDuplicates)) { - DCHECK(node_ != nullptr); - return table_->GetRow(node_->htdata, scratch_row_); - } else { - return table_->GetRow(bucket->bucketData.htdata, scratch_row_); - } -} - -inline Tuple* PartitionedHashTable::Iterator::GetTuple() const { - DCHECK(!AtEnd()); - DCHECK(table_->stores_tuples()); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - // TODO: To avoid the hasDuplicates check, store the HtData* in the Iterator. - if (UNLIKELY(table_->stores_duplicates() && bucket->hasDuplicates)) { - DCHECK(node_ != nullptr); - return node_->htdata.tuple; - } else { - return bucket->bucketData.htdata.tuple; - } -} - -inline void PartitionedHashTable::Iterator::SetTuple(Tuple* tuple, uint32_t hash) { - DCHECK(!AtEnd()); - DCHECK(table_->stores_tuples()); - table_->PrepareBucketForInsert(bucket_idx_, hash); - table_->buckets_[bucket_idx_].bucketData.htdata.tuple = tuple; -} - -inline void PartitionedHashTable::Iterator::SetMatched() { - DCHECK(!AtEnd()); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - if (table_->stores_duplicates() && bucket->hasDuplicates) { - node_->matched = true; - } else { - bucket->matched = true; - } - // Used for disabling spilling of hash tables in right and full-outer joins with - // matches. See IMPALA-1488. - table_->has_matches_ = true; -} - -inline bool PartitionedHashTable::Iterator::IsMatched() const { - DCHECK(!AtEnd()); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - if (table_->stores_duplicates() && bucket->hasDuplicates) { - return node_->matched; - } - return bucket->matched; -} - -inline void PartitionedHashTable::Iterator::SetAtEnd() { - bucket_idx_ = BUCKET_NOT_FOUND; - node_ = nullptr; -} - -template -inline void PartitionedHashTable::Iterator::PrefetchBucket() { - if (LIKELY(!AtEnd())) { - // HashTable::PrefetchBucket() takes a hash value to index into the hash bucket - // array. Passing 'bucket_idx_' here is sufficient. 
- DCHECK_EQ((bucket_idx_ & ~(table_->num_buckets_ - 1)), 0); - table_->PrefetchBucket(bucket_idx_); - } -} - -inline void PartitionedHashTable::Iterator::Next() { - DCHECK(!AtEnd()); - if (table_->stores_duplicates() && table_->buckets_[bucket_idx_].hasDuplicates && - node_->next != nullptr) { - node_ = node_->next; - } else { - table_->NextFilledBucket(&bucket_idx_, &node_); - } -} - -inline void PartitionedHashTable::Iterator::NextDuplicate() { - DCHECK(!AtEnd()); - if (table_->stores_duplicates() && table_->buckets_[bucket_idx_].hasDuplicates && - node_->next != nullptr) { - node_ = node_->next; - } else { - bucket_idx_ = BUCKET_NOT_FOUND; - node_ = nullptr; - } -} - -inline void PartitionedHashTable::Iterator::NextUnmatched() { - DCHECK(!AtEnd()); - Bucket* bucket = &table_->buckets_[bucket_idx_]; - // Check if there is any remaining unmatched duplicate node in the current bucket. - if (table_->stores_duplicates() && bucket->hasDuplicates) { - while (node_->next != nullptr) { - node_ = node_->next; - if (!node_->matched) return; - } - } - // Move to the next filled bucket and return if this bucket is not matched or - // iterate to the first not matched duplicate node. - table_->NextFilledBucket(&bucket_idx_, &node_); - while (bucket_idx_ != Iterator::BUCKET_NOT_FOUND) { - bucket = &table_->buckets_[bucket_idx_]; - if (!table_->stores_duplicates() || !bucket->hasDuplicates) { - if (!bucket->matched) return; - } else { - while (node_->matched && node_->next != nullptr) { - node_ = node_->next; - } - if (!node_->matched) return; - } - table_->NextFilledBucket(&bucket_idx_, &node_); - } -} - -inline void PartitionedHashTableCtx::set_level(int level) { - DCHECK_GE(level, 0); - DCHECK_LT(level, seeds_.size()); - level_ = level; -} - -inline int64_t PartitionedHashTable::CurrentMemSize() const { - return num_buckets_ * sizeof(Bucket) + num_duplicate_nodes_ * sizeof(DuplicateNode); -} - -inline int64_t PartitionedHashTable::NumInsertsBeforeResize() const { - return std::max( - 0, static_cast(num_buckets_ * MAX_FILL_FACTOR) - num_filled_buckets_); -} - -} // namespace doris diff --git a/be/src/exec/repeat_node.cpp b/be/src/exec/repeat_node.cpp deleted file mode 100644 index 6db760db7f..0000000000 --- a/be/src/exec/repeat_node.cpp +++ /dev/null @@ -1,217 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
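The resize arithmetic that closes partitioned_hash_table.inline.h above is easy to sanity-check with concrete numbers. A standalone sketch (kMaxFillFactor mirrors the MAX_FILL_FACTOR constant from the deleted header; the values are arbitrary):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    const double kMaxFillFactor = 0.75;  // MAX_FILL_FACTOR in the deleted header
    int64_t num_buckets = 1024;          // bucket count is always a power of two
    int64_t num_filled_buckets = 700;
    // NumInsertsBeforeResize(): floor(1024 * 0.75) - 700 = 768 - 700 = 68.
    int64_t inserts_left = std::max<int64_t>(
            0, static_cast<int64_t>(num_buckets * kMaxFillFactor) - num_filled_buckets);
    std::printf("inserts before resize: %lld\n", (long long)inserts_left);  // prints 68
    return 0;
}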
- -#include "exec/repeat_node.h" - -#include "gutil/strings/join.h" -#include "runtime/raw_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "runtime/tuple_row.h" -#include "util/runtime_profile.h" - -namespace doris { - -RepeatNode::RepeatNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _slot_id_set_list(tnode.repeat_node.slot_id_set_list), - _all_slot_ids(tnode.repeat_node.all_slot_ids), - _repeat_id_list(tnode.repeat_node.repeat_id_list), - _grouping_list(tnode.repeat_node.grouping_list), - _output_tuple_id(tnode.repeat_node.output_tuple_id), - _output_tuple_desc(nullptr), - _child_row_batch(nullptr), - _child_eos(false), - _repeat_id_idx(0), - _runtime_state(nullptr) {} - -RepeatNode::~RepeatNode() {} - -Status RepeatNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - const RowDescriptor& row_desc = child(0)->row_desc(); - RETURN_IF_ERROR(Expr::create(tnode.repeat_node.exprs, row_desc, state, &_exprs)); - DCHECK(!_exprs.empty()); - return Status::OK(); -} - -Status RepeatNode::prepare(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - _runtime_state = state; - _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); - if (_output_tuple_desc == nullptr) { - return Status::InternalError("Failed to get tuple descriptor."); - } - - for (int i = 0; i < _exprs.size(); i++) { - ExprContext* context = _pool->add(new ExprContext(_exprs[i])); - RETURN_IF_ERROR(context->prepare(state, child(0)->row_desc())); - _expr_evals.push_back(context); - } - DCHECK_EQ(_exprs.size(), _expr_evals.size()); - return Status::OK(); -} - -Status RepeatNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - for (int i = 0; i < _expr_evals.size(); i++) { - RETURN_IF_ERROR(_expr_evals[i]->open(state)); - } - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(child(0)->open(state)); - return Status::OK(); -} - -/** - * copy the rows to new tuple based on repeat_id_idx and _repeat_id_list and fill in row_batch, - * and then set grouping_id and other grouping function slot in child_row_batch - * e.g. 
_repeat_id_list = [0, 3, 1, 2], _repeat_id_idx = 2, _grouping_list = [[0, 3, 1, 2], [0, 1, 1, 0]],
- * row_batch tuple 0 ['a', 'b', 1] -> [['a', null, 1] tuple 1 [1, 1]]
- */
-Status RepeatNode::get_repeated_batch(RowBatch* child_row_batch, int repeat_id_idx,
-                                      RowBatch* row_batch) {
-    DCHECK(child_row_batch != nullptr);
-    DCHECK_EQ(row_batch->num_rows(), 0);
-
-    // Fill all slots according to child
-    MemPool* tuple_pool = row_batch->tuple_data_pool();
-    Tuple* tuple = nullptr;
-    for (int row_index = 0; row_index < child_row_batch->num_rows(); ++row_index) {
-        int row_idx = row_batch->add_row();
-        TupleRow* dst_row = row_batch->get_row(row_idx);
-        TupleRow* src_row = child_row_batch->get_row(row_index);
-
-        if (UNLIKELY(tuple == nullptr)) {
-            int size = row_batch->capacity() * _output_tuple_desc->byte_size();
-            void* tuple_buffer = tuple_pool->allocate(size);
-            if (tuple_buffer == nullptr) {
-                return Status::InternalError("Allocate memory for row batch failed.");
-            }
-            tuple = reinterpret_cast<Tuple*>(tuple_buffer);
-        } else {
-            char* new_tuple = reinterpret_cast<char*>(tuple);
-            new_tuple += _output_tuple_desc->byte_size();
-            tuple = reinterpret_cast<Tuple*>(new_tuple);
-        }
-        dst_row->set_tuple(0, tuple);
-        memset(tuple, 0, _output_tuple_desc->num_null_bytes());
-
-        int slot_index = 0;
-        for (; slot_index < _expr_evals.size(); ++slot_index) {
-            const SlotDescriptor* slot_desc = _output_tuple_desc->slots()[slot_index];
-            // set null based on repeated list
-            if (_all_slot_ids.find(slot_desc->id()) != _all_slot_ids.end()) {
-                std::set<SlotId>& repeat_ids = _slot_id_set_list[repeat_id_idx];
-                if (repeat_ids.find(slot_desc->id()) == repeat_ids.end()) {
-                    tuple->set_null(slot_desc->null_indicator_offset());
-                    continue;
-                }
-            }
-
-            void* val = _expr_evals[slot_index]->get_value(src_row);
-            tuple->set_not_null(slot_desc->null_indicator_offset());
-            RawValue::write(val, tuple, slot_desc, tuple_pool);
-        }
-
-        DCHECK_EQ(slot_index + _grouping_list.size(), _output_tuple_desc->slots().size());
-        for (int i = 0; slot_index < _output_tuple_desc->slots().size(); ++i, ++slot_index) {
-            const SlotDescriptor* slot_desc = _output_tuple_desc->slots()[slot_index];
-            tuple->set_not_null(slot_desc->null_indicator_offset());
-
-            int64_t val = _grouping_list[i][repeat_id_idx];
-            RawValue::write(&val, tuple, slot_desc, tuple_pool);
-        }
-        row_batch->commit_last_row();
-    }
-
-    return Status::OK();
-}
-
-Status RepeatNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    RETURN_IF_CANCELLED(state);
-    DCHECK(_repeat_id_idx >= 0);
-    for (const std::vector<int64_t>& v : _grouping_list) {
-        DCHECK(_repeat_id_idx <= (int)v.size());
-    }
-    // current child has finished its repeat, get child's next batch
-    if (_child_row_batch.get() == nullptr) {
-        if (_child_eos) {
-            *eos = true;
-            return Status::OK();
-        }
-
-        _child_row_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size()));
-        RETURN_IF_ERROR(child(0)->get_next(state, _child_row_batch.get(), &_child_eos));
-
-        if (_child_row_batch->num_rows() <= 0) {
-            _child_row_batch.reset(nullptr);
-            *eos = true;
-            return Status::OK();
-        }
-    }
-
-    DCHECK_EQ(row_batch->num_rows(), 0);
-    RETURN_IF_ERROR(get_repeated_batch(_child_row_batch.get(), _repeat_id_idx, row_batch));
-    _repeat_id_idx++;
-
-    int size = _repeat_id_list.size();
-    if (_repeat_id_idx >= size) {
-        _child_row_batch.reset(nullptr);
-        _repeat_id_idx = 0;
-    }
-
-    return Status::OK();
-}
-
-Status RepeatNode::close(RuntimeState*
state) { - if (is_closed()) { - return Status::OK(); - } - _child_row_batch.reset(nullptr); - for (int i = 0; i < _expr_evals.size(); i++) { - _expr_evals[i]->close(state); - } - _expr_evals.clear(); - Expr::close(_exprs); - RETURN_IF_ERROR(child(0)->close(state)); - return ExecNode::close(state); -} - -void RepeatNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "RepeatNode("; - *out << "repeat pattern: [" << JoinElements(_repeat_id_list, ",") << "]\n"; - *out << "add " << _grouping_list.size() << " columns. \n"; - *out << "_exprs: " << Expr::debug_string(_exprs); - *out << "added column values: "; - for (const std::vector& v : _grouping_list) { - *out << "[" << JoinElements(v, ",") << "] "; - } - *out << "\n"; - ExecNode::debug_string(indentation_level, out); - *out << ")"; -} - -} // namespace doris \ No newline at end of file diff --git a/be/src/exec/repeat_node.h b/be/src/exec/repeat_node.h deleted file mode 100644 index 9c43a33c86..0000000000 --- a/be/src/exec/repeat_node.h +++ /dev/null @@ -1,71 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exec/exec_node.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" - -namespace doris { - -class Tuple; -class RuntimeState; -class RowBatch; - -// repeat tuple of children and set given slots to null, this class generates tuple rows according to the given -// _repeat_id_list, and sets the value of the slot corresponding to the grouping function according to _grouping_list -class RepeatNode : public ExecNode { -public: - RepeatNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - ~RepeatNode(); - - virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; - virtual Status prepare(RuntimeState* state) override; - virtual Status open(RuntimeState* state) override; - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; - virtual Status close(RuntimeState* state) override; - -protected: - virtual void debug_string(int indentation_level, std::stringstream* out) const override; - -protected: - Status get_repeated_batch(RowBatch* child_row_batch, int repeat_id_idx, RowBatch* row_batch); - - // Slot id set used to indicate those slots need to set to null. - std::vector> _slot_id_set_list; - // all slot id - std::set _all_slot_ids; - // An integer bitmap list, it indicates the bit position of the exprs not null. - std::vector _repeat_id_list; - std::vector> _grouping_list; - // Tuple id used for output, it has new slots. 
- TupleId _output_tuple_id; - const TupleDescriptor* _output_tuple_desc; - - std::unique_ptr _child_row_batch; - bool _child_eos; - int _repeat_id_idx; - RuntimeState* _runtime_state; - - // Exprs used to evaluate input rows - std::vector _exprs; - - std::vector _expr_evals; -}; - -} // namespace doris diff --git a/be/src/exec/schema_scan_node.cpp b/be/src/exec/schema_scan_node.cpp deleted file mode 100644 index af0da699b4..0000000000 --- a/be/src/exec/schema_scan_node.cpp +++ /dev/null @@ -1,336 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "schema_scan_node.h" - -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" -#include "util/runtime_profile.h" -#include "util/string_util.h" - -namespace doris { - -SchemaScanNode::SchemaScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ScanNode(pool, tnode, descs), - _is_init(false), - _table_name(tnode.schema_scan_node.table_name), - _tuple_id(tnode.schema_scan_node.tuple_id), - _src_tuple_desc(nullptr), - _dest_tuple_desc(nullptr), - _tuple_idx(0), - _slot_num(0), - _tuple_pool(nullptr), - _schema_scanner(nullptr), - _src_tuple(nullptr), - _dest_tuple(nullptr) {} - -SchemaScanNode::~SchemaScanNode() { - delete[] reinterpret_cast(_src_tuple); - _src_tuple = nullptr; -} - -Status SchemaScanNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - if (tnode.schema_scan_node.__isset.db) { - _scanner_param.db = _pool->add(new std::string(tnode.schema_scan_node.db)); - } - - if (tnode.schema_scan_node.__isset.table) { - _scanner_param.table = _pool->add(new std::string(tnode.schema_scan_node.table)); - } - - if (tnode.schema_scan_node.__isset.wild) { - _scanner_param.wild = _pool->add(new std::string(tnode.schema_scan_node.wild)); - } - - if (tnode.schema_scan_node.__isset.current_user_ident) { - _scanner_param.current_user_ident = - _pool->add(new TUserIdentity(tnode.schema_scan_node.current_user_ident)); - } else { - if (tnode.schema_scan_node.__isset.user) { - _scanner_param.user = _pool->add(new std::string(tnode.schema_scan_node.user)); - } - if (tnode.schema_scan_node.__isset.user_ip) { - _scanner_param.user_ip = _pool->add(new std::string(tnode.schema_scan_node.user_ip)); - } - } - - if (tnode.schema_scan_node.__isset.ip) { - _scanner_param.ip = _pool->add(new std::string(tnode.schema_scan_node.ip)); - } - if (tnode.schema_scan_node.__isset.port) { - _scanner_param.port = tnode.schema_scan_node.port; - } - - if (tnode.schema_scan_node.__isset.thread_id) { - _scanner_param.thread_id = tnode.schema_scan_node.thread_id; - } - - if (tnode.schema_scan_node.__isset.table_structure) { 
- _scanner_param.table_structure = _pool->add( - new std::vector(tnode.schema_scan_node.table_structure)); - } - - if (tnode.schema_scan_node.__isset.catalog) { - _scanner_param.catalog = _pool->add(new std::string(tnode.schema_scan_node.catalog)); - } - return Status::OK(); -} - -Status SchemaScanNode::prepare(RuntimeState* state) { - if (_is_init) { - return Status::OK(); - } - - if (nullptr == state) { - return Status::InternalError("input pointer is nullptr."); - } - - RETURN_IF_ERROR(ScanNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - // new one mem pool - _tuple_pool.reset(new (std::nothrow) MemPool()); - - if (nullptr == _tuple_pool.get()) { - return Status::InternalError("Allocate MemPool failed."); - } - - // get dest tuple desc - _dest_tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - - if (nullptr == _dest_tuple_desc) { - return Status::InternalError("Failed to get tuple descriptor."); - } - - _slot_num = _dest_tuple_desc->slots().size(); - // get src tuple desc - const SchemaTableDescriptor* schema_table = - static_cast(_dest_tuple_desc->table_desc()); - - if (nullptr == schema_table) { - return Status::InternalError("Failed to get schema table descriptor."); - } - - // new one scanner - _schema_scanner.reset(SchemaScanner::create(schema_table->schema_table_type())); - - if (nullptr == _schema_scanner.get()) { - return Status::InternalError("schema scanner get nullptr pointer."); - } - - RETURN_IF_ERROR(_schema_scanner->init(&_scanner_param, _pool)); - // get column info from scanner - _src_tuple_desc = _schema_scanner->tuple_desc(); - - if (nullptr == _src_tuple_desc) { - return Status::InternalError("failed to get src schema tuple desc."); - } - - _src_tuple = reinterpret_cast(new (std::nothrow) char[_src_tuple_desc->byte_size()]); - - if (nullptr == _src_tuple) { - return Status::InternalError("new src tuple failed."); - } - - // if src tuple desc slots is zero, it's the dummy slots. - if (0 == _src_tuple_desc->slots().size()) { - _slot_num = 0; - } - - // check if type is ok. - if (_slot_num > 0) { - _index_map.resize(_slot_num); - } - for (int i = 0; i < _slot_num; ++i) { - // TODO(zhaochun): Is this slow? - int j = 0; - for (; j < _src_tuple_desc->slots().size(); ++j) { - if (iequal(_dest_tuple_desc->slots()[i]->col_name(), - _src_tuple_desc->slots()[j]->col_name())) { - break; - } - } - - if (j >= _src_tuple_desc->slots().size()) { - LOG(WARNING) << "no match column for this column(" - << _dest_tuple_desc->slots()[i]->col_name() << ")"; - return Status::InternalError("no match column for this column."); - } - - if (_src_tuple_desc->slots()[j]->type().type != _dest_tuple_desc->slots()[i]->type().type) { - LOG(WARNING) << "schema not match. 
input is " << _src_tuple_desc->slots()[j]->col_name() - << "(" << _src_tuple_desc->slots()[j]->type() << ") and output is " - << _dest_tuple_desc->slots()[i]->col_name() << "(" - << _dest_tuple_desc->slots()[i]->type() << ")"; - return Status::InternalError("schema not match."); - } - _index_map[i] = j; - } - - // TODO(marcel): add int _tuple_idx indexed by TupleId somewhere in runtime_state.h - _tuple_idx = 0; - _is_init = true; - - return Status::OK(); -} - -Status SchemaScanNode::open(RuntimeState* state) { - if (!_is_init) { - return Status::InternalError("Open before Init."); - } - - if (nullptr == state) { - return Status::InternalError("input pointer is nullptr."); - } - - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - if (_scanner_param.user) { - TSetSessionParams param; - param.__set_user(*_scanner_param.user); - //TStatus t_status; - //RETURN_IF_ERROR(SchemaJniHelper::set_session(param, &t_status)); - //RETURN_IF_ERROR(Status(t_status)); - } - - return _schema_scanner->start(state); -} - -void SchemaScanNode::copy_one_row() { - memset(_dest_tuple, 0, _dest_tuple_desc->num_null_bytes()); - - for (int i = 0; i < _slot_num; ++i) { - if (!_dest_tuple_desc->slots()[i]->is_materialized()) { - continue; - } - int j = _index_map[i]; - - if (_src_tuple->is_null(_src_tuple_desc->slots()[j]->null_indicator_offset())) { - _dest_tuple->set_null(_dest_tuple_desc->slots()[i]->null_indicator_offset()); - } else { - void* dest_slot = _dest_tuple->get_slot(_dest_tuple_desc->slots()[i]->tuple_offset()); - void* src_slot = _src_tuple->get_slot(_src_tuple_desc->slots()[j]->tuple_offset()); - int slot_size = _src_tuple_desc->slots()[j]->type().get_slot_size(); - memcpy(dest_slot, src_slot, slot_size); - } - } -} - -Status SchemaScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - if (!_is_init) { - return Status::InternalError("GetNext before Init."); - } - - if (nullptr == state || nullptr == row_batch || nullptr == eos) { - return Status::InternalError("input pointer is nullptr."); - } - - RETURN_IF_CANCELLED(state); - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - if (reached_limit()) { - *eos = true; - return Status::OK(); - } - - // create new tuple buffer for row_batch - int tuple_buffer_size = row_batch->capacity() * _dest_tuple_desc->byte_size(); - void* tuple_buffer = _tuple_pool->allocate(tuple_buffer_size); - - if (nullptr == tuple_buffer) { - return Status::InternalError("Allocate tuple buffer failed."); - } - - // no use to clear, because CopyOneRow can clear - _dest_tuple = reinterpret_cast(tuple_buffer); - // Indicates whether there are more rows to process. Set in _schema_scanner.get_next(). 
- bool scanner_eos = false; - - while (true) { - RETURN_IF_CANCELLED(state); - - if (reached_limit() || row_batch->is_full()) { - // hang on to last allocated chunk in pool, we'll keep writing into it in the - // next get_next() call - row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), !reached_limit()); - *eos = reached_limit(); - return Status::OK(); - } - - RETURN_IF_ERROR(_schema_scanner->get_next_row(_src_tuple, _tuple_pool.get(), &scanner_eos)); - - if (scanner_eos) { - row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false); - *eos = true; - return Status::OK(); - } - - int row_idx = row_batch->add_row(); - TupleRow* row = row_batch->get_row(row_idx); - row->set_tuple(_tuple_idx, _dest_tuple); - copy_one_row(); - - // Error logging: Flush error stream and add name of HBase table and current row key. - // check now - if (eval_conjuncts(&_conjunct_ctxs[0], _conjunct_ctxs.size(), row)) { - row_batch->commit_last_row(); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - char* new_tuple = reinterpret_cast(_dest_tuple); - new_tuple += _dest_tuple_desc->byte_size(); - _dest_tuple = reinterpret_cast(new_tuple); - } - } - - return Status::OK(); -} - -Status SchemaScanNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - SCOPED_TIMER(_runtime_profile->total_time_counter()); - - _tuple_pool.reset(); - return ExecNode::close(state); -} - -void SchemaScanNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "SchemaScanNode(tupleid=" << _tuple_id << " table=" << _table_name; - *out << ")" << std::endl; - - for (int i = 0; i < _children.size(); ++i) { - _children[i]->debug_string(indentation_level + 1, out); - } -} - -Status SchemaScanNode::set_scan_ranges(const std::vector& scan_ranges) { - return Status::OK(); -} - -} // namespace doris - -/* vim: set ts=4 sw=4 sts=4 tw=100 : */ diff --git a/be/src/exec/schema_scan_node.h b/be/src/exec/schema_scan_node.h deleted file mode 100644 index 2b7b780ad9..0000000000 --- a/be/src/exec/schema_scan_node.h +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
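SchemaScanNode::get_next() above writes destination tuples into a buffer sized for the whole batch and advances the write cursor only when a row survives the conjuncts, so a rejected row's slot is simply overwritten by the next candidate. The same idiom reduced to its core, with simplified stand-in types (FakeTuple, FillBatch, and the callbacks are illustrative, not the original API):

#include <cstdint>
#include <cstring>

struct FakeTuple { int64_t payload; };

// Fills 'buf' (capacity 'cap' tuples) from 'next_row'; returns how many rows
// passed 'keep' and were committed.
int FillBatch(uint8_t* buf, int cap, bool (*next_row)(FakeTuple*),
              bool (*keep)(const FakeTuple&)) {
    int committed = 0;
    FakeTuple* cursor = reinterpret_cast<FakeTuple*>(buf);
    FakeTuple row;
    while (committed < cap && next_row(&row)) {
        std::memcpy(cursor, &row, sizeof(FakeTuple));
        if (keep(*cursor)) {
            ++committed;
            ++cursor;  // advance only on commit, mirroring _dest_tuple += byte_size()
        }
        // On reject the cursor stays put and the next row reuses this slot.
    }
    return committed;
}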
- -#pragma once - -#include "exec/scan_node.h" -#include "exec/schema_scanner.h" -#include "runtime/descriptors.h" - -namespace doris { - -class TextConverter; -class Tuple; -class TupleDescriptor; -class RuntimeState; -class MemPool; -class Status; - -class SchemaScanNode : public ScanNode { -public: - SchemaScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - ~SchemaScanNode(); - - // Prepare conjuncts, create Schema columns to slots mapping - // initialize _schema_scanner - Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; - - // Prepare conjuncts, create Schema columns to slots mapping - // initialize _schema_scanner - Status prepare(RuntimeState* state) override; - - // Start Schema scan using _schema_scanner. - Status open(RuntimeState* state) override; - - // Fill the next row batch by calling next() on the _schema_scanner, - Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; - - // Close the _schema_scanner, and report errors. - Status close(RuntimeState* state) override; - - // this is no use in this class - Status set_scan_ranges(const std::vector& scan_ranges) override; - -private: - // Write debug string of this into out. - void debug_string(int indentation_level, std::stringstream* out) const override; - // Copy one row from schema table to input tuple - void copy_one_row(); - -protected: - bool _is_init; - const std::string _table_name; - SchemaScannerParam _scanner_param; - // Tuple id resolved in prepare() to set _tuple_desc; - TupleId _tuple_id; - - // Descriptor of tuples read from schema table. - const TupleDescriptor* _src_tuple_desc; - // Descriptor of dest tuples - const TupleDescriptor* _dest_tuple_desc; - // Tuple index in tuple row. - int _tuple_idx; - // slot num need to fill in and return - int _slot_num; - // Pool for allocating tuple data, including all varying-length slots. - std::unique_ptr _tuple_pool; - // Jni helper for scanning an schema table. - std::unique_ptr _schema_scanner; - // Current tuple. - Tuple* _src_tuple; - Tuple* _dest_tuple; - // Map from index in slots to column of schema table. - std::vector _index_map; -}; - -} // namespace doris diff --git a/be/src/exec/select_node.cpp b/be/src/exec/select_node.cpp deleted file mode 100644 index a343e86d38..0000000000 --- a/be/src/exec/select_node.cpp +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
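The prepare() logic shown earlier builds _index_map by matching every destination slot to a source slot with the same column name, case-insensitively, and fails if any destination column has no counterpart. The same algorithm in isolation, using std::string stand-ins for the slot descriptors (BuildIndexMap is an illustrative name):

#include <algorithm>
#include <cctype>
#include <optional>
#include <string>
#include <vector>

static bool iequal(const std::string& a, const std::string& b) {
    return a.size() == b.size() &&
           std::equal(a.begin(), a.end(), b.begin(), [](char x, char y) {
               return std::tolower((unsigned char)x) == std::tolower((unsigned char)y);
           });
}

// For each dest column, record the index of the matching src column, or fail.
std::optional<std::vector<int>> BuildIndexMap(const std::vector<std::string>& dest,
                                              const std::vector<std::string>& src) {
    std::vector<int> index_map(dest.size());
    for (size_t i = 0; i < dest.size(); ++i) {
        size_t j = 0;
        for (; j < src.size(); ++j) {
            if (iequal(dest[i], src[j])) break;
        }
        if (j == src.size()) return std::nullopt;  // "no match column for this column"
        index_map[i] = (int)j;
    }
    return index_map;
}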
-// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/select-node.cpp -// and modified by Doris - -#include "exec/select_node.h" - -#include "exprs/expr.h" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" - -namespace doris { - -SelectNode::SelectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _child_row_batch(nullptr), - _child_row_idx(0), - _child_eos(false) {} - -Status SelectNode::prepare(RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - _child_row_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size())); - return Status::OK(); -} - -Status SelectNode::open(RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_ERROR(child(0)->open(state)); - return Status::OK(); -} - -Status SelectNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - RETURN_IF_CANCELLED(state); - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - if (reached_limit() || (_child_row_idx == _child_row_batch->num_rows() && _child_eos)) { - // we're already done or we exhausted the last child batch and there won't be any - // new ones - _child_row_batch->transfer_resource_ownership(row_batch); - *eos = true; - return Status::OK(); - } - *eos = false; - - // start (or continue) consuming row batches from child - while (true) { - RETURN_IF_CANCELLED(state); - if (_child_row_idx == _child_row_batch->num_rows()) { - // fetch next batch - _child_row_idx = 0; - _child_row_batch->transfer_resource_ownership(row_batch); - _child_row_batch->reset(); - if (row_batch->at_capacity()) { - return Status::OK(); - } - RETURN_IF_ERROR(child(0)->get_next(state, _child_row_batch.get(), &_child_eos)); - } - - if (copy_rows(row_batch)) { - *eos = reached_limit() || - (_child_row_idx == _child_row_batch->num_rows() && _child_eos); - if (*eos) { - _child_row_batch->transfer_resource_ownership(row_batch); - } - return Status::OK(); - } - - if (_child_eos) { - // finished w/ last child row batch, and child eos is true - _child_row_batch->transfer_resource_ownership(row_batch); - *eos = true; - return Status::OK(); - } - } - - return Status::OK(); -} - -bool SelectNode::copy_rows(RowBatch* output_batch) { - ExprContext** ctxs = &_conjunct_ctxs[0]; - int num_ctxs = _conjunct_ctxs.size(); - - for (; _child_row_idx < _child_row_batch->num_rows(); ++_child_row_idx) { - // Add a new row to output_batch - int dst_row_idx = output_batch->add_row(); - - if (dst_row_idx == RowBatch::INVALID_ROW_INDEX) { - return true; - } - - TupleRow* dst_row = output_batch->get_row(dst_row_idx); - TupleRow* src_row = _child_row_batch->get_row(_child_row_idx); - - if (ExecNode::eval_conjuncts(ctxs, num_ctxs, src_row)) { - output_batch->copy_row(src_row, dst_row); - output_batch->commit_last_row(); - ++_num_rows_returned; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - - if (reached_limit()) { - return true; - } - } - } - - if (VLOG_ROW_IS_ON) { - for (int i = 0; i < output_batch->num_rows(); ++i) { - TupleRow* row = output_batch->get_row(i); - VLOG_ROW << "SelectNode input row: " << row->to_string(row_desc()); - } - } - - return output_batch->is_full() || output_batch->at_resource_limit(); -} - -Status SelectNode::close(RuntimeState* state) { - if (is_closed()) { - return 
Status::OK(); - } - _child_row_batch.reset(); - return ExecNode::close(state); -} - -} // namespace doris diff --git a/be/src/exec/select_node.h b/be/src/exec/select_node.h deleted file mode 100644 index cf4a871fe6..0000000000 --- a/be/src/exec/select_node.h +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/select-node.h -// and modified by Doris - -#pragma once - -#include "exec/exec_node.h" - -namespace doris { - -class Tuple; -class TupleRow; - -// Node that evaluates conjuncts and enforces a limit but otherwise passes along -// the rows pulled from its child unchanged. -class SelectNode : public ExecNode { -public: - SelectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); - - virtual Status prepare(RuntimeState* state); - virtual Status open(RuntimeState* state); - virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); - virtual Status close(RuntimeState* state); - -private: - // current row batch of child - std::unique_ptr _child_row_batch; - - // index of current row in _child_row_batch - int _child_row_idx; - - // true if last get_next() call on child signalled eos - bool _child_eos; - - // Copy rows from _child_row_batch for which _conjuncts evaluate to true to - // output_batch, up to _limit. - // Return true if limit was hit or output_batch should be returned, otherwise false. - bool copy_rows(RowBatch* output_batch); -}; - -} // namespace doris diff --git a/be/src/exec/set_operation_node.cpp b/be/src/exec/set_operation_node.cpp deleted file mode 100644 index 4a75ec234d..0000000000 --- a/be/src/exec/set_operation_node.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
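SelectNode::copy_rows() above is the classic filter-and-forward loop: copy the rows that pass the conjuncts, stop early when the output batch fills up or the limit is hit, and remember where to resume in the child batch. The control flow over plain vectors (a sketch; CopyRows and its types are illustrative):

#include <cstdint>
#include <functional>
#include <vector>

struct CopyResult {
    bool stop;              // return the output batch to the caller now
    size_t next_child_idx;  // where to resume in the child batch (_child_row_idx)
};

CopyResult CopyRows(const std::vector<int64_t>& child_batch, size_t child_idx,
                    std::vector<int64_t>* out, size_t out_capacity, int64_t limit,
                    int64_t* rows_returned, const std::function<bool(int64_t)>& pred) {
    for (; child_idx < child_batch.size(); ++child_idx) {
        if (out->size() == out_capacity) return {true, child_idx};  // batch full
        if (pred(child_batch[child_idx])) {
            out->push_back(child_batch[child_idx]);
            if (++*rows_returned == limit) return {true, child_idx + 1};  // reached_limit()
        }
    }
    return {false, child_idx};  // child batch exhausted; caller fetches the next one
}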
- -#include "exec/set_operation_node.h" - -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/descriptors.h" -#include "runtime/raw_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" - -namespace doris { -SetOperationNode::SetOperationNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs, int tuple_id) - : ExecNode(pool, tnode, descs), - _tuple_id(tuple_id), - _tuple_desc(nullptr), - _valid_element_in_hash_tbl(0) {} - -Status SetOperationNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - DCHECK_EQ(_conjunct_ctxs.size(), 0); - DCHECK_GE(_children.size(), 2); - return Status::OK(); -} - -Status SetOperationNode::prepare(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); - DCHECK(_tuple_desc != nullptr); - _build_pool.reset(new MemPool(mem_tracker_held())); - _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); - _probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime"); - for (size_t i = 0; i < _child_expr_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare(_child_expr_lists[i], state, child(i)->row_desc())); - DCHECK_EQ(_child_expr_lists[i].size(), _tuple_desc->slots().size()); - } - _build_tuple_size = child(0)->row_desc().tuple_descriptors().size(); - _build_tuple_row_size = _build_tuple_size * sizeof(Tuple*); - _find_nulls = std::vector(); - for (auto ctx : _child_expr_lists[0]) { - _find_nulls.push_back(!ctx->root()->is_slotref() || ctx->is_nullable()); - } - return Status::OK(); -} - -Status SetOperationNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - for (auto& exprs : _child_expr_lists) { - Expr::close(exprs, state); - } - - // Must reset _probe_batch in close() to release resources - _probe_batch.reset(nullptr); - - if (_hash_tbl.get() != nullptr) { - _hash_tbl->close(); - } - if (_build_pool.get() != nullptr) { - _build_pool->free_all(); - } - - return ExecNode::close(state); -} - -std::string SetOperationNode::get_row_output_string(TupleRow* row, const RowDescriptor& row_desc) { - std::stringstream out; - out << "["; - for (int i = 0; i < row_desc.tuple_descriptors().size(); ++i) { - if (i != 0) { - out << " "; - } - out << Tuple::to_string(row->get_tuple(i), *row_desc.tuple_descriptors()[i]); - } - - out << "]"; - return out.str(); -} - -void SetOperationNode::create_output_row(TupleRow* input_row, RowBatch* row_batch, - uint8_t* tuple_buf) { - TupleRow* output_row = row_batch->get_row(row_batch->add_row()); - Tuple* dst_tuple = reinterpret_cast(tuple_buf); - const std::vector& exprs = _child_expr_lists[0]; - dst_tuple->materialize_exprs(input_row, *_tuple_desc, exprs, - row_batch->tuple_data_pool(), nullptr, nullptr); - output_row->set_tuple(0, dst_tuple); - row_batch->commit_last_row(); - VLOG_ROW << "commit row: " << get_row_output_string(output_row, row_desc()); -} - -bool SetOperationNode::equals(TupleRow* row, TupleRow* other) { - DCHECK(!(row == nullptr && other == nullptr)); - if (row == nullptr || other == nullptr) { - return false; - } - for (int i = 0; i < _child_expr_lists[0].size(); ++i) { - void* val_row = _child_expr_lists[0][i]->get_value(row); - void* val_other = _child_expr_lists[0][i]->get_value(other); - if (_find_nulls[i] && val_row == nullptr && val_other == nullptr) { - continue; - } else if (val_row 
-            return false;
-        } else if (!RawValue::eq(val_row, val_other, _child_expr_lists[0][i]->root()->type())) {
-            return false;
-        }
-    }
-    return true;
-}
-
-Status SetOperationNode::open(RuntimeState* state) {
-    RETURN_IF_ERROR(ExecNode::open(state));
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    RETURN_IF_CANCELLED(state);
-    // open result expr lists.
-    for (const std::vector<ExprContext*>& exprs : _child_expr_lists) {
-        RETURN_IF_ERROR(Expr::open(exprs, state));
-    }
-    // build the initial hash table used to remove duplicates
-    _hash_tbl.reset(new HashTable(_child_expr_lists[0], _child_expr_lists[1], _build_tuple_size,
-                                  true, _find_nulls, id(),
-                                  BitUtil::RoundUpToPowerOfTwo(state->batch_size())));
-    RowBatch build_batch(child(0)->row_desc(), state->batch_size());
-    RETURN_IF_ERROR(child(0)->open(state));
-
-    bool eos = false;
-    while (!eos) {
-        SCOPED_TIMER(_build_timer);
-        RETURN_IF_CANCELLED(state);
-        RETURN_IF_ERROR(child(0)->get_next(state, &build_batch, &eos));
-        // take ownership of tuple data of build_batch
-        _build_pool->acquire_data(build_batch.tuple_data_pool(), false);
-        // build hash table and remove duplicate items
-        RETURN_IF_ERROR(_hash_tbl->resize_buckets_ahead(build_batch.num_rows()));
-        for (int i = 0; i < build_batch.num_rows(); ++i) {
-            _hash_tbl->insert_unique_without_check(build_batch.get_row(i));
-        }
-        VLOG_ROW << "hash table content: " << _hash_tbl->debug_string(true, &child(0)->row_desc());
-        build_batch.reset();
-    }
-
-    return Status::OK();
-}
-
-} // namespace doris
diff --git a/be/src/exec/set_operation_node.h b/be/src/exec/set_operation_node.h
deleted file mode 100644
index 4e2722b94d..0000000000
--- a/be/src/exec/set_operation_node.h
+++ /dev/null
@@ -1,106 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "exec/exec_node.h"
-#include "exec/hash_table.h"
-
-namespace doris {
-
-class MemPool;
-class RowBatch;
-class TupleRow;
-
-// Node that calculates the set operation results of its children by either materializing their
-// evaluated expressions into row batches or passing through (forwarding) the
-// batches if the input tuple layout is identical to the output tuple layout
-// and expressions don't need to be evaluated. The children should be ordered
-// such that all passthrough children come before the children that need
-// materialization. The set operation node pulls from its children sequentially, i.e.
-// it exhausts one child completely before moving on to the next one.
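// A minimal, self-contained sketch of the set-operation technique the node above
// implements: build a hash table of unique rows from the first child, then for each
// remaining child mark matches and rebuild the table, keeping the matched entries
// (INTERSECT) or the unmatched ones (EXCEPT). This is an illustration only;
// std::unordered_set over int stands in for HashTable over TupleRow, and the
// function name is hypothetical.

#include <unordered_set>
#include <vector>

std::vector<int> evaluate_set_operation(const std::vector<std::vector<int>>& children,
                                        bool keep_matched /* true = INTERSECT */) {
    // Deduplicated rows of the first child, like insert_unique_without_check().
    std::unordered_set<int> table(children[0].begin(), children[0].end());
    for (size_t child = 1; child < children.size(); ++child) {
        std::unordered_set<int> matched;
        for (int v : children[child]) {
            if (table.count(v) > 0) {
                matched.insert(v); // the probe phase sets the "matched" flag
            }
        }
        // Mirrors refresh_hash_table<keep_matched>: rebuild the table with only
        // the entries whose matched flag has the wanted value.
        if (keep_matched) {
            table = std::move(matched); // INTERSECT keeps matched rows
        } else {
            for (int v : matched) {
                table.erase(v); // EXCEPT drops matched rows
            }
        }
    }
    return std::vector<int>(table.begin(), table.end());
}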
-class SetOperationNode : public ExecNode {
-public:
-    SetOperationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs,
-                     int tuple_id);
-
-    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
-    virtual Status prepare(RuntimeState* state);
-    virtual Status close(RuntimeState* state);
-    virtual Status open(RuntimeState* state);
-
-protected:
-    std::string get_row_output_string(TupleRow* row, const RowDescriptor& row_desc);
-    void create_output_row(TupleRow* input_row, RowBatch* row_batch, uint8_t* tuple_buf);
-    // Returns true if the values of row and other are equal
-    bool equals(TupleRow* row, TupleRow* other);
-
-    template <bool keep_matched>
-    // Refresh the hash table and probe exprs before we process the next child's data.
-    // TODO: Check whether the hash table should be shrunk to reduce unnecessary refreshes,
-    // but different children may have different probe exprs, which could produce wrong
-    // results, so the FE needs to keep the probe exprs identical to enable this optimization.
-    Status refresh_hash_table(int child);
-
-    /// Tuple id resolved in Prepare() to set tuple_desc_;
-    const int _tuple_id;
-    /// Descriptor for the tuples this node constructs.
-    const TupleDescriptor* _tuple_desc;
-    // Exprs materialized by this node. The i-th result expr list refers to the i-th child.
-    std::vector<std::vector<ExprContext*>> _child_expr_lists;
-
-    std::unique_ptr<HashTable> _hash_tbl;
-    HashTable::Iterator _hash_tbl_iterator;
-    int64_t _valid_element_in_hash_tbl;
-
-    std::unique_ptr<RowBatch> _probe_batch;
-    // holds everything referenced in _hash_tbl
-    std::unique_ptr<MemPool> _build_pool;
-
-    int _build_tuple_size;
-    int _build_tuple_row_size;
-    std::vector<bool> _find_nulls;
-
-    RuntimeProfile::Counter* _build_timer; // time to build hash table
-    RuntimeProfile::Counter* _probe_timer; // time to probe
-};
-
-template <bool keep_matched>
-Status SetOperationNode::refresh_hash_table(int child_id) {
-    SCOPED_TIMER(_build_timer);
-    std::unique_ptr<HashTable> temp_tbl(new HashTable(
-            _child_expr_lists[0], _child_expr_lists[child_id], _build_tuple_size, true, _find_nulls,
-            id(), _valid_element_in_hash_tbl / HashTable::MAX_BUCKET_OCCUPANCY_FRACTION + 1));
-    _hash_tbl_iterator = _hash_tbl->begin();
-    while (_hash_tbl_iterator.has_next()) {
-        if constexpr (keep_matched) {
-            if (_hash_tbl_iterator.matched()) {
-                RETURN_IF_ERROR(temp_tbl->insert(_hash_tbl_iterator.get_row()));
-            }
-        } else {
-            if (!_hash_tbl_iterator.matched()) {
-                RETURN_IF_ERROR(temp_tbl->insert(_hash_tbl_iterator.get_row()));
-            }
-        }
-        _hash_tbl_iterator.next();
-    }
-    _hash_tbl.swap(temp_tbl);
-    temp_tbl->close();
-    return Status::OK();
-}
-
-}; // namespace doris
diff --git a/be/src/exec/sort_exec_exprs.cpp b/be/src/exec/sort_exec_exprs.cpp
deleted file mode 100644
index bede01e503..0000000000
--- a/be/src/exec/sort_exec_exprs.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/sort-exec-exprs.cc
-// and modified by Doris
-
-#include "exec/sort_exec_exprs.h"
-
-namespace doris {
-
-Status SortExecExprs::init(const TSortInfo& sort_info, ObjectPool* pool) {
-    return init(
-            sort_info.ordering_exprs,
-            sort_info.__isset.sort_tuple_slot_exprs ? &sort_info.sort_tuple_slot_exprs : nullptr,
-            pool);
-}
-
-Status SortExecExprs::init(const std::vector<TExpr>& ordering_exprs,
-                           const std::vector<TExpr>* sort_tuple_slot_exprs, ObjectPool* pool) {
-    RETURN_IF_ERROR(Expr::create_expr_trees(pool, ordering_exprs, &_lhs_ordering_expr_ctxs));
-    if (sort_tuple_slot_exprs != nullptr) {
-        _materialize_tuple = true;
-        RETURN_IF_ERROR(
-                Expr::create_expr_trees(pool, *sort_tuple_slot_exprs, &_sort_tuple_slot_expr_ctxs));
-    } else {
-        _materialize_tuple = false;
-    }
-    return Status::OK();
-}
-
-Status SortExecExprs::init(const std::vector<ExprContext*>& lhs_ordering_expr_ctxs,
-                           const std::vector<ExprContext*>& rhs_ordering_expr_ctxs) {
-    _lhs_ordering_expr_ctxs = lhs_ordering_expr_ctxs;
-    _rhs_ordering_expr_ctxs = rhs_ordering_expr_ctxs;
-    return Status::OK();
-}
-
-Status SortExecExprs::prepare(RuntimeState* state, const RowDescriptor& child_row_desc,
-                              const RowDescriptor& output_row_desc) {
-    if (_materialize_tuple) {
-        RETURN_IF_ERROR(Expr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc));
-    }
-    RETURN_IF_ERROR(Expr::prepare(_lhs_ordering_expr_ctxs, state, output_row_desc));
-    return Status::OK();
-}
-
-Status SortExecExprs::open(RuntimeState* state) {
-    if (_materialize_tuple) {
-        RETURN_IF_ERROR(Expr::open(_sort_tuple_slot_expr_ctxs, state));
-    }
-    RETURN_IF_ERROR(Expr::open(_lhs_ordering_expr_ctxs, state));
-    RETURN_IF_ERROR(
-            Expr::clone_if_not_exists(_lhs_ordering_expr_ctxs, state, &_rhs_ordering_expr_ctxs));
-    return Status::OK();
-}
-
-void SortExecExprs::close(RuntimeState* state) {
-    if (_materialize_tuple) {
-        Expr::close(_sort_tuple_slot_expr_ctxs, state);
-    }
-    Expr::close(_lhs_ordering_expr_ctxs, state);
-    Expr::close(_rhs_ordering_expr_ctxs, state);
-}
-
-} //namespace doris
diff --git a/be/src/exec/sort_exec_exprs.h b/be/src/exec/sort_exec_exprs.h
deleted file mode 100644
index 798cb5eebb..0000000000
--- a/be/src/exec/sort_exec_exprs.h
+++ /dev/null
@@ -1,91 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
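// A schematic of the call sequence the sort nodes in this diff use to drive
// SortExecExprs (init -> prepare -> open -> close). The wrapper function and its
// parameters are illustrative, not part of the original sources; error handling
// is collapsed into RETURN_IF_ERROR as elsewhere in this codebase.

Status drive_sort_exec_exprs(const TSortInfo& sort_info, ObjectPool* pool,
                             RuntimeState* state, const RowDescriptor& child_row_desc,
                             const RowDescriptor& output_row_desc) {
    SortExecExprs sort_exprs;
    RETURN_IF_ERROR(sort_exprs.init(sort_info, pool));      // build expr trees from thrift
    RETURN_IF_ERROR(sort_exprs.prepare(state, child_row_desc, output_row_desc));
    RETURN_IF_ERROR(sort_exprs.open(state));                // also clones lhs ctxs into rhs ctxs
    // ... compare rows via lhs_ordering_expr_ctxs() / rhs_ordering_expr_ctxs() ...
    sort_exprs.close(state);
    return Status::OK();
}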
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/sort-exec-exprs.h
-// and modified by Doris
-
-#pragma once
-
-#include "exprs/expr.h"
-#include "runtime/runtime_state.h"
-
-namespace doris {
-
-class MemTracker;
-
-// Helper class to Prepare(), Open() and Close() the ordering expressions used to perform
-// comparisons in a sort. Used by TopNNode, SortNode. When two
-// rows are compared, the ordering expressions are evaluated once for each side.
-// TopN and Sort materialize input rows into a single tuple before sorting.
-// If _materialize_tuple is true, SortExecExprs also stores the slot expressions used to
-// materialize the sort tuples.
-class SortExecExprs {
-public:
-    // Initialize the expressions from a TSortInfo using the specified pool.
-    Status init(const TSortInfo& sort_info, ObjectPool* pool);
-
-    // Initialize the ordering and (optionally) materialization expressions from the thrift
-    // TExprs into the specified pool. sort_tuple_slot_exprs is nullptr if the tuple is not
-    // materialized.
-    Status init(const std::vector<TExpr>& ordering_exprs,
-                const std::vector<TExpr>* sort_tuple_slot_exprs, ObjectPool* pool);
-
-    // prepare all expressions used for sorting and tuple materialization.
-    Status prepare(RuntimeState* state, const RowDescriptor& child_row_desc,
-                   const RowDescriptor& output_row_desc);
-
-    // open all expressions used for sorting and tuple materialization.
-    Status open(RuntimeState* state);
-
-    // close all expressions used for sorting and tuple materialization.
-    void close(RuntimeState* state);
-
-    const std::vector<ExprContext*>& sort_tuple_slot_expr_ctxs() const {
-        return _sort_tuple_slot_expr_ctxs;
-    }
-
-    // Can only be used after calling prepare()
-    const std::vector<ExprContext*>& lhs_ordering_expr_ctxs() const {
-        return _lhs_ordering_expr_ctxs;
-    }
-    // Can only be used after calling open()
-    const std::vector<ExprContext*>& rhs_ordering_expr_ctxs() const {
-        return _rhs_ordering_expr_ctxs;
-    }
-
-private:
-    // Create two ExprContexts for evaluating over the TupleRows.
-    std::vector<ExprContext*> _lhs_ordering_expr_ctxs;
-    std::vector<ExprContext*> _rhs_ordering_expr_ctxs;
-
-    // If true, the tuples to be sorted are materialized by
-    // _sort_tuple_slot_exprs before the actual sort is performed.
-    bool _materialize_tuple;
-
-    // Expressions used to materialize slots in the tuples to be sorted.
-    // One expr per slot in the materialized tuple. Valid only if
-    // _materialize_tuple is true.
-    std::vector<ExprContext*> _sort_tuple_slot_expr_ctxs;
-
-    // Initialize directly from already-created ExprContexts. Callers should manually call
-    // Prepare(), Open(), and Close() on input ExprContexts (instead of calling the
-    // analogous functions in this class). Used for testing.
-    Status init(const std::vector<ExprContext*>& lhs_ordering_expr_ctxs,
-                const std::vector<ExprContext*>& rhs_ordering_expr_ctxs);
-};
-
-} // namespace doris
diff --git a/be/src/exec/spill_sort_node.cc b/be/src/exec/spill_sort_node.cc
deleted file mode 100644
index f85b6cf1a6..0000000000
--- a/be/src/exec/spill_sort_node.cc
+++ /dev/null
@@ -1,171 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/spill_sort_node.h" - -#include "exec/sort_exec_exprs.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" - -namespace doris { - -SpillSortNode::SpillSortNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs), - _offset(tnode.sort_node.__isset.offset ? tnode.sort_node.offset : 0), - _sorter(nullptr), - _num_rows_skipped(0) {} - -SpillSortNode::~SpillSortNode() {} - -Status SpillSortNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - RETURN_IF_ERROR(_sort_exec_exprs.init(tnode.sort_node.sort_info, _pool)); - _is_asc_order = tnode.sort_node.sort_info.is_asc_order; - _nulls_first = tnode.sort_node.sort_info.nulls_first; - return Status::OK(); -} - -Status SpillSortNode::prepare(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor)); - // AddExprCtxsToFree(_sort_exec_exprs); - return Status::OK(); -} - -Status SpillSortNode::open(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(ExecNode::open(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_ERROR(_sort_exec_exprs.open(state)); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state("Spill sort, while open.")); - RETURN_IF_ERROR(child(0)->open(state)); - - // These objects must be created after opening the _sort_exec_exprs. Avoid creating - // them after every reset()/open(). - if (_sorter.get() == nullptr) { - TupleRowComparator less_than(_sort_exec_exprs, _is_asc_order, _nulls_first); - // Create and initialize the external sort impl object - _sorter.reset(new SpillSorter(less_than, _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), - &_row_descriptor, runtime_profile(), state)); - RETURN_IF_ERROR(_sorter->init()); - } - - // The child has been opened and the sorter created. Sort the input. - // The final merge is done on-demand as rows are requested in get_next(). - RETURN_IF_ERROR(sort_input(state)); - - // Unless we are inside a subplan expecting to call open()/get_next() on the child - // again, the child can be closed at this point. - // if (!IsInSubplan()) { - child(0)->close(state); - // } - return Status::OK(); -} - -Status SpillSortNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state("Spill sort, while getting next.")); - - if (reached_limit()) { - *eos = true; - return Status::OK(); - } else { - *eos = false; - } - - DCHECK_EQ(row_batch->num_rows(), 0); - RETURN_IF_ERROR(_sorter->get_next(row_batch, eos)); - while ((_num_rows_skipped < _offset)) { - _num_rows_skipped += row_batch->num_rows(); - // Throw away rows in the output batch until the offset is skipped. 
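// Worked example of the offset-skipping loop below, with assumed values
// _offset = 10 and successive batches of 8 and 7 rows:
//   batch 1: _num_rows_skipped = 8,  rows_to_keep = 8 - 10 = -2 -> clear the batch
//            and fetch the next one;
//   batch 2: _num_rows_skipped = 15, rows_to_keep = 15 - 10 = 5 -> keep only the
//            last 5 rows (copy_rows shifts rows [2, 7) to the front) and stop skipping.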
- int rows_to_keep = _num_rows_skipped - _offset; - if (rows_to_keep > 0) { - row_batch->copy_rows(0, row_batch->num_rows() - rows_to_keep, rows_to_keep); - row_batch->set_num_rows(rows_to_keep); - } else { - row_batch->set_num_rows(0); - } - if (rows_to_keep > 0 || *eos) { - break; - } - RETURN_IF_ERROR(_sorter->get_next(row_batch, eos)); - } - - _num_rows_returned += row_batch->num_rows(); - if (reached_limit()) { - row_batch->set_num_rows(row_batch->num_rows() - (_num_rows_returned - _limit)); - *eos = true; - } - - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - return Status::OK(); -} - -Status SpillSortNode::reset(RuntimeState* state) { - _num_rows_skipped = 0; - if (_sorter.get() != nullptr) { - _sorter->reset(); - } - // return ExecNode::reset(state); - return Status::OK(); -} - -Status SpillSortNode::close(RuntimeState* state) { - if (is_closed()) { - return Status::OK(); - } - _sort_exec_exprs.close(state); - _sorter.reset(); - ExecNode::close(state); - return Status::OK(); -} - -void SpillSortNode::debug_string(int indentation_level, stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "SpillSortNode(" << Expr::debug_string(_sort_exec_exprs.lhs_ordering_expr_ctxs()); - for (int i = 0; i < _is_asc_order.size(); ++i) { - *out << (i > 0 ? " " : "") << (_is_asc_order[i] ? "asc" : "desc") << " nulls " - << (_nulls_first[i] ? "first" : "last"); - } - ExecNode::debug_string(indentation_level, out); - *out << ")"; -} - -Status SpillSortNode::sort_input(RuntimeState* state) { - RowBatch batch(child(0)->row_desc(), state->batch_size()); - bool eos = false; - do { - batch.reset(); - RETURN_IF_ERROR(child(0)->get_next(state, &batch, &eos)); - RETURN_IF_ERROR(_sorter->add_batch(&batch)); - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state("Spill sort, while sorting input.")); - } while (!eos); - - RETURN_IF_ERROR(_sorter->input_done()); - if (_sorter->is_spilled()) { - add_runtime_exec_option("Spilled"); - } - return Status::OK(); -} - -} // end namespace doris diff --git a/be/src/exec/spill_sort_node.h b/be/src/exec/spill_sort_node.h deleted file mode 100644 index 4633c4748a..0000000000 --- a/be/src/exec/spill_sort_node.h +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exec/exec_node.h" -#include "exec/sort_exec_exprs.h" -#include "runtime/spill_sorter.h" - -namespace doris { - -// Node that implements a full sort of its input with a fixed memory budget, spilling -// to disk if the input is larger than available memory. -// Uses SpillSorter and BufferedBlockMgr for the external sort implementation. 
-// Input rows to SpillSortNode are materialized by the SpillSorter into a single tuple
-// using the expressions specified in _sort_exec_exprs.
-// In get_next(), SpillSortNode passes in the output batch to the sorter instance created
-// in open() to fill it with sorted rows.
-// If a merge phase was performed in the sort, sorted rows are deep copied into
-// the output batch. Otherwise, the sorter instance owns the sorted data.
-class SpillSortNode : public ExecNode {
-public:
-    SpillSortNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-    ~SpillSortNode();
-
-    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
-    virtual Status prepare(RuntimeState* state);
-    virtual Status open(RuntimeState* state);
-    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
-    virtual Status reset(RuntimeState* state);
-    virtual Status close(RuntimeState* state);
-
-protected:
-    virtual void debug_string(int indentation_level, std::stringstream* out) const;
-
-private:
-    // Fetch input rows and feed them to the sorter until the input is exhausted.
-    Status sort_input(RuntimeState* state);
-
-    // Number of rows to skip.
-    int64_t _offset;
-
-    // Expressions and parameters used for tuple materialization and tuple comparison.
-    SortExecExprs _sort_exec_exprs;
-    std::vector<bool> _is_asc_order;
-    std::vector<bool> _nulls_first;
-
-    /////////////////////////////////////////
-    // BEGIN: Members that must be reset()
-
-    // Object used for external sorting.
-    std::unique_ptr<SpillSorter> _sorter;
-
-    // Keeps track of the number of rows skipped for handling _offset.
-    int64_t _num_rows_skipped;
-
-    // END: Members that must be reset()
-    /////////////////////////////////////////
-};
-
-} // end namespace doris
diff --git a/be/src/exec/table_function_node.cpp b/be/src/exec/table_function_node.cpp
deleted file mode 100644
index 3c1a26cf74..0000000000
--- a/be/src/exec/table_function_node.cpp
+++ /dev/null
@@ -1,384 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
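// A standalone sketch of the external-sort shape behind SpillSorter above: sort
// fixed-size runs independently (stand-ins for spilled blocks), then k-way merge
// them on demand, the way the final merge feeds get_next(). Plain ints replace
// materialized tuples, and the run size is an arbitrary assumption for illustration.

#include <algorithm>
#include <deque>
#include <queue>
#include <vector>

std::vector<int> external_sort(std::vector<int> input, size_t run_size = 3) {
    // Phase 1: produce sorted runs (each run models one spilled, sorted block).
    std::vector<std::deque<int>> runs;
    for (size_t i = 0; i < input.size(); i += run_size) {
        size_t end = std::min(input.size(), i + run_size);
        std::sort(input.begin() + i, input.begin() + end);
        runs.emplace_back(input.begin() + i, input.begin() + end);
    }
    // Phase 2: k-way merge; the heap holds run indices ordered by each run's head.
    auto cmp = [&runs](size_t a, size_t b) { return runs[a].front() > runs[b].front(); };
    std::priority_queue<size_t, std::vector<size_t>, decltype(cmp)> heads(cmp);
    for (size_t r = 0; r < runs.size(); ++r) {
        if (!runs[r].empty()) heads.push(r);
    }
    std::vector<int> out;
    while (!heads.empty()) {
        size_t r = heads.top();
        heads.pop();
        out.push_back(runs[r].front());
        runs[r].pop_front();
        if (!runs[r].empty()) heads.push(r); // re-enter with the run's new head
    }
    return out;
}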
- -#include "exec/table_function_node.h" - -#include "exec/exec_node.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "exprs/table_function/table_function_factory.h" -#include "runtime/descriptors.h" -#include "runtime/raw_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" -#include "vec/exprs/vexpr.h" - -namespace doris { - -TableFunctionNode::TableFunctionNode(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs) {} - -TableFunctionNode::~TableFunctionNode() = default; - -Status TableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::init(tnode, state)); - - for (const TExpr& texpr : tnode.table_function_node.fnCallExprList) { - ExprContext* ctx = nullptr; - RETURN_IF_ERROR(Expr::create_expr_tree(_pool, texpr, &ctx)); - _fn_ctxs.push_back(ctx); - - Expr* root = ctx->root(); - const std::string& tf_name = root->fn().name.function_name; - TableFunction* fn = nullptr; - RETURN_IF_ERROR(TableFunctionFactory::get_fn(tf_name, false, _pool, &fn)); - fn->set_expr_context(ctx); - _fns.push_back(fn); - } - _fn_num = _fns.size(); - _fn_values.resize(_fn_num); - _fn_value_lengths.resize(_fn_num); - - // Prepare output slot ids - RETURN_IF_ERROR(_prepare_output_slot_ids(tnode)); - return Status::OK(); -} - -Status TableFunctionNode::_prepare_output_slot_ids(const TPlanNode& tnode) { - // Prepare output slot ids - if (tnode.table_function_node.outputSlotIds.empty()) { - return Status::InternalError("Output slots of table function node is empty"); - } - SlotId max_id = -1; - for (auto slot_id : tnode.table_function_node.outputSlotIds) { - if (slot_id > max_id) { - max_id = slot_id; - } - } - _output_slot_ids = std::vector(max_id + 1, false); - for (auto slot_id : tnode.table_function_node.outputSlotIds) { - _output_slot_ids[slot_id] = true; - } - - return Status::OK(); -} - -bool TableFunctionNode::_is_inner_and_empty() { - for (int i = 0; i < _fn_num; i++) { - // if any table function is not outer and has empty result, go to next child row - if (!_fns[i]->is_outer() && _fns[i]->current_empty()) { - return true; - } - } - return false; -} - -Status TableFunctionNode::prepare(RuntimeState* state) { - RETURN_IF_ERROR(ExecNode::prepare(state)); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - _num_rows_filtered_counter = ADD_COUNTER(_runtime_profile, "RowsFiltered", TUnit::UNIT); - - RETURN_IF_ERROR(Expr::prepare(_fn_ctxs, state, _row_descriptor)); - for (auto fn : _fns) { - RETURN_IF_ERROR(fn->prepare()); - } - return Status::OK(); -} - -Status TableFunctionNode::alloc_resource(RuntimeState* state) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_CANCELLED(state); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - ExecNode::alloc_resource(state); - - RETURN_IF_ERROR(Expr::open(_fn_ctxs, state)); - RETURN_IF_ERROR(vectorized::VExpr::open(_vfn_ctxs, state)); - - for (auto fn : _fns) { - RETURN_IF_ERROR(fn->open()); - } - - return Status::OK(); -} - -Status TableFunctionNode::_process_next_child_row() { - if (_cur_child_offset == _cur_child_batch->num_rows()) { - _cur_child_batch->reset(); - _child_batch_exhausted = true; - return Status::OK(); - } - _cur_child_tuple_row = _cur_child_batch->get_row(_cur_child_offset++); - for (TableFunction* fn : _fns) { - RETURN_IF_ERROR(fn->process(_cur_child_tuple_row)); - } - - _child_batch_exhausted = false; - return Status::OK(); -} - -// Returns the index of fn of 
the last eos counted from back to front -// eg: there are 3 functions in `_fns` -// eos: false, true, true -// return: 1 -// -// eos: false, false, true -// return: 2 -// -// eos: false, false, false -// return: -1 -// -// eos: true, true, true -// return: 0 -// -// return: -// 0: all fns are eos -// -1: all fns are not eos -// >0: some of fns are eos -int TableFunctionNode::_find_last_fn_eos_idx() { - for (int i = _fn_num - 1; i >= 0; --i) { - if (!_fns[i]->eos()) { - if (i == _fn_num - 1) { - return -1; - } else { - return i + 1; - } - } - } - // all eos - return 0; -} - -// Roll to reset the table function. -// Eg: -// There are 3 functions f1, f2 and f3 in `_fns`. -// If `last_eos_idx` is 1, which means f2 and f3 are eos. -// So we need to forward f1, and reset f2 and f3. -bool TableFunctionNode::_roll_table_functions(int last_eos_idx) { - bool fn_eos = false; - int i = last_eos_idx - 1; - for (; i >= 0; --i) { - _fns[i]->forward(&fn_eos); - if (!fn_eos) { - break; - } - } - if (i == -1) { - // after forward, all functions are eos. - // we should process next child row to get more table function results. - return false; - } - - for (int j = i + 1; j < _fn_num; ++j) { - _fns[j]->reset(); - } - - return true; -} - -// There are 2 while loops in this method. -// The outer loop is to get the next batch from child node. -// And the inner loop is to expand the row by table functions, and output row by row. -Status TableFunctionNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - - const RowDescriptor& parent_rowdesc = row_batch->row_desc(); - const RowDescriptor& child_rowdesc = _children[0]->row_desc(); - if (_parent_tuple_desc_size == -1) { - _parent_tuple_desc_size = parent_rowdesc.tuple_descriptors().size(); - _child_tuple_desc_size = child_rowdesc.tuple_descriptors().size(); - for (int i = 0; i < _child_tuple_desc_size; ++i) { - _child_slot_sizes.push_back(child_rowdesc.tuple_descriptors()[i]->slots().size()); - } - } - - uint8_t* tuple_buffer = nullptr; - Tuple* tuple_ptr = nullptr; - Tuple* pre_tuple_ptr = nullptr; - - while (true) { - RETURN_IF_CANCELLED(state); - RETURN_IF_ERROR(state->check_query_state("TableFunctionNode, while getting next batch.")); - - if (_cur_child_batch == nullptr) { - _cur_child_batch.reset(new RowBatch(child_rowdesc, state->batch_size())); - } - if (_child_batch_exhausted) { - if (_child_eos) { - // current child batch is exhausted, and no more batch from child node - break; - } - // current child batch is exhausted, get next batch from child - RETURN_IF_ERROR(_children[0]->get_next(state, _cur_child_batch.get(), &_child_eos)); - if (_cur_child_batch->num_rows() == 0) { - // no more batch from child node - break; - } - - _cur_child_offset = 0; - RETURN_IF_ERROR(_process_next_child_row()); - if (_child_batch_exhausted) { - continue; - } - } - - bool skip_child_row = false; - while (true) { - int idx = _find_last_fn_eos_idx(); - if (idx == 0 || skip_child_row) { - // all table functions' results are exhausted, process next child row - RETURN_IF_ERROR(_process_next_child_row()); - if (_child_batch_exhausted) { - break; - } - } else if (idx < _fn_num && idx != -1) { - // some of table functions' results are exhausted - if (!_roll_table_functions(idx)) { - // continue to process next child row - continue; - } - } - - // if any table function is not outer and has empty result, go to next child row - if (skip_child_row = 
_is_inner_and_empty(); skip_child_row) {
-                continue;
-            }
-
-            // get slots from every table function
-            // Notice that _fn_values[i] may be null if the table function has empty result set.
-            for (int i = 0; i < _fn_num; i++) {
-                RETURN_IF_ERROR(_fns[i]->get_value(&_fn_values[i]));
-            }
-
-            // allocate memory for row batch for the first time
-            if (tuple_buffer == nullptr) {
-                int64_t tuple_buffer_size;
-                RETURN_IF_ERROR(row_batch->resize_and_allocate_tuple_buffer(
-                        state, &tuple_buffer_size, &tuple_buffer));
-                tuple_ptr = reinterpret_cast<Tuple*>(tuple_buffer);
-            }
-
-            pre_tuple_ptr = tuple_ptr;
-            // The tuples order in parent row batch should be
-            // child1, child2, tf1, tf2, ...
-            TupleRow* parent_tuple_row = row_batch->get_row(row_batch->add_row());
-            // 1. copy child tuples
-            int tuple_idx = 0;
-            for (int i = 0; i < _child_tuple_desc_size; tuple_idx++, i++) {
-                TupleDescriptor* child_tuple_desc = child_rowdesc.tuple_descriptors()[tuple_idx];
-                TupleDescriptor* parent_tuple_desc = parent_rowdesc.tuple_descriptors()[tuple_idx];
-
-                auto tuple_idx = child_rowdesc.get_tuple_idx(child_tuple_desc->id());
-                RETURN_IF_INVALID_TUPLE_IDX(child_tuple_desc->id(), tuple_idx);
-                Tuple* child_tuple = _cur_child_tuple_row->get_tuple(tuple_idx);
-
-                // The child tuple is nullptr only when it comes from an outer join, so we
-                // set the same tuple_idx on the parent tuple to nullptr to mirror that behavior.
-                if (child_tuple != nullptr) {
-                    // copy the child tuple to parent_tuple
-                    memcpy(tuple_ptr, child_tuple, parent_tuple_desc->byte_size());
-                    // only deep copy the child slot if it is selected and is var len (Eg: string, bitmap, hll)
-                    for (int j = 0; j < _child_slot_sizes[i]; ++j) {
-                        SlotDescriptor* child_slot_desc = child_tuple_desc->slots()[j];
-                        SlotDescriptor* parent_slot_desc = parent_tuple_desc->slots()[j];
-
-                        if (child_tuple->is_null(child_slot_desc->null_indicator_offset())) {
-                            continue;
-                        }
-                        if (child_slot_desc->type().is_string_type()) {
-                            void* dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset());
-                            if (_output_slot_ids[parent_slot_desc->id()]) {
-                                // deep copy
-                                RawValue::write(
-                                        child_tuple->get_slot(child_slot_desc->tuple_offset()),
-                                        dest_slot, parent_slot_desc->type(),
-                                        row_batch->tuple_data_pool());
-                            } else {
-                                // clear for unused slot
-                                StringValue* dest = reinterpret_cast<StringValue*>(dest_slot);
-                                dest->replace(nullptr, 0);
-                            }
-                        }
-                    }
-                    parent_tuple_row->set_tuple(tuple_idx, tuple_ptr);
-                } else {
-                    parent_tuple_row->set_tuple(tuple_idx, nullptr);
-                }
-                tuple_ptr = reinterpret_cast<Tuple*>(reinterpret_cast<uint8_t*>(tuple_ptr) +
-                                                     parent_tuple_desc->byte_size());
-            }
-
-            // 2. copy function result
-            for (int i = 0; tuple_idx < _parent_tuple_desc_size; tuple_idx++, i++) {
-                TupleDescriptor* parent_tuple_desc = parent_rowdesc.tuple_descriptors()[tuple_idx];
-                SlotDescriptor* parent_slot_desc = parent_tuple_desc->slots()[0];
-                void* dest_slot = tuple_ptr->get_slot(parent_slot_desc->tuple_offset());
-                if (_fn_values[i] != nullptr) {
-                    RawValue::write(_fn_values[i], dest_slot, parent_slot_desc->type(),
-                                    row_batch->tuple_data_pool());
-                    tuple_ptr->set_not_null(parent_slot_desc->null_indicator_offset());
-                } else {
-                    tuple_ptr->set_null(parent_slot_desc->null_indicator_offset());
-                }
-                parent_tuple_row->set_tuple(tuple_idx, tuple_ptr);
-                tuple_ptr = reinterpret_cast<Tuple*>(reinterpret_cast<uint8_t*>(tuple_ptr) +
-                                                     parent_tuple_desc->byte_size());
-            }
-
-            // 3.
eval conjuncts - if (eval_conjuncts(&_conjunct_ctxs[0], _conjunct_ctxs.size(), parent_tuple_row)) { - row_batch->commit_last_row(); - ++_num_rows_returned; - } else { - tuple_ptr = pre_tuple_ptr; - ++_num_rows_filtered; - } - - // Forward after write success. - // Because data in `_fn_values` points to the data saved in functions. - // And `forward` will change the data in functions. - bool tmp = false; - _fns[_fn_num - 1]->forward(&tmp); - - if (row_batch->at_capacity()) { - break; - } - } // end while true - - if (row_batch->at_capacity()) { - break; - } - } // end while cur_eos - - if (reached_limit()) { - int num_rows_over = _num_rows_returned - _limit; - row_batch->set_num_rows(row_batch->num_rows() - num_rows_over); - _num_rows_returned -= num_rows_over; - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - *eos = true; - } else { - *eos = row_batch->num_rows() == 0; - } - - return Status::OK(); -} - -}; // namespace doris diff --git a/be/src/exec/table_function_node.h b/be/src/exec/table_function_node.h deleted file mode 100644 index ece29bd50e..0000000000 --- a/be/src/exec/table_function_node.h +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
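// A minimal model of the row expansion TableFunctionNode::get_next() performs for a
// single "explode"-style function: every child row yields one output row per value
// the function produces; with outer semantics an empty expansion still yields one
// row whose function value is null (an empty string stands in here). The struct and
// function names are illustrative stand-ins for TupleRow slots, not the node's API.

#include <string>
#include <vector>

struct ExpandedRow {
    int child_val;       // models the copied child tuple
    std::string fn_val;  // models the table function's output slot
};

std::vector<ExpandedRow> expand_rows(const std::vector<int>& child_rows,
                                     const std::vector<std::vector<std::string>>& fn_results,
                                     bool outer) {
    std::vector<ExpandedRow> out;
    for (size_t i = 0; i < child_rows.size(); ++i) {
        if (fn_results[i].empty()) {
            // Mirrors _is_inner_and_empty(): an inner (non-outer) function with an
            // empty result drops the child row entirely.
            if (outer) out.push_back({child_rows[i], ""});
            continue;
        }
        for (const std::string& v : fn_results[i]) {
            out.push_back({child_rows[i], v});
        }
    }
    return out;
}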
-
-#pragma once
-
-#include "exec/exec_node.h"
-#include "exprs/expr.h"
-#include "vec/exprs/vexpr.h"
-
-namespace doris {
-
-class MemPool;
-class RowBatch;
-class TableFunction;
-class TupleRow;
-
-// TableFunctionNode
-class TableFunctionNode : public ExecNode {
-public:
-    TableFunctionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-    ~TableFunctionNode() override;
-
-    Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override;
-    Status prepare(RuntimeState* state) override;
-    Status open(RuntimeState* state) override {
-        START_AND_SCOPE_SPAN(state->get_tracer(), span, "TableFunctionNode::open");
-        RETURN_IF_ERROR(alloc_resource(state));
-        return _children[0]->open(state);
-    }
-    Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override;
-    Status alloc_resource(RuntimeState* state) override;
-    void release_resource(doris::RuntimeState* state) override {
-        Expr::close(_fn_ctxs, state);
-        vectorized::VExpr::close(_vfn_ctxs, state);
-
-        if (_num_rows_filtered_counter != nullptr) {
-            COUNTER_SET(_num_rows_filtered_counter, static_cast<int64_t>(_num_rows_filtered));
-        }
-        ExecNode::release_resource(state);
-    }
-
-protected:
-    Status _prepare_output_slot_ids(const TPlanNode& tnode);
-    bool _is_inner_and_empty();
-
-    // return:
-    //  0: all fns are eos
-    // -1: all fns are not eos
-    // >0: some of fns are eos
-    int _find_last_fn_eos_idx();
-
-    virtual Status _process_next_child_row();
-
-    bool _roll_table_functions(int last_eos_idx);
-
-    int64_t _cur_child_offset = 0;
-    TupleRow* _cur_child_tuple_row = nullptr;
-    std::shared_ptr<RowBatch> _cur_child_batch;
-    // true means current child batch is completely consumed.
-    // we should get next batch from child node.
-    bool _child_batch_exhausted = true;
-
-    std::vector<ExprContext*> _fn_ctxs;
-    std::vector<vectorized::VExprContext*> _vfn_ctxs;
-
-    std::vector<TableFunction*> _fns;
-    std::vector<void*> _fn_values;
-    std::vector<int64_t> _fn_value_lengths;
-    int _fn_num = 0;
-
-    // std::unordered_set _output_slot_ids;
-    std::vector<bool> _output_slot_ids;
-
-    int _parent_tuple_desc_size = -1;
-    int _child_tuple_desc_size = -1;
-    std::vector<int> _child_slot_sizes;
-    // indicates whether the child node has reached the end
-    bool _child_eos = false;
-
-    RuntimeProfile::Counter* _num_rows_filtered_counter = nullptr;
-    uint64_t _num_rows_filtered = 0;
-};
-
-}; // namespace doris
diff --git a/be/src/exec/topn_node.cpp b/be/src/exec/topn_node.cpp
deleted file mode 100644
index dda95e1c6c..0000000000
--- a/be/src/exec/topn_node.cpp
+++ /dev/null
@@ -1,245 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/topn-node.cc
-// and modified by Doris
-
-#include "exec/topn_node.h"
-
-#include
-
-#include "exprs/expr.h"
-#include "gen_cpp/PlanNodes_types.h"
-#include "runtime/descriptors.h"
-#include "runtime/mem_pool.h"
-#include "runtime/row_batch.h"
-#include "runtime/runtime_state.h"
-#include "runtime/tuple.h"
-#include "runtime/tuple_row.h"
-#include "util/runtime_profile.h"
-#include "util/tuple_row_compare.h"
-
-namespace doris {
-
-TopNNode::TopNNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
-        : ExecNode(pool, tnode, descs),
-          _offset(tnode.sort_node.__isset.offset ? tnode.sort_node.offset : 0),
-          _materialized_tuple_desc(nullptr),
-          _tuple_row_less_than(nullptr),
-          _tuple_pool(nullptr),
-          _num_rows_skipped(0),
-          _priority_queue(nullptr) {}
-
-TopNNode::~TopNNode() {}
-
-Status TopNNode::init(const TPlanNode& tnode, RuntimeState* state) {
-    RETURN_IF_ERROR(ExecNode::init(tnode, state));
-    RETURN_IF_ERROR(_sort_exec_exprs.init(tnode.sort_node.sort_info, _pool));
-    _is_asc_order = tnode.sort_node.sort_info.is_asc_order;
-    _nulls_first = tnode.sort_node.sort_info.nulls_first;
-
-    DCHECK_EQ(_conjuncts.size(), 0) << "TopNNode should never have predicates to evaluate.";
-    _abort_on_default_limit_exceeded = tnode.sort_node.is_default_limit;
-    return Status::OK();
-}
-
-Status TopNNode::prepare(RuntimeState* state) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    _tuple_pool.reset(new MemPool(mem_tracker_held()));
-    RETURN_IF_ERROR(_sort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor));
-    // AddExprCtxsToFree(_sort_exec_exprs);
-
-    _tuple_row_less_than.reset(
-            new TupleRowComparator(_sort_exec_exprs, _is_asc_order, _nulls_first));
-
-    _abort_on_default_limit_exceeded =
-            _abort_on_default_limit_exceeded && state->abort_on_default_limit_exceeded();
-    _materialized_tuple_desc = _row_descriptor.tuple_descriptors()[0];
-    return Status::OK();
-}
-
-Status TopNNode::open(RuntimeState* state) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::open(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    RETURN_IF_CANCELLED(state);
-    RETURN_IF_ERROR(state->check_query_state("Top n, before open."));
-    RETURN_IF_ERROR(_sort_exec_exprs.open(state));
-
-    // Avoid creating them after every Reset()/Open().
-    // TODO: For some reason initializing _priority_queue in Prepare() causes a 30% perf
-    // regression. Why??
-    if (_priority_queue == nullptr) {
-        _priority_queue.reset(new SortingHeap<Tuple*, std::vector<Tuple*>, TupleRowComparator>(
-                *_tuple_row_less_than));
-    }
-
-    // Allocate memory for a temporary tuple.
-    _tmp_tuple =
-            reinterpret_cast<Tuple*>(_tuple_pool->allocate(_materialized_tuple_desc->byte_size()));
-    RETURN_IF_ERROR(child(0)->open(state));
-
-    // Limit of 0, no need to fetch anything from children.
-    if (_limit != 0) {
-        RowBatch batch(child(0)->row_desc(), state->batch_size());
-        bool eos = false;
-
-        do {
-            batch.reset();
-            RETURN_IF_ERROR(child(0)->get_next(state, &batch, &eos));
-
-            if (_abort_on_default_limit_exceeded && child(0)->rows_returned() > _limit) {
-                return Status::InternalError("DEFAULT_ORDER_BY_LIMIT has been exceeded.");
-            }
-
-            for (int i = 0; i < batch.num_rows(); ++i) {
-                insert_tuple_row(batch.get_row(i));
-            }
-            RETURN_IF_CANCELLED(state);
-            RETURN_IF_ERROR(state->check_query_state("Top n, while getting next from child 0."));
-        } while (!eos);
-    }
-
-    DCHECK_LE(_priority_queue->size(), _offset + _limit);
-    prepare_for_output();
-
-    // Unless we are inside a subplan expecting to call open()/get_next() on the child
-    // again, the child can be closed at this point.
-    // if (!is_in_subplan()) {
-    child(0)->close(state);
-    // }
-    return Status::OK();
-}
-
-Status TopNNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    RETURN_IF_CANCELLED(state);
-    RETURN_IF_ERROR(state->check_query_state("Top n, before moving result to row_batch."));
-
-    while (!row_batch->at_capacity() && (_get_next_iter != _sorted_top_n.end())) {
-        if (_num_rows_skipped < _offset) {
-            ++_get_next_iter;
-            _num_rows_skipped++;
-            continue;
-        }
-
-        int row_idx = row_batch->add_row();
-        TupleRow* dst_row = row_batch->get_row(row_idx);
-        Tuple* src_tuple = *_get_next_iter;
-        TupleRow* src_row = reinterpret_cast<TupleRow*>(&src_tuple);
-        row_batch->copy_row(src_row, dst_row);
-        ++_get_next_iter;
-        row_batch->commit_last_row();
-        ++_num_rows_returned;
-        COUNTER_SET(_rows_returned_counter, _num_rows_returned);
-    }
-    if (VLOG_ROW_IS_ON) {
-        VLOG_ROW << "TOPN-node output row: " << row_batch->to_string();
-    }
-
-    *eos = _get_next_iter == _sorted_top_n.end();
-    // Transfer ownership of tuple data to output batch.
-    // TODO: To improve performance for small inputs when this node is run multiple times
-    // inside a subplan, we might choose to only selectively transfer, e.g., when the
-    // block(s) in the pool are all full or when the pool has reached a certain size.
-    if (*eos) {
-        row_batch->tuple_data_pool()->acquire_data(_tuple_pool.get(), false);
-    }
-    return Status::OK();
-}
-
-Status TopNNode::close(RuntimeState* state) {
-    if (is_closed()) {
-        return Status::OK();
-    }
-    if (_tuple_pool.get() != nullptr) {
-        _tuple_pool->free_all();
-    }
-    _sort_exec_exprs.close(state);
-
-    return ExecNode::close(state);
-}
-
-// Insert if either not at the limit or it's a new TopN tuple_row
-void TopNNode::insert_tuple_row(TupleRow* input_row) {
-    if (_priority_queue->size() < _offset + _limit) {
-        auto insert_tuple = reinterpret_cast<Tuple*>(
-                _tuple_pool->allocate(_materialized_tuple_desc->byte_size()));
-        insert_tuple->materialize_exprs(input_row, *_materialized_tuple_desc,
-                                        _sort_exec_exprs.sort_tuple_slot_expr_ctxs(),
-                                        _tuple_pool.get(), nullptr, nullptr);
-        _priority_queue->push(insert_tuple);
-    } else {
-        DCHECK(!_priority_queue->empty());
-        Tuple* top_tuple = _priority_queue->top();
-        _tmp_tuple->materialize_exprs(input_row, *_materialized_tuple_desc,
-                                      _sort_exec_exprs.sort_tuple_slot_expr_ctxs(), nullptr,
-                                      nullptr, nullptr);
-
-        if ((*_tuple_row_less_than)(_tmp_tuple, top_tuple)) {
-            // TODO: DeepCopy will allocate new buffers for the string data. This needs
-            // to be fixed to use a freelist
-            _tmp_tuple->deep_copy(top_tuple, *_materialized_tuple_desc, _tuple_pool.get());
-            auto insert_tuple = top_tuple;
-            _priority_queue->replace_top(insert_tuple);
-        }
-    }
-}
-
-// Reverse the order of the tuples in the priority queue
-void TopNNode::prepare_for_output() {
-    _sorted_top_n = _priority_queue->sorted_seq();
-
-    _get_next_iter = _sorted_top_n.begin();
-}
-
-void TopNNode::debug_string(int indentation_level, std::stringstream* out) const {
-    *out << std::string(indentation_level * 2, ' ');
-    *out << "TopNNode("
-         // << " ordering_exprs=" << Expr::debug_string(_lhs_ordering_expr_ctxs)
-         << Expr::debug_string(_sort_exec_exprs.lhs_ordering_expr_ctxs()) << " sort_order=[";
-
-    for (int i = 0; i < _is_asc_order.size(); ++i) {
-        *out << (i > 0 ? " " : "") << (_is_asc_order[i] ? "asc" : "desc") << " nulls "
-             << (_nulls_first[i] ? "first" : "last");
-    }
-
-    *out << "]";
-    ExecNode::debug_string(indentation_level, out);
-    *out << ")";
-}
-
-void TopNNode::push_down_predicate(RuntimeState* state, std::list<ExprContext*>* expr_ctxs) {
-    std::list<ExprContext*>::iterator iter = expr_ctxs->begin();
-    while (iter != expr_ctxs->end()) {
-        if ((*iter)->root()->is_bound(&_tuple_ids)) {
-            // LOG(INFO) << "push down success expr is " << (*iter)->debug_string();
-            // (*iter)->get_child(0)->prepare(state, row_desc());
-            (*iter)->prepare(state, row_desc());
-            (*iter)->open(state);
-            _conjunct_ctxs.push_back(*iter);
-            iter = expr_ctxs->erase(iter);
-        } else {
-            ++iter;
-        }
    }
-}
-
-} // namespace doris
diff --git a/be/src/exec/topn_node.h b/be/src/exec/topn_node.h
deleted file mode 100644
index 5a7903af37..0000000000
--- a/be/src/exec/topn_node.h
+++ /dev/null
@@ -1,113 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/topn-node.h
-// and modified by Doris
-
-#pragma once
-
-#include
-
-#include "exec/exec_node.h"
-#include "runtime/descriptors.h"
-#include "util/sort_heap.h"
-#include "util/tuple_row_compare.h"
-
-namespace doris {
-
-class MemPool;
-class RuntimeState;
-class Tuple;
-
-// Node for in-memory TopN (ORDER BY ... LIMIT)
-// This handles the case where the result fits in memory. This node will do a deep
-// copy of the tuples that are necessary for the output.
-// This is implemented by storing rows in a priority queue.
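// A standalone sketch of the bounded-heap technique behind this node, for
// ORDER BY x ASC LIMIT n over plain ints instead of materialized tuples: keep a
// max-heap of the n smallest values seen so far (in the node, n = _offset + _limit);
// once full, replace the current worst only when a new value beats it, mirroring
// insert_tuple_row() and replace_top() above. Function name is illustrative.

#include <algorithm>
#include <queue>
#include <vector>

std::vector<int> top_n_smallest(const std::vector<int>& input, size_t n) {
    std::priority_queue<int> heap; // top() is the worst (largest) kept value
    for (int v : input) {
        if (heap.size() < n) {
            heap.push(v);                       // heap not yet at the limit
        } else if (!heap.empty() && v < heap.top()) {
            heap.pop();                         // the replace_top() step
            heap.push(v);
        }
    }
    // prepare_for_output(): drain the heap and reverse into sorted order.
    std::vector<int> sorted;
    while (!heap.empty()) {
        sorted.push_back(heap.top());
        heap.pop();
    }
    std::reverse(sorted.begin(), sorted.end());
    return sorted;
}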
-class TopNNode : public ExecNode {
-public:
-    TopNNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-    virtual ~TopNNode();
-
-    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
-
-    virtual Status prepare(RuntimeState* state);
-    virtual Status open(RuntimeState* state);
-    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
-    virtual Status close(RuntimeState* state);
-    virtual void push_down_predicate(RuntimeState* state, std::list<ExprContext*>* expr_ctxs);
-
-protected:
-    virtual void debug_string(int indentation_level, std::stringstream* out) const;
-
-private:
-    friend class TupleLessThan;
-
-    // Inserts a tuple row into the priority queue if it's in the TopN. Creates a deep
-    // copy of tuple_row, which it stores in _tuple_pool.
-    void insert_tuple_row(TupleRow* tuple_row);
-
-    // Flatten and reverse the priority queue.
-    void prepare_for_output();
-
-    // Number of rows to skip.
-    int64_t _offset;
-
-    // _sort_exec_exprs contains the ordering expressions used for tuple comparison and
-    // the materialization exprs for the output tuple.
-    SortExecExprs _sort_exec_exprs;
-    std::vector<bool> _is_asc_order;
-    std::vector<bool> _nulls_first;
-
-    // Cached descriptor for the materialized tuple. Assigned in Prepare().
-    TupleDescriptor* _materialized_tuple_desc;
-
-    // Comparator for _priority_queue.
-    std::unique_ptr<TupleRowComparator> _tuple_row_less_than;
-
-    // After computing the TopN in the priority_queue, pop them and put them in this vector
-    std::vector<Tuple*> _sorted_top_n;
-
-    // Tuple allocated once from _tuple_pool and reused in InsertTupleRow to
-    // materialize input tuples if necessary. After materialization, _tmp_tuple may be
-    // copied into the tuple pool and inserted into the priority queue.
-    Tuple* _tmp_tuple;
-
-    // Stores everything referenced in _priority_queue
-    std::unique_ptr<MemPool> _tuple_pool;
-
-    // Iterator over elements in _sorted_top_n.
-    std::vector<Tuple*>::iterator _get_next_iter;
-    // std::vector::iterator _get_next_iter;
-
-    // True if the _limit comes from DEFAULT_ORDER_BY_LIMIT and the query option
-    // ABORT_ON_DEFAULT_LIMIT_EXCEEDED is set.
-    bool _abort_on_default_limit_exceeded;
-
-    /////////////////////////////////////////
-    // BEGIN: Members that must be Reset()
-
-    // Number of rows skipped. Used for adhering to _offset.
-    int64_t _num_rows_skipped;
-
-    // The priority queue will never have more elements in it than the LIMIT.
-    std::unique_ptr<SortingHeap<Tuple*, std::vector<Tuple*>, TupleRowComparator>> _priority_queue;
-
-    // END: Members that must be Reset()
-    /////////////////////////////////////////
-};
-
-}; // namespace doris
diff --git a/be/src/exec/union_node.cpp b/be/src/exec/union_node.cpp
deleted file mode 100644
index fdf0718750..0000000000
--- a/be/src/exec/union_node.cpp
+++ /dev/null
@@ -1,345 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/union-node.cc
-// and modified by Doris
-
-#include "exec/union_node.h"
-
-#include "exprs/expr.h"
-#include "exprs/expr_context.h"
-#include "gen_cpp/PlanNodes_types.h"
-#include "runtime/row_batch.h"
-#include "runtime/runtime_state.h"
-#include "runtime/tuple.h"
-#include "runtime/tuple_row.h"
-#include "util/runtime_profile.h"
-
-namespace doris {
-
-UnionNode::UnionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
-        : ExecNode(pool, tnode, descs),
-          _tuple_id(tnode.union_node.tuple_id),
-          _tuple_desc(nullptr),
-          _first_materialized_child_idx(tnode.union_node.first_materialized_child_idx),
-          _child_idx(0),
-          _child_batch(nullptr),
-          _child_row_idx(0),
-          _child_eos(false),
-          _const_expr_list_idx(0),
-          _to_close_child_idx(-1) {}
-
-Status UnionNode::init(const TPlanNode& tnode, RuntimeState* state) {
-    // TODO(zc):
-    // RETURN_IF_ERROR(ExecNode::init(tnode, state));
-    RETURN_IF_ERROR(ExecNode::init(tnode, state));
-    DCHECK(tnode.__isset.union_node);
-    DCHECK_EQ(_conjunct_ctxs.size(), 0);
-    // Create const_expr_ctx_lists_ from thrift exprs.
-    auto& const_texpr_lists = tnode.union_node.const_expr_lists;
-    for (auto& texprs : const_texpr_lists) {
-        std::vector<ExprContext*> ctxs;
-        RETURN_IF_ERROR(Expr::create_expr_trees(_pool, texprs, &ctxs));
-        _const_expr_lists.push_back(ctxs);
-    }
-    // Create result_expr_ctx_lists_ from thrift exprs.
-    auto& result_texpr_lists = tnode.union_node.result_expr_lists;
-    for (auto& texprs : result_texpr_lists) {
-        std::vector<ExprContext*> ctxs;
-        RETURN_IF_ERROR(Expr::create_expr_trees(_pool, texprs, &ctxs));
-        _child_expr_lists.push_back(ctxs);
-    }
-    return Status::OK();
-}
-
-Status UnionNode::prepare(RuntimeState* state) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::prepare(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
-    DCHECK(_tuple_desc != nullptr);
-    _materialize_exprs_evaluate_timer =
-            ADD_TIMER(_runtime_profile, "MaterializeExprsEvaluateTimer");
-    _codegend_union_materialize_batch_fns.resize(_child_expr_lists.size());
-    // Prepare const expr lists.
-    for (const std::vector<ExprContext*>& exprs : _const_expr_lists) {
-        RETURN_IF_ERROR(Expr::prepare(exprs, state, row_desc()));
-        // TODO(zc)
-        // AddExprCtxsToFree(exprs);
-        DCHECK_EQ(exprs.size(), _tuple_desc->slots().size());
-    }
-
-    // Prepare result expr lists.
-    for (int i = 0; i < _child_expr_lists.size(); ++i) {
-        RETURN_IF_ERROR(Expr::prepare(_child_expr_lists[i], state, child(i)->row_desc()));
-        // TODO(zc)
-        // AddExprCtxsToFree(_child_expr_lists[i]);
-        DCHECK_EQ(_child_expr_lists[i].size(), _tuple_desc->slots().size());
-    }
-    return Status::OK();
-}
-
-Status UnionNode::open(RuntimeState* state) {
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::open(state));
-    SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
-    // open const expr lists.
-    for (const std::vector<ExprContext*>& exprs : _const_expr_lists) {
-        RETURN_IF_ERROR(Expr::open(exprs, state));
-    }
-    // open result expr lists.
-    for (const std::vector<ExprContext*>& exprs : _child_expr_lists) {
-        RETURN_IF_ERROR(Expr::open(exprs, state));
-    }
-
-    // Ensures that rows are available for clients to fetch after this open() has
-    // succeeded.
- if (!_children.empty()) RETURN_IF_ERROR(child(_child_idx)->open(state)); - - return Status::OK(); -} - -Status UnionNode::get_next_pass_through(RuntimeState* state, RowBatch* row_batch) { - DCHECK(!reached_limit()); - DCHECK(!is_in_subplan()); - DCHECK_LT(_child_idx, _children.size()); - DCHECK(is_child_passthrough(_child_idx)); - // TODO(zc) - // DCHECK(child(_child_idx)->row_desc().LayoutEquals(row_batch->row_desc())); - if (_child_eos) { - RETURN_IF_ERROR(child(_child_idx)->open(state)); - _child_eos = false; - } - DCHECK_EQ(row_batch->num_rows(), 0); - RETURN_IF_ERROR(child(_child_idx)->get_next(state, row_batch, &_child_eos)); - if (_child_eos) { - // Even though the child is at eos, it's not OK to close() it here. Once we close - // the child, the row batches that it produced are invalid. Marking the batch as - // needing a deep copy let's us safely close the child in the next get_next() call. - // TODO: Remove this as part of IMPALA-4179. - row_batch->mark_needs_deep_copy(); - _to_close_child_idx = _child_idx; - ++_child_idx; - } - return Status::OK(); -} - -Status UnionNode::get_next_materialized(RuntimeState* state, RowBatch* row_batch) { - // Fetch from children, evaluate corresponding exprs and materialize. - DCHECK(!reached_limit()); - DCHECK_LT(_child_idx, _children.size()); - int64_t tuple_buf_size; - uint8_t* tuple_buf; - RETURN_IF_ERROR( - row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); - memset(tuple_buf, 0, tuple_buf_size); - - while (has_more_materialized() && !row_batch->at_capacity()) { - // The loop runs until we are either done iterating over the children that require - // materialization, or the row batch is at capacity. - DCHECK(!is_child_passthrough(_child_idx)); - // Child row batch was either never set or we're moving on to a different child. - if (_child_batch.get() == nullptr) { - DCHECK_LT(_child_idx, _children.size()); - _child_batch.reset(new RowBatch(child(_child_idx)->row_desc(), state->batch_size())); - _child_row_idx = 0; - // open the current child unless it's the first child, which was already opened in - // UnionNode::open(). - if (_child_eos) { - RETURN_IF_ERROR(child(_child_idx)->open(state)); - _child_eos = false; - } - // The first batch from each child is always fetched here. - RETURN_IF_ERROR(child(_child_idx)->get_next(state, _child_batch.get(), &_child_eos)); - } - - while (!row_batch->at_capacity()) { - DCHECK(_child_batch.get() != nullptr); - DCHECK_LE(_child_row_idx, _child_batch->num_rows()); - if (_child_row_idx == _child_batch->num_rows()) { - // Move on to the next child if it is at eos. - if (_child_eos) break; - // Fetch more rows from the child. - _child_batch->reset(); - _child_row_idx = 0; - // All batches except the first batch from each child are fetched here. - RETURN_IF_ERROR( - child(_child_idx)->get_next(state, _child_batch.get(), &_child_eos)); - // If we fetched an empty batch, go back to the beginning of this while loop, and - // try again. - if (_child_batch->num_rows() == 0) continue; - } - DCHECK_EQ(_codegend_union_materialize_batch_fns.size(), _children.size()); - if (_codegend_union_materialize_batch_fns[_child_idx] == nullptr) { - SCOPED_TIMER(_materialize_exprs_evaluate_timer); - materialize_batch(row_batch, &tuple_buf); - } else { - _codegend_union_materialize_batch_fns[_child_idx](this, row_batch, &tuple_buf); - } - } - // It shouldn't be the case that we reached the limit because we shouldn't have - // incremented '_num_rows_returned' yet. 
- DCHECK(!reached_limit()); - - if (_child_eos && _child_row_idx == _child_batch->num_rows()) { - // Unless we are inside a subplan expecting to call open()/get_next() on the child - // again, the child can be closed at this point. - _child_batch.reset(); - if (!is_in_subplan()) child(_child_idx)->close(state); - ++_child_idx; - } else { - // If we haven't finished consuming rows from the current child, we must have ended - // up here because the row batch is at capacity. - DCHECK(row_batch->at_capacity()); - } - } - - DCHECK_LE(_child_idx, _children.size()); - return Status::OK(); -} - -Status UnionNode::get_next_const(RuntimeState* state, RowBatch* row_batch) { - DCHECK_EQ(state->per_fragment_instance_idx(), 0); - DCHECK_LT(_const_expr_list_idx, _const_expr_lists.size()); - // Create new tuple buffer for row_batch. - int64_t tuple_buf_size; - uint8_t* tuple_buf; - RETURN_IF_ERROR( - row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buf_size, &tuple_buf)); - memset(tuple_buf, 0, tuple_buf_size); - - while (_const_expr_list_idx < _const_expr_lists.size() && !row_batch->at_capacity()) { - materialize_exprs(_const_expr_lists[_const_expr_list_idx], nullptr, tuple_buf, row_batch); - RETURN_IF_ERROR(get_error_msg(_const_expr_lists[_const_expr_list_idx])); - tuple_buf += _tuple_desc->byte_size(); - ++_const_expr_list_idx; - } - - return Status::OK(); -} - -Status UnionNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) { - SCOPED_TIMER(_runtime_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); - RETURN_IF_CANCELLED(state); - - if (_to_close_child_idx != -1) { - // The previous child needs to be closed if passthrough was enabled for it. In the non - // passthrough case, the child was already closed in the previous call to get_next(). - DCHECK(is_child_passthrough(_to_close_child_idx)); - DCHECK(!is_in_subplan()); - child(_to_close_child_idx)->close(state); - _to_close_child_idx = -1; - } - - // Save the number of rows in case get_next() is called with a non-empty batch, which can - // happen in a subplan. - int num_rows_before = row_batch->num_rows(); - - if (has_more_passthrough()) { - RETURN_IF_ERROR(get_next_pass_through(state, row_batch)); - } else if (has_more_materialized()) { - RETURN_IF_ERROR(get_next_materialized(state, row_batch)); - } else if (has_more_const(state)) { - RETURN_IF_ERROR(get_next_const(state, row_batch)); - } - - int num_rows_added = row_batch->num_rows() - num_rows_before; - DCHECK_GE(num_rows_added, 0); - if (_limit != -1 && _num_rows_returned + num_rows_added > _limit) { - // Truncate the row batch if we went over the limit. 
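The truncation below is the only place get_next() applies the limit; in isolation the arithmetic looks like this (a standalone sketch, assuming -1 encodes "no limit" as in the deleted code; rows_to_keep is a hypothetical helper):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Rows we may still emit under a LIMIT: never negative, never more than what
// this call just added. limit == -1 means "no limit".
int64_t rows_to_keep(int64_t limit, int64_t already_returned, int64_t just_added) {
    if (limit < 0) return just_added;
    return std::clamp<int64_t>(limit - already_returned, 0, just_added);
}

int main() {
    assert(rows_to_keep(-1, 100, 10) == 10); // no limit
    assert(rows_to_keep(105, 100, 10) == 5); // truncate to the limit
    assert(rows_to_keep(100, 100, 10) == 0); // limit already reached
    return 0;
}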
- num_rows_added = _limit - _num_rows_returned; - row_batch->set_num_rows(num_rows_before + num_rows_added); - DCHECK_GE(num_rows_added, 0); - } - _num_rows_returned += num_rows_added; - - *eos = reached_limit() || - (!has_more_passthrough() && !has_more_materialized() && !has_more_const(state)); - - COUNTER_SET(_rows_returned_counter, _num_rows_returned); - return Status::OK(); -} - -Status UnionNode::close(RuntimeState* state) { - if (is_closed()) return Status::OK(); - _child_batch.reset(); - for (auto& exprs : _const_expr_lists) { - Expr::close(exprs, state); - } - for (auto& exprs : _child_expr_lists) { - Expr::close(exprs, state); - } - return ExecNode::close(state); -} - -void UnionNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << string(indentation_level * 2, ' '); - *out << "_union(_first_materialized_child_idx=" << _first_materialized_child_idx - << " _row_descriptor=[" << row_desc().debug_string() << "] " - << " _child_expr_lists=["; - for (int i = 0; i < _child_expr_lists.size(); ++i) { - *out << Expr::debug_string(_child_expr_lists[i]) << ", "; - } - *out << "] \n"; - ExecNode::debug_string(indentation_level, out); - *out << ")" << std::endl; -} - -void UnionNode::materialize_exprs(const std::vector& exprs, TupleRow* row, - uint8_t* tuple_buf, RowBatch* dst_batch) { - DCHECK(!dst_batch->at_capacity()); - Tuple* dst_tuple = reinterpret_cast(tuple_buf); - TupleRow* dst_row = dst_batch->get_row(dst_batch->add_row()); - // dst_tuple->materialize_exprs(row, *_tuple_desc, exprs, - dst_tuple->materialize_exprs(row, *_tuple_desc, exprs, dst_batch->tuple_data_pool(), - nullptr, nullptr); - dst_row->set_tuple(0, dst_tuple); - dst_batch->commit_last_row(); -} - -void UnionNode::materialize_batch(RowBatch* dst_batch, uint8_t** tuple_buf) { - // Take all references to member variables out of the loop to reduce the number of - // loads and stores. - RowBatch* child_batch = _child_batch.get(); - int tuple_byte_size = _tuple_desc->byte_size(); - uint8_t* cur_tuple = *tuple_buf; - const std::vector& child_exprs = _child_expr_lists[_child_idx]; - - int num_rows_to_process = std::min(child_batch->num_rows() - _child_row_idx, - dst_batch->capacity() - dst_batch->num_rows()); - FOREACH_ROW_LIMIT(child_batch, _child_row_idx, num_rows_to_process, batch_iter) { - TupleRow* child_row = batch_iter.get(); - materialize_exprs(child_exprs, child_row, cur_tuple, dst_batch); - cur_tuple += tuple_byte_size; - } - - _child_row_idx += num_rows_to_process; - *tuple_buf = cur_tuple; -} - -Status UnionNode::get_error_msg(const std::vector& exprs) { - for (auto expr_ctx : exprs) { - std::string expr_error = expr_ctx->get_error_msg(); - if (!expr_error.empty()) { - return Status::RuntimeError(expr_error); - } - } - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exec/union_node.h b/be/src/exec/union_node.h deleted file mode 100644 index c93c2eb09f..0000000000 --- a/be/src/exec/union_node.h +++ /dev/null @@ -1,159 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/union-node.h
-// and modified by Doris
-
-#pragma once
-
-#include "exec/exec_node.h"
-#include "runtime/row_batch.h"
-#include "runtime/runtime_state.h"
-
-namespace doris {
-
-class DescriptorTbl;
-class ExprContext;
-class Tuple;
-class TupleRow;
-class TPlanNode;
-
-/// Node that merges the results of its children by either materializing their
-/// evaluated expressions into row batches or passing through (forwarding) the
-/// batches if the input tuple layout is identical to the output tuple layout
-/// and expressions don't need to be evaluated. The children should be ordered
-/// such that all passthrough children come before the children that need
-/// materialization. The union node pulls from its children sequentially, i.e.
-/// it exhausts one child completely before moving on to the next one.
-class UnionNode : public ExecNode {
-public:
- UnionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-
- virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
- virtual Status prepare(RuntimeState* state);
- virtual Status open(RuntimeState* state);
- virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
- // virtual Status reset(RuntimeState* state);
- virtual Status close(RuntimeState* state);
-
-protected:
- void debug_string(int indentation_level, std::stringstream* out) const;
-
-private:
- /// Tuple id resolved in Prepare() to set _tuple_desc.
- const int _tuple_id;
-
- /// Descriptor for tuples this union node constructs.
- const TupleDescriptor* _tuple_desc;
-
- /// Index of the first non-passthrough child; i.e. a child that needs materialization.
- /// 0 when all children are materialized, '_children.size()' when no children are
- /// materialized.
- const int _first_materialized_child_idx;
-
- /// Const exprs materialized by this node. These exprs don't refer to any children.
- /// Only materialized by the first fragment instance to avoid duplication.
- std::vector<std::vector<ExprContext*>> _const_expr_lists;
-
- /// Exprs materialized by this node. The i-th result expr list refers to the i-th child.
- std::vector<std::vector<ExprContext*>> _child_expr_lists;
-
- /////////////////////////////////////////
- /// BEGIN: Members that must be Reset()
-
- /// Index of current child.
- int _child_idx;
-
- /// Current row batch of current child. We reset the pointer to a new RowBatch
- /// when switching to a different child.
- std::unique_ptr<RowBatch> _child_batch;
-
- /// Index of current row in _child_batch.
- int _child_row_idx;
-
- typedef void (*UnionMaterializeBatchFn)(UnionNode*, RowBatch*, uint8_t**);
- /// Vector of pointers to codegen'ed materialize_batch functions. The vector contains one
- /// function for each child. The size of the vector should be equal to the number of
- /// children. If a child is passthrough, there should be a nullptr for that child. If
- /// Codegen is disabled, there should be a nullptr for every child.
- std::vector<UnionMaterializeBatchFn> _codegend_union_materialize_batch_fns;
-
- /// Saved from the last call to GetNext() on the current child.
- bool _child_eos;
-
- /// Index of current const result expr list.
- int _const_expr_list_idx;
-
- /// Index of the child that needs to be closed on the next GetNext() call. Should be set
- /// to -1 if no child needs to be closed.
- int _to_close_child_idx;
-
- // Time spent evaluating exprs and materializing the results
- RuntimeProfile::Counter* _materialize_exprs_evaluate_timer = nullptr;
-
- /// END: Members that must be Reset()
- /////////////////////////////////////////
-
- /// The following GetNext* functions don't apply the limit. It must be enforced by the
- /// caller.
-
- /// GetNext() for the passthrough case. We pass 'row_batch' directly into the GetNext()
- /// call on the child.
- Status get_next_pass_through(RuntimeState* state, RowBatch* row_batch);
-
- /// GetNext() for the materialized case. Materializes and evaluates rows from each
- /// non-passthrough child.
- Status get_next_materialized(RuntimeState* state, RowBatch* row_batch);
-
- /// GetNext() for the constant expression case.
- Status get_next_const(RuntimeState* state, RowBatch* row_batch);
-
- /// Evaluates exprs for the current child and materializes the results into 'tuple_buf',
- /// which is attached to 'dst_batch'. Runs until 'dst_batch' is at capacity, or all rows
- /// have been consumed from the current child batch. Updates '_child_row_idx'.
- void materialize_batch(RowBatch* dst_batch, uint8_t** tuple_buf);
-
- /// Evaluates 'exprs' over 'row', materializes the results in 'tuple_buf',
- /// and appends the new tuple to 'dst_batch'. Increments '_num_rows_returned'.
- void materialize_exprs(const std::vector<ExprContext*>& exprs, TupleRow* row,
- uint8_t* tuple_buf, RowBatch* dst_batch);
-
- Status get_error_msg(const std::vector<ExprContext*>& exprs);
-
- /// Returns true if the child at 'child_idx' can be passed through.
- bool is_child_passthrough(int child_idx) const {
- DCHECK_LT(child_idx, _children.size());
- return child_idx < _first_materialized_child_idx;
- }
-
- /// Returns true if there are still rows to be returned from passthrough children.
- bool has_more_passthrough() const { return _child_idx < _first_materialized_child_idx; }
-
- /// Returns true if there are still rows to be returned from children that need
- /// materialization.
- bool has_more_materialized() const {
- return _first_materialized_child_idx != _children.size() && _child_idx < _children.size();
- }
-
- /// Returns true if there are still rows to be returned from constant expressions.
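The guard that follows combines two conditions: only fragment instance 0 emits const rows (so a UNION ALL of literals is not duplicated across parallel instances), and only while lists remain. A self-contained sketch of the same predicate (should_emit_const_rows is a hypothetical name):

#include <cassert>
#include <cstddef>

// Const expr rows must appear exactly once in the query result, so only
// fragment instance 0 emits them; other parallel instances skip straight to eos.
bool should_emit_const_rows(int per_fragment_instance_idx,
                            std::size_t const_list_idx, std::size_t num_const_lists) {
    return per_fragment_instance_idx == 0 && const_list_idx < num_const_lists;
}

int main() {
    assert(should_emit_const_rows(0, 0, 2));  // first instance, lists remain
    assert(!should_emit_const_rows(1, 0, 2)); // other instances never emit
    assert(!should_emit_const_rows(0, 2, 2)); // all const lists consumed
    return 0;
}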
- bool has_more_const(const RuntimeState* state) const { - return state->per_fragment_instance_idx() == 0 && - _const_expr_list_idx < _const_expr_lists.size(); - } -}; - -} // namespace doris diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 9b2c7149ed..8ebed0ccdb 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -24,7 +24,6 @@ #include "common/object_pool.h" #include "common/status.h" -#include "exec/parquet_scanner.h" #include "olap/row.h" #include "olap/rowset/rowset_id_generator.h" #include "olap/rowset/rowset_meta_manager.h" @@ -33,6 +32,7 @@ #include "olap/tablet.h" #include "olap/tablet_schema.h" #include "runtime/exec_env.h" +#include "vec/exec/vparquet_scanner.h" namespace doris { using namespace ErrorCode; @@ -821,9 +821,9 @@ Status PushBrokerReader::init(const Schema* schema, const TBrokerScanRange& t_sc BaseScanner* scanner = nullptr; switch (t_scan_range.ranges[0].format_type) { case TFileFormatType::FORMAT_PARQUET: - scanner = new ParquetScanner(_runtime_state.get(), _runtime_profile, t_scan_range.params, - t_scan_range.ranges, t_scan_range.broker_addresses, - _pre_filter_texprs, _counter.get()); + scanner = new vectorized::VParquetScanner( + _runtime_state.get(), _runtime_profile, t_scan_range.params, t_scan_range.ranges, + t_scan_range.broker_addresses, _pre_filter_texprs, _counter.get()); break; default: LOG(WARNING) << "Unsupported file format type: " << t_scan_range.ranges[0].format_type; diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index f57117cd3e..3d42ae0faf 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -27,8 +27,6 @@ set(RUNTIME_FILES broker_mgr.cpp buffer_control_block.cpp client_cache.cpp - data_stream_mgr.cpp - data_stream_sender.cpp datetime_value.cpp descriptors.cpp exec_env.cpp @@ -38,8 +36,6 @@ set(RUNTIME_FILES plan_fragment_executor.cpp primitive_type.cpp raw_value.cpp - result_sink.cpp - result_file_sink.cpp result_buffer_mgr.cpp result_writer.cpp row_batch.cpp @@ -64,9 +60,6 @@ set(RUNTIME_FILES disk_io_mgr_reader_context.cc disk_io_mgr_scan_range.cc buffered_block_mgr2.cc - spill_sorter.cc - sorted_run_merger.cc - data_stream_recvr.cc buffered_tuple_stream2.cc buffered_tuple_stream3.cc export_sink.cpp @@ -94,7 +87,6 @@ set(RUNTIME_FILES result_queue_mgr.cpp memory_scratch_sink.cpp external_scan_context_mgr.cpp - file_result_writer.cpp mysql_result_writer.cpp memory/system_allocator.cpp memory/chunk_allocator.cpp diff --git a/be/src/runtime/data_stream_mgr.cpp b/be/src/runtime/data_stream_mgr.cpp deleted file mode 100644 index b0d1dbd8f2..0000000000 --- a/be/src/runtime/data_stream_mgr.cpp +++ /dev/null @@ -1,213 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
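On the push_handler.cpp hunk above: the row-based ParquetScanner is swapped for vectorized::VParquetScanner inside the same format-type switch. A minimal sketch of that dispatch shape, with stand-in types rather than the real Doris classes:

#include <memory>
#include <stdexcept>

enum class FileFormat { PARQUET, ORC, CSV };

struct Scanner { virtual ~Scanner() = default; };       // stand-in base class
struct VParquetScannerStub : Scanner {};                 // stand-in vectorized scanner

std::unique_ptr<Scanner> make_scanner(FileFormat fmt) {
    switch (fmt) {
    case FileFormat::PARQUET:
        return std::make_unique<VParquetScannerStub>();  // vectorized path only
    default:
        throw std::runtime_error("Unsupported file format type");
    }
}

int main() {
    auto scanner = make_scanner(FileFormat::PARQUET);
    return scanner ? 0 : 1;
}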
-// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/data-stream-mgr.cc -// and modified by Doris - -#include "runtime/data_stream_mgr.h" - -#include -#include - -#include "gen_cpp/BackendService.h" -#include "gen_cpp/PaloInternalService_types.h" -#include "gen_cpp/types.pb.h" // PUniqueId -#include "runtime/data_stream_recvr.h" -#include "runtime/raw_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "util/doris_metrics.h" - -namespace doris { - -DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(data_stream_receiver_count, MetricUnit::NOUNIT); -DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(fragment_endpoint_count, MetricUnit::NOUNIT); - -using std::mutex; -using std::shared_ptr; -using std::unique_lock; -using std::lock_guard; - -DataStreamMgr::DataStreamMgr() { - REGISTER_HOOK_METRIC(data_stream_receiver_count, [this]() { - // lock_guard l(_lock); - return _receiver_map.size(); - }); - REGISTER_HOOK_METRIC(fragment_endpoint_count, [this]() { - // lock_guard l(_lock); - return _fragment_stream_set.size(); - }); -} - -DataStreamMgr::~DataStreamMgr() { - DEREGISTER_HOOK_METRIC(data_stream_receiver_count); - DEREGISTER_HOOK_METRIC(fragment_endpoint_count); -} -inline uint32_t DataStreamMgr::get_hash_value(const TUniqueId& fragment_instance_id, - PlanNodeId node_id) { - uint32_t value = RawValue::get_hash_value(&fragment_instance_id.lo, TYPE_BIGINT, 0); - value = RawValue::get_hash_value(&fragment_instance_id.hi, TYPE_BIGINT, value); - value = RawValue::get_hash_value(&node_id, TYPE_INT, value); - return value; -} - -shared_ptr DataStreamMgr::create_recvr( - RuntimeState* state, const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id, - PlanNodeId dest_node_id, int num_senders, int buffer_size, RuntimeProfile* profile, - bool is_merging, std::shared_ptr sub_plan_query_statistics_recvr) { - DCHECK(profile != nullptr); - VLOG_FILE << "creating receiver for fragment=" << fragment_instance_id - << ", node=" << dest_node_id; - shared_ptr recvr( - new DataStreamRecvr(this, row_desc, fragment_instance_id, dest_node_id, num_senders, - is_merging, buffer_size, profile, sub_plan_query_statistics_recvr)); - uint32_t hash_value = get_hash_value(fragment_instance_id, dest_node_id); - lock_guard l(_lock); - _fragment_stream_set.insert(std::make_pair(fragment_instance_id, dest_node_id)); - _receiver_map.insert(std::make_pair(hash_value, recvr)); - return recvr; -} - -shared_ptr DataStreamMgr::find_recvr(const TUniqueId& fragment_instance_id, - PlanNodeId node_id, bool acquire_lock) { - VLOG_ROW << "looking up fragment_instance_id=" << fragment_instance_id << ", node=" << node_id; - size_t hash_value = get_hash_value(fragment_instance_id, node_id); - if (acquire_lock) { - _lock.lock(); - } - std::pair range = - _receiver_map.equal_range(hash_value); - while (range.first != range.second) { - shared_ptr recvr = range.first->second; - if (recvr->fragment_instance_id() == fragment_instance_id && - recvr->dest_node_id() == node_id) { - if (acquire_lock) { - _lock.unlock(); - } - return recvr; - } - ++range.first; - } - if (acquire_lock) { - _lock.unlock(); - } - return shared_ptr(); -} - -Status DataStreamMgr::transmit_data(const PTransmitDataParams* request, - ::google::protobuf::Closure** done) { - const PUniqueId& finst_id = request->finst_id(); - TUniqueId t_finst_id; - t_finst_id.hi = finst_id.hi(); - t_finst_id.lo = finst_id.lo(); - shared_ptr recvr = find_recvr(t_finst_id, request->node_id()); - if (recvr == nullptr) { - // The receiver 
may remove itself from the receiver map via deregister_recvr() - // at any time without considering the remaining number of senders. - // As a consequence, find_recvr() may return an innocuous nullptr if a thread - // calling deregister_recvr() beat the thread calling find_recvr() - // in acquiring _lock. - // TODO: Rethink the lifecycle of DataStreamRecvr to distinguish - // errors from receiver-initiated teardowns. - return Status::OK(); - } - - // request can only be used before calling recvr's add_batch or when request - // is the last for the sender, because request maybe released after it's batch - // is consumed by ExchangeNode. - if (request->has_query_statistics()) { - recvr->add_sub_plan_statistics(request->query_statistics(), request->sender_id()); - } - - bool eos = request->eos(); - if (request->has_row_batch()) { - recvr->add_batch(request->row_batch(), request->sender_id(), request->be_number(), - request->packet_seq(), eos ? nullptr : done); - } - - if (eos) { - recvr->remove_sender(request->sender_id(), request->be_number()); - } - return Status::OK(); -} - -Status DataStreamMgr::deregister_recvr(const TUniqueId& fragment_instance_id, PlanNodeId node_id) { - std::shared_ptr targert_recvr; - VLOG_QUERY << "deregister_recvr(): fragment_instance_id=" << fragment_instance_id - << ", node=" << node_id; - size_t hash_value = get_hash_value(fragment_instance_id, node_id); - { - lock_guard l(_lock); - std::pair range = - _receiver_map.equal_range(hash_value); - while (range.first != range.second) { - const shared_ptr& recvr = range.first->second; - if (recvr->fragment_instance_id() == fragment_instance_id && - recvr->dest_node_id() == node_id) { - targert_recvr = recvr; - _fragment_stream_set.erase( - std::make_pair(recvr->fragment_instance_id(), recvr->dest_node_id())); - _receiver_map.erase(range.first); - break; - } - ++range.first; - } - } - - // Notify concurrent add_data() requests that the stream has been terminated. - // cancel_stream maybe take a long time, so we handle it out of lock. - if (targert_recvr) { - targert_recvr->cancel_stream(); - return Status::OK(); - } else { - std::stringstream err; - err << "unknown row receiver id: fragment_instance_id=" << fragment_instance_id - << " node_id=" << node_id; - LOG(ERROR) << err.str(); - return Status::InternalError(err.str()); - } -} - -void DataStreamMgr::cancel(const TUniqueId& fragment_instance_id) { - VLOG_QUERY << "cancelling all streams for fragment=" << fragment_instance_id; - std::vector> recvrs; - { - lock_guard l(_lock); - FragmentStreamSet::iterator i = - _fragment_stream_set.lower_bound(std::make_pair(fragment_instance_id, 0)); - while (i != _fragment_stream_set.end() && i->first == fragment_instance_id) { - shared_ptr recvr = find_recvr(i->first, i->second, false); - if (recvr == nullptr) { - // keep going but at least log it - std::stringstream err; - err << "cancel(): missing in stream_map: fragment=" << i->first - << " node=" << i->second; - LOG(ERROR) << err.str(); - } else { - recvrs.push_back(recvr); - } - ++i; - } - } - - // cancel_stream maybe take a long time, so we handle it out of lock. - for (auto& it : recvrs) { - it->cancel_stream(); - } -} - -} // namespace doris diff --git a/be/src/runtime/data_stream_mgr.h b/be/src/runtime/data_stream_mgr.h deleted file mode 100644 index 43411beb85..0000000000 --- a/be/src/runtime/data_stream_mgr.h +++ /dev/null @@ -1,137 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/data-stream-mgr.h -// and modified by Doris - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "common/object_pool.h" -#include "common/status.h" -#include "gen_cpp/Types_types.h" // for TUniqueId -#include "gen_cpp/internal_service.pb.h" -#include "runtime/descriptors.h" // for PlanNodeId -#include "runtime/query_statistics.h" -#include "util/runtime_profile.h" - -namespace google { -namespace protobuf { -class Closure; -} -} // namespace google - -namespace doris { - -class DescriptorTbl; -class DataStreamRecvr; -class RowBatch; -class RuntimeState; -class PRowBatch; -class PUniqueId; - -// Singleton class which manages all incoming data streams at a backend node. It -// provides both producer and consumer functionality for each data stream. -// - dorisBackend service threads use this to add incoming data to streams -// in response to TransmitData rpcs (add_data()) or to signal end-of-stream conditions -// (close_sender()). -// - Exchange nodes extract data from an incoming stream via a DataStreamRecvr, -// which is created with create_recvr(). -// -// DataStreamMgr also allows asynchronous cancellation of streams via cancel() -// which unblocks all DataStreamRecvr::GetBatch() calls that are made on behalf -// of the cancelled fragment id. -// -// TODO: The recv buffers used in DataStreamRecvr should count against -// per-query memory limits. -class DataStreamMgr { -public: - DataStreamMgr(); - ~DataStreamMgr(); - - // Create a receiver for a specific fragment_instance_id/node_id destination; - // If is_merging is true, the receiver maintains a separate queue of incoming row - // batches for each sender and merges the sorted streams from each sender into a - // single stream. - // Ownership of the receiver is shared between this DataStream mgr instance and the - // caller. - std::shared_ptr create_recvr( - RuntimeState* state, const RowDescriptor& row_desc, - const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, - int buffer_size, RuntimeProfile* profile, bool is_merging, - std::shared_ptr sub_plan_query_statistics_recvr); - - Status transmit_data(const PTransmitDataParams* request, ::google::protobuf::Closure** done); - - // Closes all receivers registered for fragment_instance_id immediately. - void cancel(const TUniqueId& fragment_instance_id); - -private: - friend class DataStreamRecvr; - friend class DataStreamSender; - - // protects all fields below - std::mutex _lock; - - // map from hash value of fragment instance id/node id pair to stream receivers; - // Ownership of the stream revcr is shared between this instance and the caller of - // create_recvr(). 
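The StreamMap described below avoids a pair<TUniqueId, PlanNodeId> key by hashing the pair down to a uint32_t bucket key and resolving collisions with an explicit field comparison. A self-contained sketch of that lookup pattern (Recvr and key_hash are illustrative; the deleted code chains RawValue::get_hash_value instead):

#include <cassert>
#include <cstdint>
#include <memory>
#include <unordered_map>

// Stand-in ids; the real keys are a 128-bit TUniqueId plus a plan node id.
struct Recvr {
    uint64_t finst_lo, finst_hi;
    int node_id;
};

using StreamMap = std::unordered_multimap<uint32_t, std::shared_ptr<Recvr>>;

uint32_t key_hash(uint64_t lo, uint64_t hi, int node_id) {
    // Any chained hash works here; only the bucket choice depends on it.
    uint32_t h = static_cast<uint32_t>(lo ^ (lo >> 32));
    h = h * 31u + static_cast<uint32_t>(hi ^ (hi >> 32));
    return h * 31u + static_cast<uint32_t>(node_id);
}

// equal_range narrows to the hash bucket; an exact field comparison resolves
// collisions, so no composite key object is ever built for a lookup.
std::shared_ptr<Recvr> find_recvr(const StreamMap& m, uint64_t lo, uint64_t hi, int node_id) {
    auto range = m.equal_range(key_hash(lo, hi, node_id));
    for (auto it = range.first; it != range.second; ++it) {
        const auto& r = it->second;
        if (r->finst_lo == lo && r->finst_hi == hi && r->node_id == node_id) return r;
    }
    return nullptr;
}

int main() {
    StreamMap m;
    auto r = std::make_shared<Recvr>(Recvr{1, 2, 7});
    m.emplace(key_hash(1, 2, 7), r);
    assert(find_recvr(m, 1, 2, 7) == r);
    assert(find_recvr(m, 1, 2, 8) == nullptr);
    return 0;
}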
- // we don't want to create a map, DataStreamRecvr*>, - // because that requires a bunch of copying of ids for lookup - typedef std::unordered_multimap> StreamMap; - StreamMap _receiver_map; - - // less-than ordering for pair - struct ComparisonOp { - bool operator()(const std::pair& a, - const std::pair& b) const { - if (a.first.hi < b.first.hi) { - return true; - } else if (a.first.hi > b.first.hi) { - return false; - } else if (a.first.lo < b.first.lo) { - return true; - } else if (a.first.lo > b.first.lo) { - return false; - } - return a.second < b.second; - } - }; - - // ordered set of registered streams' fragment instance id/node id - typedef std::set, ComparisonOp> FragmentStreamSet; - FragmentStreamSet _fragment_stream_set; - - // Return the receiver for given fragment_instance_id/node_id, - // or nullptr if not found. If 'acquire_lock' is false, assumes _lock is already being - // held and won't try to acquire it. - std::shared_ptr find_recvr(const TUniqueId& fragment_instance_id, - PlanNodeId node_id, bool acquire_lock = true); - - // Remove receiver block for fragment_instance_id/node_id from the map. - Status deregister_recvr(const TUniqueId& fragment_instance_id, PlanNodeId node_id); - - uint32_t get_hash_value(const TUniqueId& fragment_instance_id, PlanNodeId node_id); -}; - -} // namespace doris diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc deleted file mode 100644 index 59d46102b3..0000000000 --- a/be/src/runtime/data_stream_recvr.cc +++ /dev/null @@ -1,531 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/data-stream-recvr.cc -// and modified by Doris - -#include "runtime/data_stream_recvr.h" - -#include - -#include -#include -#include -#include - -#include "gen_cpp/data.pb.h" -#include "runtime/data_stream_mgr.h" -#include "runtime/row_batch.h" -#include "runtime/sorted_run_merger.h" -#include "runtime/thread_context.h" -#include "util/debug_util.h" -#include "util/runtime_profile.h" - -using std::list; -using std::vector; -using std::pair; -using std::make_pair; - -using std::condition_variable; -using std::mutex; -using std::unique_ptr; -using std::unique_lock; -using std::try_lock; -using std::lock_guard; -using std::mem_fn; - -namespace doris { - -class ThreadClosure : public google::protobuf::Closure { -public: - void Run() { _cv.notify_one(); } - void wait(unique_lock& lock) { _cv.wait(lock); } - -private: - condition_variable _cv; -}; -// Implements a blocking queue of row batches from one or more senders. One queue -// is maintained per sender if _is_merging is true for the enclosing receiver, otherwise -// rows from all senders are placed in the same queue. 
-class DataStreamRecvr::SenderQueue { -public: - SenderQueue(DataStreamRecvr* parent_recvr, int num_senders, RuntimeProfile* profile); - - ~SenderQueue() {} - - // Return the next batch form this sender queue. Sets the returned batch in _cur_batch. - // A returned batch that is not filled to capacity does *not* indicate - // end-of-stream. - // The call blocks until another batch arrives or all senders close - // their channels. The returned batch is owned by the sender queue. The caller - // must acquire data from the returned batch before the next call to get_batch(). - Status get_batch(RowBatch** next_batch); - - // Adds a row batch to this sender queue if this stream has not been cancelled; - // blocks if this will make the stream exceed its buffer limit. - // If the total size of the batches in this queue would exceed the allowed buffer size, - // the queue is considered full and the call blocks until a batch is dequeued. - void add_batch(const PRowBatch& pb_batch, int be_number, int64_t packet_seq, - ::google::protobuf::Closure** done); - - void add_batch(RowBatch* batch, bool use_move); - - // Decrement the number of remaining senders for this queue and signal eos ("new data") - // if the count drops to 0. The number of senders will be 1 for a merging - // DataStreamRecvr. - void decrement_senders(int sender_id); - - // Set cancellation flag and signal cancellation to receiver and sender. Subsequent - // incoming batches will be dropped. - void cancel(); - - // Must be called once to cleanup any queued resources. - void close(); - - // Returns the current batch from this queue being processed by a consumer. - RowBatch* current_batch() const { - { return _current_batch.get(); } - } - -private: - // Receiver of which this queue is a member. - DataStreamRecvr* _recvr; - - // protects all subsequent data. - mutex _lock; - - // if true, the receiver fragment for this stream got cancelled - bool _is_cancelled; - - // number of senders which haven't closed the channel yet - // (if it drops to 0, end-of-stream is true) - int _num_remaining_senders; - - // signal arrival of new batch or the eos/cancelled condition - condition_variable _data_arrival_cv; - - // signal removal of data by stream consumer - condition_variable _data_removal_cv; - - // queue of (batch length, batch) pairs. The SenderQueue block owns memory to - // these batches. They are handed off to the caller via get_batch. - typedef list> RowBatchQueue; - RowBatchQueue _batch_queue; - - // The batch that was most recently returned via get_batch(), i.e. the current batch - // from this queue being processed by a consumer. Is destroyed when the next batch - // is retrieved. 
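Stepping back from the declaration above: the queue's blocking contract (get_batch blocks until data arrives, all senders finish, or the stream is cancelled) can be sketched standalone. SenderQueueSketch below is illustrative and elides the buffer-limit and packet-ordering details the real class handles:

#include <condition_variable>
#include <deque>
#include <mutex>
#include <optional>

// One queue per sender (merging) or one shared queue (non-merging).
template <typename Batch>
class SenderQueueSketch {
public:
    explicit SenderQueueSketch(int num_senders) : _remaining_senders(num_senders) {}

    void add_batch(Batch b) {
        std::lock_guard<std::mutex> l(_lock);
        _queue.push_back(std::move(b));
        _arrival.notify_one();
    }

    void sender_done() {
        std::lock_guard<std::mutex> l(_lock);
        if (--_remaining_senders == 0) _arrival.notify_all(); // signal eos
    }

    void cancel() {
        std::lock_guard<std::mutex> l(_lock);
        _cancelled = true;
        _arrival.notify_all();
    }

    // nullopt means eos (all senders done) or cancellation.
    std::optional<Batch> get_batch() {
        std::unique_lock<std::mutex> l(_lock);
        _arrival.wait(l, [&] {
            return _cancelled || !_queue.empty() || _remaining_senders == 0;
        });
        if (_cancelled || _queue.empty()) return std::nullopt;
        Batch b = std::move(_queue.front());
        _queue.pop_front();
        return b;
    }

private:
    std::mutex _lock;
    std::condition_variable _arrival;
    std::deque<Batch> _queue;
    int _remaining_senders;
    bool _cancelled = false;
};

int main() {
    SenderQueueSketch<int> q(1);
    q.add_batch(42);
    auto b = q.get_batch(); // returns 42 without blocking
    q.sender_done();
    return (b && *b == 42 && !q.get_batch()) ? 0 : 1; // second call hits eos
}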
- unique_ptr _current_batch; - - // Set to true when the first batch has been received - bool _received_first_batch; - - std::unordered_set _sender_eos_set; // sender_id - std::unordered_map _packet_seq_map; // be_number => packet_seq - std::deque> _pending_closures; - std::unordered_map> _local_closure; -}; - -DataStreamRecvr::SenderQueue::SenderQueue(DataStreamRecvr* parent_recvr, int num_senders, - RuntimeProfile* profile) - : _recvr(parent_recvr), - _is_cancelled(false), - _num_remaining_senders(num_senders), - _received_first_batch(false) {} - -Status DataStreamRecvr::SenderQueue::get_batch(RowBatch** next_batch) { - unique_lock l(_lock); - // wait until something shows up or we know we're done - while (!_is_cancelled && _batch_queue.empty() && _num_remaining_senders > 0) { - VLOG_ROW << "wait arrival fragment_instance_id=" << _recvr->fragment_instance_id() - << " node=" << _recvr->dest_node_id(); - // Don't count time spent waiting on the sender as active time. - CANCEL_SAFE_SCOPED_TIMER(_recvr->_data_arrival_timer, &_is_cancelled); - CANCEL_SAFE_SCOPED_TIMER( - _received_first_batch ? nullptr : _recvr->_first_batch_wait_total_timer, - &_is_cancelled); - _data_arrival_cv.wait(l); - } - - // _cur_batch must be replaced with the returned batch. - _current_batch.reset(); - *next_batch = nullptr; - if (_is_cancelled) { - return Status::Cancelled("Cancelled"); - } - - if (_batch_queue.empty()) { - DCHECK_EQ(_num_remaining_senders, 0); - return Status::OK(); - } - - _received_first_batch = true; - - DCHECK(!_batch_queue.empty()); - RowBatch* result = _batch_queue.front().second; - _recvr->_num_buffered_bytes -= _batch_queue.front().first; - VLOG_ROW << "fetched #rows=" << result->num_rows(); - _batch_queue.pop_front(); - // _data_removal_cv.notify_one(); - _current_batch.reset(result); - *next_batch = _current_batch.get(); - - if (!_pending_closures.empty()) { - auto closure_pair = _pending_closures.front(); - closure_pair.first->Run(); - _pending_closures.pop_front(); - - closure_pair.second.stop(); - _recvr->_buffer_full_total_timer->update(closure_pair.second.elapsed_time()); - } - - return Status::OK(); -} - -void DataStreamRecvr::SenderQueue::add_batch(const PRowBatch& pb_batch, int be_number, - int64_t packet_seq, - ::google::protobuf::Closure** done) { - // Avoid deadlock when calling SenderQueue::cancel() in tcmalloc hook, - // limit memory via DataStreamRecvr::exceeds_limit. - STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); - lock_guard l(_lock); - if (_is_cancelled) { - return; - } - auto iter = _packet_seq_map.find(be_number); - if (iter != _packet_seq_map.end()) { - if (iter->second >= packet_seq) { - LOG(WARNING) << "packet already exist [cur_packet_id= " << iter->second - << " receive_packet_id=" << packet_seq << "]"; - return; - } - iter->second = packet_seq; - } else { - _packet_seq_map.emplace(be_number, packet_seq); - } - - size_t batch_size = RowBatch::get_batch_size(pb_batch); - COUNTER_UPDATE(_recvr->_bytes_received_counter, batch_size); - - // Following situation will match the following condition. - // Sender send a packet failed, then close the channel. - // but closed packet reach first, then the failed packet. - // Then meet the assert - // we remove the assert - // DCHECK_GT(_num_remaining_senders, 0); - if (_num_remaining_senders <= 0) { - DCHECK(_sender_eos_set.end() != _sender_eos_set.find(be_number)); - return; - } - - // We always accept the batch regardless of buffer limit, to avoid rpc pipeline stall. 
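The _packet_seq_map check in add_batch() above drops retransmitted or reordered packets per sender; the same logic in isolation (PacketDeduper is a hypothetical name):

#include <cassert>
#include <cstdint>
#include <unordered_map>

// Tracks the highest packet sequence seen per sender (be_number) and rejects
// duplicates, mirroring the _packet_seq_map check in the deleted code.
class PacketDeduper {
public:
    // Returns true if the packet is new and should be processed.
    bool accept(int be_number, int64_t packet_seq) {
        auto it = _last_seq.find(be_number);
        if (it != _last_seq.end()) {
            if (it->second >= packet_seq) return false; // duplicate or stale
            it->second = packet_seq;
        } else {
            _last_seq.emplace(be_number, packet_seq);
        }
        return true;
    }

private:
    std::unordered_map<int, int64_t> _last_seq;
};

int main() {
    PacketDeduper d;
    assert(d.accept(1, 0));
    assert(d.accept(1, 1));
    assert(!d.accept(1, 1)); // retransmitted packet dropped
    return 0;
}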
- // If exceed buffer limit, we just do not response ACK to client, so the client won't - // send data until receive ACK. - // Note that if this be needs to receive data from N BEs, the size of buffer - // may reach as many as (buffer_size + n * buffer_size) - // - // Note: It's important that we enqueue thrift_batch regardless of buffer limit if - // the queue is currently empty. In the case of a merging receiver, batches are - // received from a specific queue based on data order, and the pipeline will stall - // if the merger is waiting for data from an empty queue that cannot be filled - // because the limit has been reached. - if (_is_cancelled) { - return; - } - - RowBatch* batch = nullptr; - { - SCOPED_TIMER(_recvr->_deserialize_row_batch_timer); - // Note: if this function makes a row batch, the batch *must* be added - // to _batch_queue. It is not valid to create the row batch and destroy - // it in this thread. - batch = new RowBatch(_recvr->row_desc(), pb_batch); - } - - VLOG_ROW << "added #rows=" << batch->num_rows() << " batch_size=" << batch_size << "\n"; - _batch_queue.emplace_back(batch_size, batch); - // if done is nullptr, this function can't delay this response - if (done != nullptr && _recvr->exceeds_limit(batch_size)) { - MonotonicStopWatch monotonicStopWatch; - monotonicStopWatch.start(); - DCHECK(*done != nullptr); - _pending_closures.emplace_back(*done, monotonicStopWatch); - *done = nullptr; - } - _recvr->_num_buffered_bytes += batch_size; - _data_arrival_cv.notify_one(); -} - -void DataStreamRecvr::SenderQueue::add_batch(RowBatch* batch, bool use_move) { - // Avoid deadlock when calling SenderQueue::cancel() in tcmalloc hook, - // limit memory via DataStreamRecvr::exceeds_limit. - STOP_CHECK_THREAD_MEM_TRACKER_LIMIT(); - unique_lock l(_lock); - if (_is_cancelled) { - return; - } - RowBatch* nbatch = new RowBatch(_recvr->row_desc(), batch->capacity()); - if (use_move) { - nbatch->acquire_state(batch); - } else { - batch->deep_copy_to(nbatch); - } - int batch_size = nbatch->total_byte_size(); - _batch_queue.emplace_back(batch_size, nbatch); - _data_arrival_cv.notify_one(); - if (_recvr->exceeds_limit(batch_size)) { - std::thread::id tid = std::this_thread::get_id(); - MonotonicStopWatch monotonicStopWatch; - monotonicStopWatch.start(); - auto iter = _local_closure.find(tid); - if (iter == _local_closure.end()) { - _local_closure.emplace(tid, new ThreadClosure); - iter = _local_closure.find(tid); - } - _pending_closures.emplace_back(iter->second.get(), monotonicStopWatch); - iter->second->wait(l); - } - _recvr->_num_buffered_bytes += batch_size; -} - -void DataStreamRecvr::SenderQueue::decrement_senders(int be_number) { - lock_guard l(_lock); - if (_sender_eos_set.end() != _sender_eos_set.find(be_number)) { - return; - } - _sender_eos_set.insert(be_number); - DCHECK_GT(_num_remaining_senders, 0); - _num_remaining_senders--; - VLOG_FILE << "decremented senders: fragment_instance_id=" << _recvr->fragment_instance_id() - << " node_id=" << _recvr->dest_node_id() << " #senders=" << _num_remaining_senders; - if (_num_remaining_senders == 0) { - _data_arrival_cv.notify_one(); - } -} - -void DataStreamRecvr::SenderQueue::cancel() { - { - lock_guard l(_lock); - if (_is_cancelled) { - return; - } - _is_cancelled = true; - VLOG_QUERY << "cancelled stream: _fragment_instance_id=" << _recvr->fragment_instance_id() - << " node_id=" << _recvr->dest_node_id(); - } - // Wake up all threads waiting to produce/consume batches. 
They will all - // notice that the stream is cancelled and handle it. - _data_arrival_cv.notify_all(); - // _data_removal_cv.notify_all(); - // PeriodicCounterUpdater::StopTimeSeriesCounter( - // _recvr->_bytes_received_time_series_counter); - - { - std::lock_guard l(_lock); - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } - _pending_closures.clear(); - } -} - -void DataStreamRecvr::SenderQueue::close() { - { - // If _is_cancelled is not set to true, there may be concurrent send - // which add batch to _batch_queue. The batch added after _batch_queue - // is clear will be memory leak - std::lock_guard l(_lock); - _is_cancelled = true; - - for (auto closure_pair : _pending_closures) { - closure_pair.first->Run(); - } - _pending_closures.clear(); - } - - // Delete any batches queued in _batch_queue - for (RowBatchQueue::iterator it = _batch_queue.begin(); it != _batch_queue.end(); ++it) { - delete it->second; - } - - _current_batch.reset(); -} - -Status DataStreamRecvr::create_merger(const TupleRowComparator& less_than) { - DCHECK(_is_merging); - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - vector child_input_batch_suppliers; - // Create the merger that will a single stream of sorted rows. - _merger.reset(new SortedRunMerger(less_than, &_row_desc, _profile, false)); - - for (int i = 0; i < _sender_queues.size(); ++i) { - child_input_batch_suppliers.emplace_back( - bind(mem_fn(&SenderQueue::get_batch), _sender_queues[i], std::placeholders::_1)); - } - RETURN_IF_ERROR(_merger->prepare(child_input_batch_suppliers)); - return Status::OK(); -} - -Status DataStreamRecvr::create_parallel_merger(const TupleRowComparator& less_than, - uint32_t batch_size) { - DCHECK(_is_merging); - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - vector child_input_batch_suppliers; - - // Create the merger that will a single stream of sorted rows. - _merger.reset(new SortedRunMerger(less_than, &_row_desc, _profile, false)); - - // There we chose parallel merge, we should make thread execute more parallel - // to minimized the computation of top merger - // top merger: have child merger to supplier data - // child merger: have sender queue to supplier data, each merger start a thread to merge data firstly - // sender queue: the data from other node - // Before parallel merge, if we have 81 sender queue, data is 1000, the computation is 1000 * log(81) - // After parallel merge, the computation is MAX(1000 * log(2), 500 * log(41)) - // Now we only support max 3 merge child, because: - // we have N _sender_queue, M merge child. the best way is log(N / M) = M * log(M) - // So if N = 8, M = 2 - // N = 81, M = 3 - // N = 1024, M = 4 - // normally the N is lower than 1024, so we chose 8 <= N < 81, M = 2 - // N >= 81, M = 3 - auto parallel_thread = _sender_queues.size() < 81 ? 2 : 3; - auto step = _sender_queues.size() / parallel_thread + 1; - for (int i = 0; i < _sender_queues.size(); i += step) { - // Create the merger that will a single stream of sorted rows. 
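The grouping computed by 'parallel_thread' and 'step' above can be read as a standalone planning function (plan_child_mergers is a hypothetical name; same constants as the cost argument in the comment):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Splits N sender queues into M contiguous groups, one per child merger:
// M = 2 below 81 queues, else 3; step = N / M + 1, so the last group may
// be smaller than the others.
std::vector<std::pair<std::size_t, std::size_t>> plan_child_mergers(std::size_t n) {
    const std::size_t m = n < 81 ? 2 : 3;
    const std::size_t step = n / m + 1;
    std::vector<std::pair<std::size_t, std::size_t>> groups; // [begin, end) ranges
    for (std::size_t i = 0; i < n; i += step) {
        groups.emplace_back(i, std::min(i + step, n));
    }
    return groups;
}

int main() {
    auto g = plan_child_mergers(8); // step = 5 -> groups [0, 5) and [5, 8)
    assert(g.size() == 2 && g[0].second == 5 && g[1].second == 8);
    return 0;
}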
- std::unique_ptr child_merger( - new ChildSortedRunMerger(less_than, &_row_desc, _profile, batch_size, false)); - vector input_batch_suppliers; - for (int j = i; j < std::min((size_t)i + step, _sender_queues.size()); ++j) { - input_batch_suppliers.emplace_back(bind(mem_fn(&SenderQueue::get_batch), - _sender_queues[j], std::placeholders::_1)); - } - child_merger->prepare(input_batch_suppliers); - - child_input_batch_suppliers.emplace_back(bind(mem_fn(&SortedRunMerger::get_batch), - child_merger.get(), std::placeholders::_1)); - _child_mergers.emplace_back(std::move(child_merger)); - } - RETURN_IF_ERROR(_merger->prepare(child_input_batch_suppliers, true)); - - return Status::OK(); -} - -void DataStreamRecvr::transfer_all_resources(RowBatch* transfer_batch) { - // _child_mergers is not empty, means use parallel merge need transfer resource from - // _sender queue. - // the need transfer resources from child_merger input_row_batch - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - if (!_child_mergers.empty()) { - _merger->transfer_all_resources(transfer_batch); - } else { - for (SenderQueue* sender_queue : _sender_queues) { - if (sender_queue->current_batch() != nullptr) { - sender_queue->current_batch()->transfer_resource_ownership(transfer_batch); - } - } - } -} - -DataStreamRecvr::DataStreamRecvr( - DataStreamMgr* stream_mgr, const RowDescriptor& row_desc, - const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders, - bool is_merging, int total_buffer_limit, RuntimeProfile* profile, - std::shared_ptr sub_plan_query_statistics_recvr) - : _mgr(stream_mgr), - _fragment_instance_id(fragment_instance_id), - _dest_node_id(dest_node_id), - _total_buffer_limit(total_buffer_limit), - _row_desc(row_desc), - _is_merging(is_merging), - _num_buffered_bytes(0), - _profile(profile), - _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) { - _mem_tracker = std::make_unique( - "DataStreamRecvr:" + print_id(_fragment_instance_id), _profile); - - // Create one queue per sender if is_merging is true. - int num_queues = is_merging ? num_senders : 1; - _sender_queues.reserve(num_queues); - int num_sender_per_queue = is_merging ? 1 : num_senders; - for (int i = 0; i < num_queues; ++i) { - SenderQueue* queue = - _sender_queue_pool.add(new SenderQueue(this, num_sender_per_queue, profile)); - _sender_queues.push_back(queue); - } - - // Initialize the counters - _bytes_received_counter = ADD_COUNTER(_profile, "BytesReceived", TUnit::BYTES); - _deserialize_row_batch_timer = ADD_TIMER(_profile, "DeserializeRowBatchTimer"); - _data_arrival_timer = ADD_TIMER(_profile, "DataArrivalWaitTime"); - _buffer_full_total_timer = ADD_TIMER(_profile, "SendersBlockedTotalTimer(*)"); - _first_batch_wait_total_timer = ADD_TIMER(_profile, "FirstBatchArrivalWaitTime"); -} - -Status DataStreamRecvr::get_next(RowBatch* output_batch, bool* eos) { - DCHECK(_merger.get() != nullptr); - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - return _merger->get_next(output_batch, eos); -} - -void DataStreamRecvr::add_batch(const PRowBatch& batch, int sender_id, int be_number, - int64_t packet_seq, ::google::protobuf::Closure** done) { - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - int use_sender_id = _is_merging ? sender_id : 0; - // Add all batches to the same queue if _is_merging is false. 
- _sender_queues[use_sender_id]->add_batch(batch, be_number, packet_seq, done); -} - -void DataStreamRecvr::add_batch(RowBatch* batch, int sender_id, bool use_move) { - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - int use_sender_id = _is_merging ? sender_id : 0; - _sender_queues[use_sender_id]->add_batch(batch, use_move); -} - -void DataStreamRecvr::remove_sender(int sender_id, int be_number) { - int use_sender_id = _is_merging ? sender_id : 0; - _sender_queues[use_sender_id]->decrement_senders(be_number); -} - -void DataStreamRecvr::cancel_stream() { - for (int i = 0; i < _sender_queues.size(); ++i) { - _sender_queues[i]->cancel(); - } -} - -void DataStreamRecvr::close() { - for (int i = 0; i < _sender_queues.size(); ++i) { - _sender_queues[i]->close(); - } - // Remove this receiver from the DataStreamMgr that created it. - // TODO: log error msg - _mgr->deregister_recvr(fragment_instance_id(), dest_node_id()); - _mgr = nullptr; - _merger.reset(); -} - -DataStreamRecvr::~DataStreamRecvr() { - DCHECK(_mgr == nullptr) << "Must call close()"; -} - -Status DataStreamRecvr::get_batch(RowBatch** next_batch) { - DCHECK(!_is_merging); - DCHECK_EQ(_sender_queues.size(), 1); - return _sender_queues[0]->get_batch(next_batch); -} - -} // namespace doris diff --git a/be/src/runtime/data_stream_recvr.h b/be/src/runtime/data_stream_recvr.h deleted file mode 100644 index efb036b5dd..0000000000 --- a/be/src/runtime/data_stream_recvr.h +++ /dev/null @@ -1,202 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/data-stream-recvr.h -// and modified by Doris - -#pragma once - -#include - -#include "common/object_pool.h" -#include "common/status.h" -#include "gen_cpp/Types_types.h" // for TUniqueId -#include "runtime/descriptors.h" -#include "runtime/query_statistics.h" -#include "util/tuple_row_compare.h" - -namespace google { -namespace protobuf { -class Closure; -} -} // namespace google - -namespace doris { - -class DataStreamMgr; -class SortedRunMerger; -class MemTracker; -class RowBatch; -class RuntimeProfile; -class PRowBatch; - -// Single receiver of an m:n data stream. -// DataStreamRecvr maintains one or more queues of row batches received by a -// DataStreamMgr from one or more sender fragment instances. -// Receivers are created via DataStreamMgr::CreateRecvr(). -// Ownership of a stream recvr is shared between the DataStreamMgr that created it and -// the caller of DataStreamMgr::CreateRecvr() (i.e. the exchange node) -// -// The _is_merging member determines if the recvr merges input streams from different -// sender fragment instances according to a specified sort order. 
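From the constructor and add_batch() in the deleted .cc above, the queue fan-in reduces to a small plan; QueuePlan, plan_queues, and queue_index are illustrative names:

#include <cassert>

// Queue topology: merging receivers get one queue per sender; otherwise a
// single queue absorbs every sender.
struct QueuePlan {
    int num_queues;
    int senders_per_queue;
};

QueuePlan plan_queues(bool is_merging, int num_senders) {
    return is_merging ? QueuePlan{num_senders, 1} : QueuePlan{1, num_senders};
}

// Routing on arrival: sender_id picks the queue only when merging.
int queue_index(bool is_merging, int sender_id) {
    return is_merging ? sender_id : 0;
}

int main() {
    QueuePlan p = plan_queues(true, 4);
    assert(p.num_queues == 4 && p.senders_per_queue == 1);
    assert(queue_index(false, 3) == 0); // non-merging: everything lands in queue 0
    return 0;
}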
-// If _is_merging = false : Only one batch queue is maintained for row batches from all -// sender fragment instances. These row batches are returned one at a time via -// get_batch(). -// If _is_merging is true : One queue is created for the batches from each distinct -// sender. A SortedRunMerger instance must be created via create_merger() prior to -// retrieving any rows from the receiver. Rows are retrieved from the receiver via -// get_next(RowBatch* output_batch, int limit, bool eos). After the final call to -// get_next(), transfer_all_resources() must be called to transfer resources from the input -// batches from each sender to the caller's output batch. -// The receiver sets deep_copy to false on the merger - resources are transferred from -// the input batches from each sender queue to the merger to the output batch by the -// merger itself as it processes each run. -// -// DataStreamRecvr::close() must be called by the caller of CreateRecvr() to remove the -// recvr instance from the tracking structure of its DataStreamMgr in all cases. -class DataStreamRecvr { -public: - ~DataStreamRecvr(); - - // Returns next row batch in data stream; blocks if there aren't any. - // Retains ownership of the returned batch. The caller must acquire data from the - // returned batch before the next call to get_batch(). A nullptr returned batch indicated - // eos. Must only be called if _is_merging is false. - // TODO: This is currently only exposed to the non-merging version of the exchange. - // Refactor so both merging and non-merging exchange use get_next(RowBatch*, bool* eos). - Status get_batch(RowBatch** next_batch); - - void add_batch(RowBatch* batch, int sender_id, bool use_move); - - // Deregister from DataStreamMgr instance, which shares ownership of this instance. - void close(); - - // Create a SortedRunMerger instance to merge rows from multiple sender according to the - // specified row comparator. Fetches the first batches from the individual sender - // queues. The exprs used in less_than must have already been prepared and opened. - Status create_merger(const TupleRowComparator& less_than); - - Status create_parallel_merger(const TupleRowComparator& less_than, uint32_t batch_size); - // Fill output_batch with the next batch of rows obtained by merging the per-sender - // input streams. Must only be called if _is_merging is true. - Status get_next(RowBatch* output_batch, bool* eos); - - // Transfer all resources from the current batches being processed from each sender - // queue to the specified batch. - void transfer_all_resources(RowBatch* transfer_batch); - - const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } - PlanNodeId dest_node_id() const { return _dest_node_id; } - const RowDescriptor& row_desc() const { return _row_desc; } - - void add_sub_plan_statistics(const PQueryStatistics& statistics, int sender_id) { - _sub_plan_query_statistics_recvr->insert(statistics, sender_id); - } - - // Indicate that a particular sender is done. Delegated to the appropriate - // sender queue. Called from DataStreamMgr. 
- void remove_sender(int sender_id, int be_number);
-
-private:
- friend class DataStreamMgr;
- class SenderQueue;
-
- DataStreamRecvr(DataStreamMgr* stream_mgr, const RowDescriptor& row_desc,
- const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders,
- bool is_merging, int total_buffer_limit, RuntimeProfile* profile,
- std::shared_ptr<QueryStatisticsRecvr> sub_plan_query_statistics_recvr);
-
- // If the receive queue is full, 'done' is enqueued as pending, and we return
- // with *done set to nullptr.
- void add_batch(const PRowBatch& batch, int sender_id, int be_number, int64_t packet_seq,
- ::google::protobuf::Closure** done);
-
- // Empties the sender queues and notifies all waiting consumers of cancellation.
- void cancel_stream();
-
- // Return true if the addition of a new batch of size 'batch_size' would exceed the
- // total buffer limit.
- bool exceeds_limit(int batch_size) {
- return _num_buffered_bytes + batch_size > _total_buffer_limit;
- }
-
- // DataStreamMgr instance used to create this recvr. (Not owned)
- DataStreamMgr* _mgr;
-
- // Fragment and node id of the destination exchange node this receiver is used by.
- TUniqueId _fragment_instance_id;
- PlanNodeId _dest_node_id;
-
- // soft upper limit on the total amount of buffering allowed for this stream across
- // all sender queues. we stop acking incoming data once the amount of buffered data
- // exceeds this value
- int _total_buffer_limit;
-
- // Row schema, copied from the caller of CreateRecvr().
- RowDescriptor _row_desc;
-
- // True if this receiver merges incoming rows from different senders. Per-sender
- // row batch queues are maintained in this case.
- bool _is_merging;
-
- // total number of bytes held across all sender queues.
- std::atomic<int> _num_buffered_bytes;
-
- // Memtracker for batches in the sender queue(s).
- std::unique_ptr<MemTracker> _mem_tracker;
-
- // One or more queues of row batches received from senders. If _is_merging is true,
- // there is one SenderQueue for each sender. Otherwise, row batches from all senders
- // are placed in the same SenderQueue. The SenderQueue instances are owned by the
- // receiver and placed in _sender_queue_pool.
- std::vector<SenderQueue*> _sender_queues;
-
- // SortedRunMerger used to merge rows from different senders.
- std::unique_ptr<SortedRunMerger> _merger;
-
- std::vector<std::unique_ptr<ChildSortedRunMerger>> _child_mergers;
-
- // Pool of sender queues.
- ObjectPool _sender_queue_pool;
-
- // Runtime profile storing the counters below.
- RuntimeProfile* _profile;
-
- // Number of bytes received
- RuntimeProfile::Counter* _bytes_received_counter;
-
- // Time series of number of bytes received, samples _bytes_received_counter
- // RuntimeProfile::TimeSeriesCounter* _bytes_received_time_series_counter;
- RuntimeProfile::Counter* _deserialize_row_batch_timer;
-
- // Time spent waiting until the first batch arrives across all queues.
- // TODO: Turn this into a wall-clock timer.
- RuntimeProfile::Counter* _first_batch_wait_total_timer;
-
- // Total time (summed across all threads) spent waiting for the
- // recv buffer to be drained so that new batches can be
- // added. Remote plan fragments are blocked for the same amount of
- // time.
- RuntimeProfile::Counter* _buffer_full_total_timer;
-
- // Sub plan query statistics receiver.
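exceeds_limit() above defines a soft bound: batches are always enqueued, and overflow is handled by withholding the RPC ACK rather than rejecting data. A standalone sketch of that gauge (BufferGauge is a hypothetical name):

#include <atomic>
#include <cassert>
#include <cstdint>

// Soft admission bound: callers enqueue the batch regardless, but once
// buffered bytes pass the limit the receiver parks the sender's RPC closure
// instead of ACKing, throttling senders without stalling the pipeline.
class BufferGauge {
public:
    explicit BufferGauge(int64_t soft_limit) : _soft_limit(soft_limit) {}

    bool exceeds(int64_t batch_size) const {
        return _buffered.load() + batch_size > _soft_limit;
    }
    void add(int64_t n) { _buffered.fetch_add(n); }
    void sub(int64_t n) { _buffered.fetch_sub(n); }

private:
    std::atomic<int64_t> _buffered{0};
    const int64_t _soft_limit;
};

int main() {
    BufferGauge g(100);
    assert(!g.exceeds(100)); // exactly at the limit: still ACKed
    g.add(100);
    assert(g.exceeds(1));    // further batches admitted, but ACK withheld
    return 0;
}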
- std::shared_ptr _sub_plan_query_statistics_recvr; - - // Total time spent waiting for data to arrive in the recv buffer - RuntimeProfile::Counter* _data_arrival_timer; -}; - -} // end namespace doris diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp deleted file mode 100644 index 1394ee8875..0000000000 --- a/be/src/runtime/data_stream_sender.cpp +++ /dev/null @@ -1,701 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/data-stream-sender.cc -// and modified by Doris - -#include "runtime/data_stream_sender.h" - -#include - -#include -#include -#include - -#include "common/config.h" -#include "common/logging.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/client_cache.h" -#include "runtime/data_stream_mgr.h" -#include "runtime/data_stream_recvr.h" -#include "runtime/descriptors.h" -#include "runtime/dpp_sink_internal.h" -#include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/raw_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/thread_context.h" -#include "runtime/tuple_row.h" -#include "service/backend_options.h" -#include "service/brpc.h" -#include "util/brpc_client_cache.h" -#include "util/debug_util.h" -#include "util/defer_op.h" -#include "util/network_util.h" -#include "util/proto_util.h" -#include "util/thrift_client.h" -#include "util/thrift_util.h" - -namespace doris { - -DataStreamSender::Channel::Channel(DataStreamSender* parent, const RowDescriptor& row_desc, - const TNetworkAddress& brpc_dest, - const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, - int buffer_size, bool is_transfer_chain, - bool send_query_statistics_with_every_batch) - : _parent(parent), - _buffer_size(buffer_size), - _row_desc(row_desc), - _fragment_instance_id(fragment_instance_id), - _dest_node_id(dest_node_id), - _packet_seq(0), - _need_close(false), - _be_number(0), - _brpc_dest_addr(brpc_dest), - _ch_cur_pb_batch(&_ch_pb_batch1), - _is_transfer_chain(is_transfer_chain), - _send_query_statistics_with_every_batch(send_query_statistics_with_every_batch) { - std::string localhost = BackendOptions::get_localhost(); - _is_local = _brpc_dest_addr.hostname == localhost && _brpc_dest_addr.port == config::brpc_port; - if (_is_local) { - VLOG_NOTICE << "will use local exechange, dest_node_id:" << _dest_node_id; - } -} - -DataStreamSender::Channel::~Channel() { - if (_closure != nullptr && _closure->unref()) { - delete _closure; - } - // release this before request destruct - _brpc_request.release_finst_id(); - _brpc_request.release_query_id(); -} - -Status DataStreamSender::Channel::init(RuntimeState* 
state) { - _be_number = state->be_number(); - - // TODO: figure out how to size _batch - int capacity = std::max(1, _buffer_size / std::max(_row_desc.get_row_size(), 1)); - _batch.reset(new RowBatch(_row_desc, capacity)); - - if (_brpc_dest_addr.hostname.empty()) { - LOG(WARNING) << "there is no brpc destination address's hostname" - ", maybe version is not compatible."; - return Status::InternalError("no brpc destination"); - } - - // initialize brpc request - _finst_id.set_hi(_fragment_instance_id.hi); - _finst_id.set_lo(_fragment_instance_id.lo); - _brpc_request.set_allocated_finst_id(&_finst_id); - - _query_id.set_hi(state->query_id().hi); - _query_id.set_lo(state->query_id().lo); - _brpc_request.set_allocated_query_id(&_query_id); - - _brpc_request.set_node_id(_dest_node_id); - _brpc_request.set_sender_id(_parent->_sender_id); - _brpc_request.set_be_number(_be_number); - - _brpc_timeout_ms = std::min(3600, state->query_options().query_timeout) * 1000; - - // In bucket shuffle join will set fragment_instance_id (-1, -1) - // to build a camouflaged empty channel. the ip and port is '0.0.0.0:0" - // so the empty channel not need call function close_internal() - _need_close = (_fragment_instance_id.hi != -1 && _fragment_instance_id.lo != -1); - if (_need_close) { - _brpc_stub = state->exec_env()->brpc_internal_client_cache()->get_client(_brpc_dest_addr); - if (!_brpc_stub) { - std::string msg = fmt::format("Get rpc stub failed, dest_addr={}:{}", - _brpc_dest_addr.hostname, _brpc_dest_addr.port); - LOG(WARNING) << msg; - return Status::InternalError(msg); - } - } - _state = state; - return Status::OK(); -} - -Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) { - if (_closure == nullptr) { - _closure = new RefCountClosure<PTransmitDataResult>(); - _closure->ref(); - } else { - RETURN_IF_ERROR(_wait_last_brpc()); - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); - _closure->cntl.Reset(); - } - VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id - << " dest_node=" << _dest_node_id; - if (_is_transfer_chain && (_send_query_statistics_with_every_batch || eos)) { - auto statistic = _brpc_request.mutable_query_statistics(); - _parent->_query_statistics->to_pb(statistic); - } - - _brpc_request.set_eos(eos); - if (batch != nullptr) { - _brpc_request.set_allocated_row_batch(batch); - } - _brpc_request.set_packet_seq(_packet_seq++); - - _closure->ref(); - _closure->cntl.set_timeout_ms(_brpc_timeout_ms); - - if (_parent->_transfer_large_data_by_brpc && _brpc_request.has_row_batch() && - _brpc_request.row_batch().has_tuple_data() && - _brpc_request.ByteSizeLong() > MIN_HTTP_BRPC_SIZE) { - Status st = request_embed_attachment_contain_tuple<PTransmitDataParams, RefCountClosure<PTransmitDataResult>>( - &_brpc_request, _closure); - RETURN_IF_ERROR(st); - std::string brpc_url = - fmt::format("http://{}:{}", _brpc_dest_addr.hostname, _brpc_dest_addr.port); - std::shared_ptr<PBackendService_Stub> _brpc_http_stub = - _state->exec_env()->brpc_internal_client_cache()->get_new_client_no_cache(brpc_url, - "http"); - _closure->cntl.http_request().uri() = - brpc_url + "/PInternalServiceImpl/transmit_data_by_http"; - _closure->cntl.http_request().set_method(brpc::HTTP_METHOD_POST); - _closure->cntl.http_request().set_content_type("application/json"); - { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); - _brpc_http_stub->transmit_data_by_http(&_closure->cntl, NULL, &_closure->result, - _closure); - } - } else { - _closure->cntl.http_request().Clear(); - {
SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->orphan_mem_tracker()); - _brpc_stub->transmit_data(&_closure->cntl, &_brpc_request, &_closure->result, _closure); - } - } - - if (batch != nullptr) { - _brpc_request.release_row_batch(); - } - return Status::OK(); -} - -Status DataStreamSender::Channel::add_row(TupleRow* row) { - if (_fragment_instance_id.lo == -1) { - return Status::OK(); - } - int row_num = _batch->add_row(); - - if (row_num == RowBatch::INVALID_ROW_INDEX) { - // _batch is full, let's send it; but first wait for an ongoing - // transmission to finish before modifying _thrift_batch - RETURN_IF_ERROR(send_current_batch()); - row_num = _batch->add_row(); - DCHECK_NE(row_num, RowBatch::INVALID_ROW_INDEX); - } - - TupleRow* dest = _batch->get_row(row_num); - _batch->copy_row(row, dest); - const std::vector<TupleDescriptor*>& descs = _row_desc.tuple_descriptors(); - - for (int i = 0; i < descs.size(); ++i) { - if (UNLIKELY(row->get_tuple(i) == nullptr)) { - dest->set_tuple(i, nullptr); - } else { - dest->set_tuple(i, row->get_tuple(i)->deep_copy(*descs[i], _batch->tuple_data_pool())); - } - } - - _batch->commit_last_row(); - return Status::OK(); -} - -Status DataStreamSender::Channel::send_current_batch(bool eos) { - if (is_local()) { - return send_local_batch(eos); - } - RETURN_IF_ERROR(_parent->serialize_batch(_batch.get(), _ch_cur_pb_batch)); - _batch->reset(); - RETURN_IF_ERROR(send_batch(_ch_cur_pb_batch, eos)); - ch_roll_pb_batch(); - return Status::OK(); -} - -void DataStreamSender::Channel::ch_roll_pb_batch() { - _ch_cur_pb_batch = (_ch_cur_pb_batch == &_ch_pb_batch1 ? &_ch_pb_batch2 : &_ch_pb_batch1); -} - -Status DataStreamSender::Channel::send_local_batch(bool eos) { - std::shared_ptr<DataStreamRecvr> recvr = _parent->state()->exec_env()->stream_mgr()->find_recvr( - _fragment_instance_id, _dest_node_id); - if (recvr != nullptr) { - recvr->add_batch(_batch.get(), _parent->_sender_id, true); - if (eos) { - recvr->remove_sender(_parent->_sender_id, _be_number); - } - COUNTER_UPDATE(_parent->_local_bytes_send_counter, _batch->total_byte_size()); - } - _batch->reset(); - return Status::OK(); -} - -Status DataStreamSender::Channel::send_local_batch(RowBatch* batch, bool use_move) { - std::shared_ptr<DataStreamRecvr> recvr = _parent->state()->exec_env()->stream_mgr()->find_recvr( - _fragment_instance_id, _dest_node_id); - if (recvr != nullptr) { - recvr->add_batch(batch, _parent->_sender_id, use_move); - COUNTER_UPDATE(_parent->_local_bytes_send_counter, batch->total_byte_size()); - } - return Status::OK(); -} - -Status DataStreamSender::Channel::close_internal() { - if (!_need_close) { - return Status::OK(); - } - VLOG_RPC << "Channel::close() instance_id=" << _fragment_instance_id - << " dest_node=" << _dest_node_id - << " #rows= " << ((_batch == nullptr) ? 0 : _batch->num_rows()); - if (_batch != nullptr && _batch->num_rows() > 0) { - RETURN_IF_ERROR(send_current_batch(true)); - } else { - RETURN_IF_ERROR(send_batch(nullptr, true)); - } - // Don't wait for the last packet to finish, left it to close_wait.
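// Aside: a minimal, self-contained sketch (not Doris code) of the ping-pong
// buffering implemented by ch_roll_pb_batch() above: serialize into the
// current buffer, hand it to an async rpc via send_batch(), then roll to the
// other buffer so the next serialization never touches memory that is still
// in flight. "Buffer" is a stand-in for PRowBatch; the rpc itself is omitted.
#include <array>
#include <string>

struct Buffer {
    std::string bytes; // serialized row data
};

class PingPongBuffers {
public:
    Buffer* cur() { return _cur; }
    // Safe to call only once the rpc using the *other* buffer has finished,
    // which mirrors the _wait_last_brpc() call before a closure is reused.
    void roll() { _cur = (_cur == &_bufs[0]) ? &_bufs[1] : &_bufs[0]; }

private:
    std::array<Buffer, 2> _bufs;
    Buffer* _cur = &_bufs[0];
};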
- return Status::OK(); -} - -Status DataStreamSender::Channel::close(RuntimeState* state) { - Status st = close_internal(); - if (!st.ok()) { - state->log_error(st.to_string()); - } - return st; -} - -Status DataStreamSender::Channel::close_wait(RuntimeState* state) { - if (_need_close) { - Status st = _wait_last_brpc(); - if (!st.ok()) { - state->log_error(st.to_string()); - } - _need_close = false; - return st; - } - _batch.reset(); - return Status::OK(); -} - -DataStreamSender::DataStreamSender(ObjectPool* pool, int sender_id, const RowDescriptor& row_desc) - : _row_desc(row_desc), - _cur_pb_batch(&_pb_batch1), - _pool(pool), - _sender_id(sender_id), - _serialize_batch_timer(nullptr), - _bytes_sent_counter(nullptr), - _local_bytes_send_counter(nullptr) {} - -DataStreamSender::DataStreamSender(ObjectPool* pool, int sender_id, const RowDescriptor& row_desc, - const TDataStreamSink& sink, - const std::vector& destinations, - int per_channel_buffer_size, - bool send_query_statistics_with_every_batch) - : _row_desc(row_desc), - _profile(nullptr), - _cur_pb_batch(&_pb_batch1), - _pool(pool), - _sender_id(sender_id), - _serialize_batch_timer(nullptr), - _bytes_sent_counter(nullptr), - _local_bytes_send_counter(nullptr), - _current_channel_idx(0), - _part_type(sink.output_partition.type), - _ignore_not_found(sink.__isset.ignore_not_found ? sink.ignore_not_found : true), - _dest_node_id(sink.dest_node_id), - _transfer_large_data_by_brpc(config::transfer_large_data_by_brpc) { - DCHECK_GT(destinations.size(), 0); - DCHECK(sink.output_partition.type == TPartitionType::UNPARTITIONED || - sink.output_partition.type == TPartitionType::HASH_PARTITIONED || - sink.output_partition.type == TPartitionType::RANDOM || - sink.output_partition.type == TPartitionType::RANGE_PARTITIONED || - sink.output_partition.type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED); - // TODO: use something like google3's linked_ptr here (scoped_ptr isn't copyable - - std::map fragment_id_to_channel_index; - for (int i = 0; i < destinations.size(); ++i) { - // Select first dest as transfer chain. - bool is_transfer_chain = (i == 0); - const auto& fragment_instance_id = destinations[i].fragment_instance_id; - if (fragment_id_to_channel_index.find(fragment_instance_id.lo) == - fragment_id_to_channel_index.end()) { - _channel_shared_ptrs.emplace_back( - new Channel(this, row_desc, destinations[i].brpc_server, fragment_instance_id, - sink.dest_node_id, per_channel_buffer_size, is_transfer_chain, - send_query_statistics_with_every_batch)); - fragment_id_to_channel_index.insert( - {fragment_instance_id.lo, _channel_shared_ptrs.size() - 1}); - _channels.push_back(_channel_shared_ptrs.back().get()); - } else { - _channel_shared_ptrs.emplace_back( - _channel_shared_ptrs[fragment_id_to_channel_index[fragment_instance_id.lo]]); - } - } - _name = "DataStreamSender"; -} - -// We use the PartitionRange to compare here. It should not be a member function of PartitionInfo -// class because there are some other member in it. 
-// TODO: move this to dpp_sink -static bool compare_part_use_range(const PartitionInfo* v1, const PartitionInfo* v2) { - return v1->range() < v2->range(); -} - -Status DataStreamSender::init(const TDataSink& tsink) { - RETURN_IF_ERROR(DataSink::init(tsink)); - const TDataStreamSink& t_stream_sink = tsink.stream_sink; - if (_part_type == TPartitionType::HASH_PARTITIONED || - _part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) { - RETURN_IF_ERROR(Expr::create_expr_trees( - _pool, t_stream_sink.output_partition.partition_exprs, &_partition_expr_ctxs)); - } else if (_part_type == TPartitionType::RANGE_PARTITIONED) { - // Range partition - // Partition Exprs - RETURN_IF_ERROR(Expr::create_expr_trees( - _pool, t_stream_sink.output_partition.partition_exprs, &_partition_expr_ctxs)); - // Partition infos - int num_parts = t_stream_sink.output_partition.partition_infos.size(); - if (num_parts == 0) { - return Status::InternalError("Empty partition info."); - } - for (int i = 0; i < num_parts; ++i) { - PartitionInfo* info = _pool->add(new PartitionInfo()); - RETURN_IF_ERROR(PartitionInfo::from_thrift( - _pool, t_stream_sink.output_partition.partition_infos[i], info)); - _partition_infos.push_back(info); - } - // partitions should be in ascending order - std::sort(_partition_infos.begin(), _partition_infos.end(), compare_part_use_range); - } else { - } - - return Status::OK(); -} - -Status DataStreamSender::prepare(RuntimeState* state) { - RETURN_IF_ERROR(DataSink::prepare(state)); - _state = state; - std::string instances; - for (const auto& channel : _channels) { - if (instances.empty()) { - instances = channel->get_fragment_instance_id_str(); - } else { - instances += ", "; - instances += channel->get_fragment_instance_id_str(); - } - } - std::stringstream title; - title << "DataStreamSender (dst_id=" << _dest_node_id << ", dst_fragments=[" << instances - << "])"; - _profile = _pool->add(new RuntimeProfile(title.str())); - SCOPED_TIMER(_profile->total_time_counter()); - _mem_tracker = std::make_unique( - "DataStreamSender:" + print_id(state->fragment_instance_id()), _profile); - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - - if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) { - std::random_device rd; - std::mt19937 g(rd()); - shuffle(_channels.begin(), _channels.end(), g); - } else if (_part_type == TPartitionType::HASH_PARTITIONED || - _part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) { - RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc)); - } else { - RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc)); - for (auto iter : _partition_infos) { - RETURN_IF_ERROR(iter->prepare(state, _row_desc)); - } - } - - _bytes_sent_counter = ADD_COUNTER(profile(), "BytesSent", TUnit::BYTES); - _uncompressed_bytes_counter = ADD_COUNTER(profile(), "UncompressedRowBatchSize", TUnit::BYTES); - _ignore_rows = ADD_COUNTER(profile(), "IgnoreRows", TUnit::UNIT); - _serialize_batch_timer = ADD_TIMER(profile(), "SerializeBatchTime"); - _overall_throughput = profile()->add_derived_counter( - "OverallThroughput", TUnit::BYTES_PER_SECOND, - std::bind(&RuntimeProfile::units_per_second, _bytes_sent_counter, - profile()->total_time_counter()), - ""); - _local_bytes_send_counter = ADD_COUNTER(profile(), "LocalBytesSent", TUnit::BYTES); - for (int i = 0; i < _channels.size(); ++i) { - RETURN_IF_ERROR(_channels[i]->init(state)); - } - - return Status::OK(); -} - -DataStreamSender::~DataStreamSender() { - // TODO: check that 
sender was either already closed() or there was an error - // on some channel - _channel_shared_ptrs.clear(); -} - -Status DataStreamSender::open(RuntimeState* state) { - DCHECK(state != nullptr); - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - RETURN_IF_ERROR(Expr::open(_partition_expr_ctxs, state)); - for (auto iter : _partition_infos) { - RETURN_IF_ERROR(iter->open(state)); - } - return Status::OK(); -} - -Status DataStreamSender::send(RuntimeState* state, RowBatch* batch) { - SCOPED_TIMER(_profile->total_time_counter()); - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - - // Unpartition or _channel size - if (_part_type == TPartitionType::UNPARTITIONED || _channels.size() == 1) { - int local_size = 0; - for (auto channel : _channels) { - if (channel->is_local()) { - local_size++; - } - } - if (local_size == _channels.size()) { - // we don't have to serialize - for (auto channel : _channels) { - RETURN_IF_ERROR(channel->send_local_batch(batch, false)); - } - } else { - RETURN_IF_ERROR(serialize_batch(batch, _cur_pb_batch, _channels.size())); - for (auto channel : _channels) { - if (channel->is_local()) { - RETURN_IF_ERROR(channel->send_local_batch(batch, false)); - } else { - RETURN_IF_ERROR(channel->send_batch(_cur_pb_batch)); - } - } - // rollover - _roll_pb_batch(); - } - } else if (_part_type == TPartitionType::RANDOM) { - // Round-robin batches among channels. Wait for the current channel to finish its - // rpc before overwriting its batch. - Channel* current_channel = _channels[_current_channel_idx]; - if (current_channel->is_local()) { - RETURN_IF_ERROR(current_channel->send_local_batch(batch, false)); - } else { - RETURN_IF_ERROR(serialize_batch(batch, current_channel->ch_cur_pb_batch())); - RETURN_IF_ERROR(current_channel->send_batch(current_channel->ch_cur_pb_batch())); - current_channel->ch_roll_pb_batch(); - } - _current_channel_idx = (_current_channel_idx + 1) % _channels.size(); - } else if (_part_type == TPartitionType::HASH_PARTITIONED) { - // hash-partition batch's rows across channels - int num_channels = _channels.size(); - - for (int i = 0; i < batch->num_rows(); ++i) { - TupleRow* row = batch->get_row(i); - size_t hash_val = 0; - - for (auto ctx : _partition_expr_ctxs) { - void* partition_val = ctx->get_value(row); - // We can't use the crc hash function here because it does not result - // in uncorrelated hashes with different seeds. Instead we must use - // fvn hash. 
- // TODO: fix crc hash/GetHashValue() - hash_val = - RawValue::get_hash_value_fvn(partition_val, ctx->root()->type(), hash_val); - } - auto target_channel_id = hash_val % num_channels; - RETURN_IF_ERROR(_channels[target_channel_id]->add_row(row)); - } - } else if (_part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) { - // hash-partition batch's rows across channels - int num_channels = _channel_shared_ptrs.size(); - - for (int i = 0; i < batch->num_rows(); ++i) { - TupleRow* row = batch->get_row(i); - size_t hash_val = 0; - - for (auto ctx : _partition_expr_ctxs) { - void* partition_val = ctx->get_value(row); - // We must use the crc hash function to make sure the hash val equal - // to left table data distribute hash val - hash_val = RawValue::zlib_crc32(partition_val, ctx->root()->type(), hash_val); - } - auto target_channel_id = hash_val % num_channels; - RETURN_IF_ERROR(_channel_shared_ptrs[target_channel_id]->add_row(row)); - } - } else { - // Range partition - int num_channels = _channels.size(); - int ignore_rows = 0; - for (int i = 0; i < batch->num_rows(); ++i) { - TupleRow* row = batch->get_row(i); - size_t hash_val = 0; - bool ignore = false; - RETURN_IF_ERROR(compute_range_part_code(state, row, &hash_val, &ignore)); - if (ignore) { - // skip this row - ignore_rows++; - continue; - } - RETURN_IF_ERROR(_channels[hash_val % num_channels]->add_row(row)); - } - COUNTER_UPDATE(_ignore_rows, ignore_rows); - } - - return Status::OK(); -} - -void DataStreamSender::_roll_pb_batch() { - _cur_pb_batch = (_cur_pb_batch == &_pb_batch1 ? &_pb_batch2 : &_pb_batch1); -} - -int DataStreamSender::binary_find_partition(const PartRangeKey& key) const { - int low = 0; - int high = _partition_infos.size() - 1; - - VLOG_ROW << "range key: " << key.debug_string() << std::endl; - while (low <= high) { - int mid = low + (high - low) / 2; - int cmp = _partition_infos[mid]->range().compare_key(key); - if (cmp == 0) { - return mid; - } else if (cmp < 0) { // current < partition[mid] - low = mid + 1; - } else { - high = mid - 1; - } - } - - return -1; -} - -Status DataStreamSender::find_partition(RuntimeState* state, TupleRow* row, PartitionInfo** info, - bool* ignore) { - if (_partition_expr_ctxs.size() == 0) { - *info = _partition_infos[0]; - return Status::OK(); - } else { - *ignore = false; - // use binary search to get the right partition. 
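// Aside: a standalone sketch of the search performed by
// binary_find_partition() above. The partition ranges are kept sorted and
// non-overlapping, so a key can be located with a plain binary search over a
// three-way compare; Range and compare_key() are simplified stand-ins for
// Doris's PartitionRange.
#include <vector>

struct Range {
    int lo; // inclusive
    int hi; // exclusive
    // < 0: this range sorts before the key, 0: key inside, > 0: range after key
    int compare_key(int key) const { return key >= hi ? -1 : (key < lo ? 1 : 0); }
};

int find_range(const std::vector<Range>& ranges, int key) {
    int low = 0;
    int high = static_cast<int>(ranges.size()) - 1;
    while (low <= high) {
        int mid = low + (high - low) / 2;
        int cmp = ranges[mid].compare_key(key);
        if (cmp == 0) return mid;
        if (cmp < 0) low = mid + 1;  // key sorts after ranges[mid]
        else high = mid - 1;         // key sorts before ranges[mid]
    }
    return -1; // no containing partition: caller either ignores the row or errors
}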
- ExprContext* ctx = _partition_expr_ctxs[0]; - void* partition_val = ctx->get_value(row); - // construct a PartRangeKey - PartRangeKey tmpPartKey; - if (nullptr != partition_val) { - RETURN_IF_ERROR( - PartRangeKey::from_value(ctx->root()->type().type, partition_val, &tmpPartKey)); - } else { - tmpPartKey = PartRangeKey::neg_infinite(); - } - - int part_index = binary_find_partition(tmpPartKey); - if (part_index < 0) { - if (_ignore_not_found) { - // TODO(zc): add counter to compute its - std::stringstream error_log; - error_log << "there is no corresponding partition for this key: "; - ctx->print_value(row, &error_log); - LOG(INFO) << error_log.str(); - *ignore = true; - return Status::OK(); - } else { - std::stringstream error_log; - error_log << "there is no corresponding partition for this key: "; - ctx->print_value(row, &error_log); - return Status::InternalError(error_log.str()); - } - } - *info = _partition_infos[part_index]; - } - return Status::OK(); -} - -Status DataStreamSender::process_distribute(RuntimeState* state, TupleRow* row, - const PartitionInfo* part, size_t* code) { - uint32_t hash_val = 0; - for (auto& ctx : part->distributed_expr_ctxs()) { - void* partition_val = ctx->get_value(row); - if (partition_val != nullptr) { - hash_val = RawValue::zlib_crc32(partition_val, ctx->root()->type(), hash_val); - } else { - hash_val = HashUtil::zlib_crc_hash_null(hash_val); - } - } - hash_val %= part->distributed_bucket(); - - int64_t part_id = part->id(); - *code = RawValue::get_hash_value_fvn(&part_id, TypeDescriptor(TYPE_BIGINT), hash_val); - - return Status::OK(); -} - -Status DataStreamSender::compute_range_part_code(RuntimeState* state, TupleRow* row, - size_t* hash_value, bool* ignore) { - // process partition - PartitionInfo* part = nullptr; - RETURN_IF_ERROR(find_partition(state, row, &part, ignore)); - if (*ignore) { - return Status::OK(); - } - // process distribute - RETURN_IF_ERROR(process_distribute(state, row, part, hash_value)); - return Status::OK(); -} - -Status DataStreamSender::close(RuntimeState* state, Status exec_status) { - // TODO: only close channels that didn't have any errors - // make all channels close parallel - if (_closed) return Status::OK(); - Status final_st = Status::OK(); - for (int i = 0; i < _channels.size(); ++i) { - Status st = _channels[i]->close(state); - if (!st.ok() && final_st.ok()) { - final_st = st; - } - } - // wait all channels to finish - for (int i = 0; i < _channels.size(); ++i) { - Status st = _channels[i]->close_wait(state); - if (!st.ok() && final_st.ok()) { - final_st = st; - } - } - for (auto iter : _partition_infos) { - iter->close(state); - } - Expr::close(_partition_expr_ctxs, state); - - DataSink::close(state, exec_status); - return final_st; -} - -Status DataStreamSender::serialize_batch(RowBatch* src, PRowBatch* dest, int num_receivers) { - { - SCOPED_TIMER(_serialize_batch_timer); - size_t uncompressed_bytes = 0, compressed_bytes = 0; - RETURN_IF_ERROR(src->serialize(dest, &uncompressed_bytes, &compressed_bytes, - _transfer_large_data_by_brpc)); - COUNTER_UPDATE(_bytes_sent_counter, compressed_bytes * num_receivers); - COUNTER_UPDATE(_uncompressed_bytes_counter, uncompressed_bytes * num_receivers); - } - - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/runtime/data_stream_sender.h b/be/src/runtime/data_stream_sender.h deleted file mode 100644 index 542164638a..0000000000 --- a/be/src/runtime/data_stream_sender.h +++ /dev/null @@ -1,282 +0,0 @@ -// Licensed to the Apache Software Foundation 
(ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/data-stream-sender.h -// and modified by Doris - -#pragma once - -#include -#include - -#include "common/global_types.h" -#include "common/object_pool.h" -#include "common/status.h" -#include "exec/data_sink.h" -#include "gen_cpp/BackendService.h" -#include "gen_cpp/PaloInternalService_types.h" -#include "gen_cpp/Types_types.h" -#include "gen_cpp/data.pb.h" // for PRowBatch -#include "gen_cpp/internal_service.pb.h" -#include "service/backend_options.h" -#include "service/brpc.h" -#include "util/ref_count_closure.h" -#include "util/runtime_profile.h" -#include "util/uid_util.h" - -namespace doris { - -class ExprContext; -class RowBatch; -class RowDescriptor; -class TDataStreamSink; -class TNetworkAddress; -class TPlanFragmentDestination; -class PartitionInfo; -class TupleRow; -class PartRangeKey; -class MemTracker; - -// Single sender of an m:n data stream. -// Row batch data is routed to destinations based on the provided -// partitioning specification. -// *Not* thread-safe. -// -// TODO: capture stats that describe distribution of rows/data volume -// across channels. -class DataStreamSender : public DataSink { -public: - DataStreamSender(ObjectPool* pool, int sender_id, const RowDescriptor& row_desc); - // Construct a sender according to the output specification (sink), - // sending to the given destinations. - // Per_channel_buffer_size is the buffer size allocated to each channel - // and is specified in bytes. - // The RowDescriptor must live until close() is called. - // NOTE: supported partition types are UNPARTITIONED (broadcast) and HASH_PARTITIONED - DataStreamSender(ObjectPool* pool, int sender_id, const RowDescriptor& row_desc, - const TDataStreamSink& sink, - const std::vector& destinations, - int per_channel_buffer_size, bool send_query_statistics_with_every_batch); - virtual ~DataStreamSender(); - - virtual Status init(const TDataSink& thrift_sink); - - // Must be called before other API calls, and before the codegen'd IR module is - // compiled (i.e. in an ExecNode's Prepare() function). - virtual Status prepare(RuntimeState* state); - - // Must be called before Send() or Close(), and after the codegen'd IR module is - // compiled (i.e. in an ExecNode's Open() function). - virtual Status open(RuntimeState* state); - - // send data in 'batch' to destination nodes according to partitioning - // specification provided in c'tor. - // Blocks until all rows in batch are placed in their appropriate outgoing - // buffers (ie, blocks if there are still in-flight rpcs from the last - // send() call). 
- virtual Status send(RuntimeState* state, RowBatch* batch); - - // Flush all buffered data and close all existing channels to destination - // hosts. Further send() calls are illegal after calling close(). - virtual Status close(RuntimeState* state, Status exec_status); - - /// Serializes the src batch into the dest thrift batch. Maintains metrics. - /// num_receivers is the number of receivers this batch will be sent to. Only - /// used to maintain metrics. - Status serialize_batch(RowBatch* src, PRowBatch* dest, int num_receivers = 1); - - // Return total number of bytes sent in RowBatch.data. If batches are - // broadcast to multiple receivers, they are counted once per receiver. - virtual RuntimeProfile* profile() { return _profile; } - - RuntimeState* state() { return _state; } - -protected: - const RowDescriptor& _row_desc; - // A channel sends data asynchronously via calls to transmit_data - // to a single destination ipaddress/node. - // It has a fixed-capacity buffer and allows the caller either to add rows to - // that buffer individually (AddRow()), or circumvent the buffer altogether and send - // PRowBatches directly (SendBatch()). Either way, there can only be one in-flight RPC - // at any one time (ie, sending will block if the most recent rpc hasn't finished, - // which allows the receiver node to throttle the sender by withholding acks). - // *Not* thread-safe. - class Channel { - public: - Channel(DataStreamSender* parent, const RowDescriptor& row_desc, - const TNetworkAddress& brpc_dest, const TUniqueId& fragment_instance_id, - PlanNodeId dest_node_id, int buffer_size, bool is_transfer_chain, - bool send_query_statistics_with_every_batch); - ~Channel(); - // Initialize channel. - // Returns OK if successful, error indication otherwise. - Status init(RuntimeState* state); - - // Copies a single row into this channel's output buffer and flushes buffer - // if it reaches capacity. - // Returns error status if any of the preceding rpcs failed, OK otherwise. - Status add_row(TupleRow* row); - - // Asynchronously sends a row batch. - // Returns the status of the most recently finished transmit_data - // rpc (or OK if there wasn't one that hasn't been reported yet). - // if batch is nullptr, send the eof packet - Status send_batch(PRowBatch* batch, bool eos = false); - - Status send_local_batch(bool eos); - - Status send_local_batch(RowBatch* batch, bool use_move); - - // Flush buffered rows and close channel. This function don't wait the response - // of close operation, client should call close_wait() to finish channel's close. - // We split one close operation into two phases in order to make multiple channels - // can run parallel. - Status close(RuntimeState* state); - - // Get close wait's response, to finish channel close operation. 
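// Aside: a standalone sketch of the split close()/close_wait() protocol
// described above. Phase one starts the close on every channel without
// blocking, phase two joins them, so N channels shut down concurrently
// instead of serially; std::async is a stand-in for the brpc callback
// machinery, and FakeChannel is hypothetical.
#include <future>
#include <vector>

struct FakeChannel {
    std::future<bool> pending;
    void close() { // phase 1: fire the last rpc and return immediately
        pending = std::async(std::launch::async, [] { return true; });
    }
    bool close_wait() { return pending.get(); } // phase 2: join
};

bool close_all(std::vector<FakeChannel>& channels) {
    for (auto& ch : channels) ch.close();                  // start all closes
    bool ok = true;
    for (auto& ch : channels) ok = ch.close_wait() && ok;  // then wait for all
    return ok;
}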
- Status close_wait(RuntimeState* state); - - PRowBatch* ch_cur_pb_batch() { return _ch_cur_pb_batch; } - - std::string get_fragment_instance_id_str() { - UniqueId uid(_fragment_instance_id); - return uid.to_string(); - } - - TUniqueId get_fragment_instance_id() { return _fragment_instance_id; } - - bool is_local() { return _is_local; } - - Status _wait_last_brpc() { - if (_closure == nullptr) return Status::OK(); - auto cntl = &_closure->cntl; - brpc::Join(cntl->call_id()); - if (cntl->Failed()) { - std::stringstream ss; - ss << "failed to send brpc batch, error=" << berror(cntl->ErrorCode()) - << ", error_text=" << cntl->ErrorText() - << ", client: " << BackendOptions::get_localhost(); - LOG(WARNING) << ss.str(); - return Status::RpcError(ss.str()); - } - return Status::OK(); - } - // Serialize _batch into _thrift_batch and send via send_batch(). - // Returns send_batch() status. - Status send_current_batch(bool eos = false); - Status close_internal(); - // this must be called after calling `send_batch()` - void ch_roll_pb_batch(); - - DataStreamSender* _parent; - int _buffer_size; - - const RowDescriptor& _row_desc; - TUniqueId _fragment_instance_id; - PlanNodeId _dest_node_id; - - int64_t _packet_seq; - - // we're accumulating rows into this batch - std::unique_ptr<RowBatch> _batch; - - bool _need_close; - int _be_number; - - TNetworkAddress _brpc_dest_addr; - - // TODO(zc): init used for brpc - PUniqueId _finst_id; - PUniqueId _query_id; - - // serialized batches for broadcasting; we need two so we can write - // one while the other one is still being sent. - // Which is for same reason as `_cur_pb_batch`, `_pb_batch1` and `_pb_batch2` - // in DataStreamSender. - PRowBatch* _ch_cur_pb_batch; - PRowBatch _ch_pb_batch1; - PRowBatch _ch_pb_batch2; - - PTransmitDataParams _brpc_request; - std::shared_ptr<PBackendService_Stub> _brpc_stub = nullptr; - RefCountClosure<PTransmitDataResult>* _closure = nullptr; - RuntimeState* _state; - int32_t _brpc_timeout_ms = 500; - // whether the dest can be treated as query statistics transfer chain. - bool _is_transfer_chain; - bool _send_query_statistics_with_every_batch; - bool _is_local; - }; - - RuntimeProfile* _profile; // Allocated from _pool - PRowBatch* _cur_pb_batch; - std::unique_ptr<MemTracker> _mem_tracker; - ObjectPool* _pool; - // Sender instance id, unique within a fragment.
- int _sender_id; - RuntimeProfile::Counter* _serialize_batch_timer; - RuntimeProfile::Counter* _bytes_sent_counter; - // Used to counter send bytes under local data exchange - RuntimeProfile::Counter* _local_bytes_send_counter; - RuntimeProfile::Counter* _uncompressed_bytes_counter; - RuntimeState* _state; - - std::vector<Channel*> _channels; - std::vector<std::shared_ptr<Channel>> _channel_shared_ptrs; - -private: - Status compute_range_part_code(RuntimeState* state, TupleRow* row, size_t* hash_value, - bool* ignore); - - int binary_find_partition(const PartRangeKey& key) const; - - Status find_partition(RuntimeState* state, TupleRow* row, PartitionInfo** info, bool* ignore); - - Status process_distribute(RuntimeState* state, TupleRow* row, const PartitionInfo* part, - size_t* hash_val); - - void _roll_pb_batch(); - - int _current_channel_idx; // index of current channel to send to if _random == true - - TPartitionType::type _part_type; - bool _ignore_not_found; - - // serialized batches for broadcasting; we need two so we can write - // one while the other one is still being sent - PRowBatch _pb_batch1; - PRowBatch _pb_batch2; - - std::vector<ExprContext*> _partition_expr_ctxs; // compute per-row partition values - - // map from range value to partition_id - // sorted in ascending orderi by range for binary search - std::vector<PartitionInfo*> _partition_infos; - - RuntimeProfile::Counter* _ignore_rows; - - // Throughput per total time spent in sender - RuntimeProfile::Counter* _overall_throughput; - - // Identifier of the destination plan node. - PlanNodeId _dest_node_id; - - // User can change this config at runtime, avoid it being modified during query or loading process. - bool _transfer_large_data_by_brpc = false; -}; - -} // namespace doris diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 0b69b9e846..0b36a2b8e8 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -107,7 +107,6 @@ public: const bool initialized() const { return _is_init; } const std::string& token() const; ExternalScanContextMgr* external_scan_context_mgr() { return _external_scan_context_mgr; } - DataStreamMgr* stream_mgr() { return _stream_mgr; } doris::vectorized::VDataStreamMgr* vstream_mgr() { return _vstream_mgr; } ResultBufferMgr* result_mgr() { return _result_mgr; } ResultQueueMgr* result_queue_mgr() { return _result_queue_mgr; } @@ -200,7 +199,6 @@ private: std::map<std::string, StorePath> _store_path_map; // Leave protected so that subclasses can override ExternalScanContextMgr* _external_scan_context_mgr = nullptr; - DataStreamMgr* _stream_mgr = nullptr; doris::vectorized::VDataStreamMgr* _vstream_mgr = nullptr; ResultBufferMgr* _result_mgr = nullptr; ResultQueueMgr* _result_queue_mgr = nullptr; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index cf5a2d55f8..81e6b6072f 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -30,7 +30,6 @@ #include "runtime/bufferpool/buffer_pool.h" #include "runtime/cache/result_cache.h" #include "runtime/client_cache.h" -#include "runtime/data_stream_mgr.h" #include "runtime/disk_io_mgr.h" #include "runtime/exec_env.h" #include "runtime/external_scan_context_mgr.h" @@ -90,7 +89,6 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) { } _external_scan_context_mgr = new ExternalScanContextMgr(this); - _stream_mgr = new DataStreamMgr(); _vstream_mgr = new doris::vectorized::VDataStreamMgr(); _result_mgr = new ResultBufferMgr(); _result_queue_mgr = new ResultQueueMgr(); @@ -374,7 +372,6 @@ void ExecEnv::_destroy() {
SAFE_DELETE(_backend_client_cache); SAFE_DELETE(_result_mgr); SAFE_DELETE(_result_queue_mgr); - SAFE_DELETE(_stream_mgr); SAFE_DELETE(_stream_load_executor); SAFE_DELETE(_routine_load_task_executor); SAFE_DELETE(_external_scan_context_mgr); diff --git a/be/src/runtime/file_result_writer.cpp b/be/src/runtime/file_result_writer.cpp deleted file mode 100644 index f06be0e693..0000000000 --- a/be/src/runtime/file_result_writer.cpp +++ /dev/null @@ -1,506 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "runtime/file_result_writer.h" - -#include "common/consts.h" -#include "exec/parquet_writer.h" -#include "exprs/expr_context.h" -#include "gen_cpp/PaloInternalService_types.h" -#include "gutil/strings/numbers.h" -#include "gutil/strings/substitute.h" -#include "io/file_factory.h" -#include "runtime/buffer_control_block.h" -#include "runtime/large_int_value.h" -#include "runtime/primitive_type.h" -#include "runtime/raw_value.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/tuple_row.h" -#include "service/backend_options.h" -#include "util/file_utils.h" -#include "util/mysql_global.h" -#include "util/mysql_row_buffer.h" -#include "util/types.h" -#include "util/uid_util.h" -#include "util/url_coding.h" - -namespace doris { - -const size_t FileResultWriter::OUTSTREAM_BUFFER_SIZE_BYTES = 1024 * 1024; - -FileResultWriter::FileResultWriter(const ResultFileOptions* file_opts, - const TStorageBackendType::type storage_type, - const TUniqueId fragment_instance_id, - const std::vector& output_expr_ctxs, - RuntimeProfile* parent_profile, BufferControlBlock* sinker, - RowBatch* output_batch, bool output_object_data) - : _file_opts(file_opts), - _storage_type(storage_type), - _fragment_instance_id(fragment_instance_id), - _output_expr_ctxs(output_expr_ctxs), - _parent_profile(parent_profile), - _sinker(sinker), - _output_batch(output_batch) { - _output_object_data = output_object_data; -} - -FileResultWriter::~FileResultWriter() { - _close_file_writer(true, true); -} - -Status FileResultWriter::init(RuntimeState* state) { - _state = state; - _init_profile(); - return _create_next_file_writer(); -} - -void FileResultWriter::_init_profile() { - RuntimeProfile* profile = _parent_profile->create_child("FileResultWriter", true, true); - _append_row_batch_timer = ADD_TIMER(profile, "AppendBatchTime"); - _convert_tuple_timer = ADD_CHILD_TIMER(profile, "TupleConvertTime", "AppendBatchTime"); - _file_write_timer = ADD_CHILD_TIMER(profile, "FileWriteTime", "AppendBatchTime"); - _writer_close_timer = ADD_TIMER(profile, "FileWriterCloseTime"); - _written_rows_counter = ADD_COUNTER(profile, "NumWrittenRows", TUnit::UNIT); - _written_data_bytes = ADD_COUNTER(profile, 
"WrittenDataBytes", TUnit::BYTES); -} - -Status FileResultWriter::_create_success_file() { - std::string file_name; - RETURN_IF_ERROR(_get_success_file_name(&file_name)); - RETURN_IF_ERROR(_create_file_writer(file_name)); - return _close_file_writer(true, true); -} - -Status FileResultWriter::_get_success_file_name(std::string* file_name) { - std::stringstream ss; - ss << _file_opts->file_path << _file_opts->success_file_name; - *file_name = ss.str(); - if (_storage_type == TStorageBackendType::LOCAL) { - } - - return Status::OK(); -} - -Status FileResultWriter::_create_next_file_writer() { - std::string file_name; - RETURN_IF_ERROR(_get_next_file_name(&file_name)); - return _create_file_writer(file_name); -} - -Status FileResultWriter::_create_file_writer(const std::string& file_name) { - RETURN_IF_ERROR(FileFactory::create_file_writer( - FileFactory::convert_storage_type(_storage_type), _state->exec_env(), - _file_opts->broker_addresses, _file_opts->broker_properties, file_name, 0, - _file_writer)); - RETURN_IF_ERROR(_file_writer->open()); - - switch (_file_opts->file_format) { - case TFileFormatType::FORMAT_CSV_PLAIN: - // just use file writer is enough - break; - case TFileFormatType::FORMAT_PARQUET: { - //TODO: in order to consider the compatibility when upgrading, could remove this code after 1.2 - if (_file_opts->is_refactor_before_flag) { - _parquet_writer = new ParquetWriterWrapper(_file_writer.get(), _output_expr_ctxs, - _file_opts->file_properties, - _file_opts->schema, _output_object_data); - } else { - _parquet_writer = new ParquetWriterWrapper( - _file_writer.get(), _output_expr_ctxs, _file_opts->parquet_schemas, - _file_opts->parquet_commpression_type, _file_opts->parquert_disable_dictionary, - _file_opts->parquet_version, _output_object_data); - } - break; - } - default: - return Status::InternalError("unsupported file format: {}", _file_opts->file_format); - } - LOG(INFO) << "create file for exporting query result. file name: " << file_name - << ". query id: " << print_id(_state->query_id()) - << " format:" << _file_opts->file_format; - return Status::OK(); -} - -// file name format as: my_prefix_{fragment_instance_id}_0.csv -Status FileResultWriter::_get_next_file_name(std::string* file_name) { - std::stringstream ss; - ss << _file_opts->file_path << print_id(_fragment_instance_id) << "_" << (_file_idx++) << "." - << _file_format_to_name(); - *file_name = ss.str(); - _header_sent = false; - if (_storage_type == TStorageBackendType::LOCAL) { - // For local file writer, the file_path is a local dir. - // Here we do a simple security verification by checking whether the file exists. - // Because the file path is currently arbitrarily specified by the user, - // Doris is not responsible for ensuring the correctness of the path. - // This is just to prevent overwriting the existing file. - if (FileUtils::check_exist(*file_name)) { - return Status::InternalError("File already exists: {}. 
Host: {}", *file_name, - BackendOptions::get_localhost()); - } - } - - return Status::OK(); -} - -// file url format as: -// LOCAL: file:///localhost_address/{file_path}{fragment_instance_id}_ -// S3: {file_path}{fragment_instance_id}_ -// BROKER: {file_path}{fragment_instance_id}_ - -Status FileResultWriter::_get_file_url(std::string* file_url) { - std::stringstream ss; - if (_storage_type == TStorageBackendType::LOCAL) { - ss << "file:///" << BackendOptions::get_localhost(); - } - ss << _file_opts->file_path; - ss << print_id(_fragment_instance_id) << "_"; - *file_url = ss.str(); - return Status::OK(); -} - -std::string FileResultWriter::_file_format_to_name() { - switch (_file_opts->file_format) { - case TFileFormatType::FORMAT_CSV_PLAIN: - return "csv"; - case TFileFormatType::FORMAT_PARQUET: - return "parquet"; - default: - return "unknown"; - } -} -std::string FileResultWriter::gen_types() { - std::string types = ""; - int num_columns = _output_expr_ctxs.size(); - for (int i = 0; i < num_columns; ++i) { - types += type_to_string(_output_expr_ctxs[i]->root()->type().type); - if (i < num_columns - 1) { - types += _file_opts->column_separator; - } - } - types += _file_opts->line_delimiter; - return types; -} - -Status FileResultWriter::write_csv_header() { - if (!_header_sent && _header.size() > 0) { - std::string tmp_header = _header; - if (_header_type == BeConsts::CSV_WITH_NAMES_AND_TYPES) { - tmp_header += gen_types(); - } - size_t written_len = 0; - RETURN_IF_ERROR(_file_writer->write(reinterpret_cast(tmp_header.c_str()), - tmp_header.size(), &written_len)); - _header_sent = true; - } - return Status::OK(); -} - -Status FileResultWriter::append_row_batch(const RowBatch* batch) { - if (nullptr == batch || 0 == batch->num_rows()) { - return Status::OK(); - } - RETURN_IF_ERROR(write_csv_header()); - SCOPED_TIMER(_append_row_batch_timer); - if (_parquet_writer != nullptr) { - RETURN_IF_ERROR(_write_parquet_file(*batch)); - } else { - RETURN_IF_ERROR(_write_csv_file(*batch)); - } - - _written_rows += batch->num_rows(); - return Status::OK(); -} - -Status FileResultWriter::_write_parquet_file(const RowBatch& batch) { - RETURN_IF_ERROR(_parquet_writer->write(batch)); - // split file if exceed limit - _current_written_bytes = _parquet_writer->written_len(); - return _create_new_file_if_exceed_size(); -} - -Status FileResultWriter::_write_csv_file(const RowBatch& batch) { - int num_rows = batch.num_rows(); - for (int i = 0; i < num_rows; ++i) { - TupleRow* row = batch.get_row(i); - RETURN_IF_ERROR(_write_one_row_as_csv(row)); - } - return _flush_plain_text_outstream(true); -} - -// actually, this logic is same as `ExportSink::gen_row_buffer` -// TODO(cmy): find a way to unify them. 
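// Aside: a standalone sketch of the csv_with_names_and_types header produced
// by write_csv_header()/gen_types() above: one line of column names, then
// optionally one line of column types, both joined with the configured column
// separator and terminated by the line delimiter. build_csv_header() is a
// hypothetical helper, not the Doris API.
#include <string>
#include <vector>

std::string build_csv_header(const std::vector<std::string>& names,
                             const std::vector<std::string>& types,
                             bool with_types, // e.g. header type CSV_WITH_NAMES_AND_TYPES
                             const std::string& sep, const std::string& eol) {
    auto join = [&](const std::vector<std::string>& cols) {
        std::string line;
        for (size_t i = 0; i < cols.size(); ++i) {
            line += cols[i];
            if (i + 1 < cols.size()) line += sep; // separator between, not after, columns
        }
        return line + eol;
    };
    std::string header = join(names);
    if (with_types) header += join(types);
    return header;
}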
-Status FileResultWriter::_write_one_row_as_csv(TupleRow* row) { - { - SCOPED_TIMER(_convert_tuple_timer); - int num_columns = _output_expr_ctxs.size(); - for (int i = 0; i < num_columns; ++i) { - void* item = _output_expr_ctxs[i]->get_value(row); - - if (item == nullptr) { - _plain_text_outstream << NULL_IN_CSV; - if (i < num_columns - 1) { - _plain_text_outstream << _file_opts->column_separator; - } - continue; - } - - switch (_output_expr_ctxs[i]->root()->type().type) { - case TYPE_BOOLEAN: - case TYPE_TINYINT: - _plain_text_outstream << (int)*static_cast(item); - break; - case TYPE_SMALLINT: - _plain_text_outstream << *static_cast(item); - break; - case TYPE_INT: - _plain_text_outstream << *static_cast(item); - break; - case TYPE_BIGINT: - _plain_text_outstream << *static_cast(item); - break; - case TYPE_LARGEINT: - _plain_text_outstream << reinterpret_cast(item)->value; - break; - case TYPE_FLOAT: { - char buffer[MAX_FLOAT_STR_LENGTH + 2]; - float float_value = *static_cast(item); - buffer[0] = '\0'; - int length = FloatToBuffer(float_value, MAX_FLOAT_STR_LENGTH, buffer); - DCHECK(length >= 0) << "gcvt float failed, float value=" << float_value; - _plain_text_outstream << buffer; - break; - } - case TYPE_DOUBLE: { - // To prevent loss of precision on float and double types, - // they are converted to strings before output. - // For example: For a double value 27361919854.929001, - // the direct output of using std::stringstream is 2.73619e+10, - // and after conversion to a string, it outputs 27361919854.929001 - char buffer[MAX_DOUBLE_STR_LENGTH + 2]; - double double_value = *static_cast(item); - buffer[0] = '\0'; - int length = DoubleToBuffer(double_value, MAX_DOUBLE_STR_LENGTH, buffer); - DCHECK(length >= 0) << "gcvt double failed, double value=" << double_value; - _plain_text_outstream << buffer; - break; - } - case TYPE_DATE: - case TYPE_DATETIME: { - char buf[64]; - const DateTimeValue* time_val = (const DateTimeValue*)(item); - time_val->to_string(buf); - _plain_text_outstream << buf; - break; - } - case TYPE_VARCHAR: - case TYPE_CHAR: - case TYPE_STRING: { - const StringValue* string_val = (const StringValue*)(item); - if (string_val->ptr == nullptr) { - if (string_val->len != 0) { - _plain_text_outstream << NULL_IN_CSV; - } - } else { - _plain_text_outstream << std::string(string_val->ptr, string_val->len); - } - break; - } - case TYPE_DECIMALV2: { - const DecimalV2Value decimal_val( - reinterpret_cast(item)->value); - std::string decimal_str; - int output_scale = _output_expr_ctxs[i]->root()->output_scale(); - decimal_str = decimal_val.to_string(output_scale); - _plain_text_outstream << decimal_str; - break; - } - case TYPE_OBJECT: - case TYPE_HLL: { - if (_output_object_data) { - const StringValue* string_val = (const StringValue*)(item); - if (string_val->ptr == nullptr) { - _plain_text_outstream << NULL_IN_CSV; - } else { - std::string base64_str; - base64_encode(string_val->to_string(), &base64_str); - _plain_text_outstream << base64_str; - } - } else { - _plain_text_outstream << NULL_IN_CSV; - } - break; - } - case TYPE_ARRAY: { - auto col_type = _output_expr_ctxs[i]->root()->type(); - int output_scale = _output_expr_ctxs[i]->root()->output_scale(); - RawValue::print_value(item, col_type, output_scale, &_plain_text_outstream); - break; - } - default: { - // not supported type, like BITMAP, HLL, just export null - _plain_text_outstream << NULL_IN_CSV; - } - } - if (i < num_columns - 1) { - _plain_text_outstream << _file_opts->column_separator; - } - } // end for columns 
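// Aside: a standalone demonstration of the precision issue the TYPE_DOUBLE
// branch above works around. Default stream formatting keeps only six
// significant digits, while a %.17g-style conversion (roughly what
// DoubleToBuffer amounts to) round-trips the value exactly.
#include <cstdio>
#include <iostream>
#include <sstream>

int main() {
    double v = 27361919854.929001;
    std::ostringstream ss;
    ss << v;                                     // prints "2.73619e+10" -- precision lost
    char buf[32];
    std::snprintf(buf, sizeof(buf), "%.17g", v); // prints "27361919854.929001"
    std::cout << ss.str() << "\n" << buf << "\n";
    return 0;
}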
- _plain_text_outstream << _file_opts->line_delimiter; - } - - // write one line to file - return _flush_plain_text_outstream(false); -} - -Status FileResultWriter::_flush_plain_text_outstream(bool eos) { - SCOPED_TIMER(_file_write_timer); - size_t pos = _plain_text_outstream.tellp(); - if (pos == 0 || (pos < OUTSTREAM_BUFFER_SIZE_BYTES && !eos)) { - return Status::OK(); - } - - const std::string& buf = _plain_text_outstream.str(); - size_t written_len = 0; - RETURN_IF_ERROR(_file_writer->write(reinterpret_cast(buf.c_str()), buf.size(), - &written_len)); - COUNTER_UPDATE(_written_data_bytes, written_len); - _current_written_bytes += written_len; - - // clear the stream - _plain_text_outstream.str(""); - _plain_text_outstream.clear(); - - // split file if exceed limit - return _create_new_file_if_exceed_size(); -} - -Status FileResultWriter::_create_new_file_if_exceed_size() { - if (_current_written_bytes < _file_opts->max_file_size_bytes) { - return Status::OK(); - } - // current file size exceed the max file size. close this file - // and create new one - { - SCOPED_TIMER(_writer_close_timer); - RETURN_IF_ERROR(_close_file_writer(false)); - } - _current_written_bytes = 0; - return Status::OK(); -} - -Status FileResultWriter::_close_file_writer(bool done, bool only_close) { - if (_parquet_writer != nullptr) { - _parquet_writer->close(); - COUNTER_UPDATE(_written_data_bytes, _current_written_bytes); - delete _parquet_writer; - _parquet_writer = nullptr; - } else if (_file_writer) { - _file_writer->close(); - } - - if (only_close) { - return Status::OK(); - } - - if (!done) { - // not finished, create new file writer for next file - RETURN_IF_ERROR(_create_next_file_writer()); - } else { - // All data is written to file, send statistic result - if (_file_opts->success_file_name != "") { - // write success file, just need to touch an empty file - RETURN_IF_ERROR(_create_success_file()); - } - if (_output_batch == nullptr) { - RETURN_IF_ERROR(_send_result()); - } else { - RETURN_IF_ERROR(_fill_result_batch()); - } - } - return Status::OK(); -} - -Status FileResultWriter::_send_result() { - if (_is_result_sent) { - return Status::OK(); - } - _is_result_sent = true; - - // The final stat result include: - // FileNumber, TotalRows, FileSize and URL - // The type of these field should be consistent with types defined - // in OutFileClause.java of FE. 
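// Aside: a standalone sketch of the buffering policy in
// _flush_plain_text_outstream() above. Rows accumulate in an in-memory stream
// and only reach the (comparatively slow) file writer once the buffer crosses
// a size threshold, or unconditionally at eos; BufferedWriter is hypothetical.
#include <sstream>
#include <string>

class BufferedWriter {
public:
    explicit BufferedWriter(size_t threshold) : _threshold(threshold) {}
    void append(const std::string& row) { _buf << row; }
    // Returns the bytes that would be handed to the underlying file writer,
    // or an empty string when the threshold has not been reached yet.
    std::string flush(bool eos) {
        size_t pos = static_cast<size_t>(_buf.tellp());
        if (pos == 0 || (pos < _threshold && !eos)) return {};
        std::string out = _buf.str();
        _buf.str("");  // clear the buffered contents
        _buf.clear();  // and reset the stream's state flags
        return out;
    }

private:
    std::stringstream _buf;
    const size_t _threshold; // e.g. OUTSTREAM_BUFFER_SIZE_BYTES, 1MB above
};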
- MysqlRowBuffer row_buffer; - row_buffer.push_int(_file_idx); // file number - row_buffer.push_bigint(_written_rows_counter->value()); // total rows - row_buffer.push_bigint(_written_data_bytes->value()); // file size - std::string file_url; - _get_file_url(&file_url); - row_buffer.push_string(file_url.c_str(), file_url.length()); // url - - std::unique_ptr result = std::make_unique(); - result->result_batch.rows.resize(1); - result->result_batch.rows[0].assign(row_buffer.buf(), row_buffer.length()); - RETURN_NOT_OK_STATUS_WITH_WARN(_sinker->add_batch(result), "failed to send outfile result"); - return Status::OK(); -} - -Status FileResultWriter::_fill_result_batch() { - if (_is_result_sent) { - return Status::OK(); - } - _is_result_sent = true; - - TupleDescriptor* tuple_desc = _output_batch->row_desc().tuple_descriptors()[0]; - Tuple* tuple = (Tuple*)_output_batch->tuple_data_pool()->allocate(tuple_desc->byte_size()); - _output_batch->get_row(_output_batch->add_row())->set_tuple(0, tuple); - memset(tuple, 0, tuple_desc->byte_size()); - - MemPool* tuple_pool = _output_batch->tuple_data_pool(); - RawValue::write(&_file_idx, tuple, tuple_desc->slots()[0], tuple_pool); - int64_t written_rows = _written_rows_counter->value(); - RawValue::write(&written_rows, tuple, tuple_desc->slots()[1], tuple_pool); - int64_t written_data_bytes = _written_data_bytes->value(); - RawValue::write(&written_data_bytes, tuple, tuple_desc->slots()[2], tuple_pool); - - StringValue* url_str_val = - reinterpret_cast(tuple->get_slot(tuple_desc->slots()[3]->tuple_offset())); - std::string file_url; - _get_file_url(&file_url); - url_str_val->ptr = (char*)_output_batch->tuple_data_pool()->allocate(file_url.length()); - url_str_val->len = file_url.length(); - memcpy(url_str_val->ptr, file_url.c_str(), url_str_val->len); - - _output_batch->commit_last_row(); - return Status::OK(); -} - -Status FileResultWriter::close() { - // the following 2 profile "_written_rows_counter" and "_writer_close_timer" - // must be outside the `_close_file_writer()`. - // because `_close_file_writer()` may be called in deconstructor, - // at that time, the RuntimeState may already been deconstructed, - // so does the profile in RuntimeState. - COUNTER_SET(_written_rows_counter, _written_rows); - SCOPED_TIMER(_writer_close_timer); - return _close_file_writer(true, false); -} - -} // namespace doris diff --git a/be/src/runtime/file_result_writer.h b/be/src/runtime/file_result_writer.h deleted file mode 100644 index 4e346f3af5..0000000000 --- a/be/src/runtime/file_result_writer.h +++ /dev/null @@ -1,201 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
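// Aside: a sketch of the one-row statistic result assembled by _send_result()
// above -- file number, total rows, file size and URL -- whose column order
// must stay in sync with the outfile result schema defined on the FE side.
// OutfileResultRow and to_debug_string() are illustrative, not Doris types.
#include <cstdint>
#include <string>

struct OutfileResultRow {
    int32_t file_number;     // number of files written
    int64_t total_rows;      // rows across all files
    int64_t file_size_bytes; // bytes across all files
    std::string url;         // prefix the client can use to locate the files
};

std::string to_debug_string(const OutfileResultRow& r) {
    return "FileNumber=" + std::to_string(r.file_number) +
           " TotalRows=" + std::to_string(r.total_rows) +
           " FileSize=" + std::to_string(r.file_size_bytes) + " URL=" + r.url;
}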
- -#pragma once - -#include "gen_cpp/DataSinks_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/result_writer.h" -#include "runtime/runtime_state.h" - -namespace doris { - -class ExprContext; -class FileWriter; -class ParquetWriterWrapper; -class RowBatch; -class RuntimeProfile; -class TupleRow; - -struct ResultFileOptions { - // [[deprecated]] - bool is_local_file; - std::string file_path; - TFileFormatType::type file_format; - std::string column_separator; - std::string line_delimiter; - size_t max_file_size_bytes = 1 * 1024 * 1024 * 1024; // 1GB - std::vector broker_addresses; - std::map broker_properties; - std::string success_file_name = ""; - std::vector> schema; //not use in outfile with parquet format - std::map file_properties; //not use in outfile with parquet format - - std::vector parquet_schemas; - TParquetCompressionType::type parquet_commpression_type; - TParquetVersion::type parquet_version; - bool parquert_disable_dictionary; - //note: use outfile with parquet format, have deprecated 9:schema and 10:file_properties - //But in order to consider the compatibility when upgrading, so add a bool to check - //Now the code version is 1.1.2, so when the version is after 1.2, could remove this code. - bool is_refactor_before_flag = false; - std::string orc_schema; - - ResultFileOptions(const TResultFileSinkOptions& t_opt) { - file_path = t_opt.file_path; - file_format = t_opt.file_format; - column_separator = t_opt.__isset.column_separator ? t_opt.column_separator : "\t"; - line_delimiter = t_opt.__isset.line_delimiter ? t_opt.line_delimiter : "\n"; - max_file_size_bytes = - t_opt.__isset.max_file_size_bytes ? t_opt.max_file_size_bytes : max_file_size_bytes; - - is_local_file = true; - if (t_opt.__isset.broker_addresses) { - broker_addresses = t_opt.broker_addresses; - is_local_file = false; - } - if (t_opt.__isset.broker_properties) { - broker_properties = t_opt.broker_properties; - } - if (t_opt.__isset.success_file_name) { - success_file_name = t_opt.success_file_name; - } - if (t_opt.__isset.schema) { - schema = t_opt.schema; - is_refactor_before_flag = true; - } - if (t_opt.__isset.file_properties) { - file_properties = t_opt.file_properties; - } - if (t_opt.__isset.parquet_schemas) { - is_refactor_before_flag = false; - parquet_schemas = t_opt.parquet_schemas; - } - if (t_opt.__isset.parquet_compression_type) { - parquet_commpression_type = t_opt.parquet_compression_type; - } - if (t_opt.__isset.parquet_disable_dictionary) { - parquert_disable_dictionary = t_opt.parquet_disable_dictionary; - } - if (t_opt.__isset.parquet_version) { - parquet_version = t_opt.parquet_version; - } - if (t_opt.__isset.orc_schema) { - orc_schema = t_opt.orc_schema; - } - } -}; - -class BufferControlBlock; -// write result to file -class FileResultWriter final : public ResultWriter { -public: - FileResultWriter(const ResultFileOptions* file_option, - const TStorageBackendType::type storage_type, - const TUniqueId fragment_instance_id, - const std::vector& output_expr_ctxs, - RuntimeProfile* parent_profile, BufferControlBlock* sinker, - RowBatch* output_batch, bool output_object_data); - virtual ~FileResultWriter(); - - virtual Status init(RuntimeState* state) override; - virtual Status append_row_batch(const RowBatch* batch) override; - virtual Status close() override; - - // file result writer always return statistic result in one row - virtual int64_t get_written_rows() const override { return 1; } - - std::string gen_types(); - Status write_csv_header(); - -private: - Status 
_write_csv_file(const RowBatch& batch); - Status _write_parquet_file(const RowBatch& batch); - Status _write_one_row_as_csv(TupleRow* row); - - // if buffer exceed the limit, write the data buffered in _plain_text_outstream via file_writer - // if eos, write the data even if buffer is not full. - Status _flush_plain_text_outstream(bool eos); - void _init_profile(); - - Status _create_file_writer(const std::string& file_name); - Status _create_next_file_writer(); - Status _create_success_file(); - // get next export file name - Status _get_next_file_name(std::string* file_name); - Status _get_success_file_name(std::string* file_name); - Status _get_file_url(std::string* file_url); - std::string _file_format_to_name(); - // close file writer, and if !done, it will create new writer for next file. - // if only_close is true, this method will just close the file writer and return. - Status _close_file_writer(bool done, bool only_close = false); - // create a new file if current file size exceed limit - Status _create_new_file_if_exceed_size(); - // send the final statistic result - Status _send_result(); - // save result into batch rather than send it - Status _fill_result_batch(); - -private: - RuntimeState* _state; // not owned, set when init - const ResultFileOptions* _file_opts; - TStorageBackendType::type _storage_type; - TUniqueId _fragment_instance_id; - const std::vector& _output_expr_ctxs; - - // If the result file format is plain text, like CSV, this _file_writer is owned by this FileResultWriter. - // If the result file format is Parquet, this _file_writer is owned by _parquet_writer. - std::unique_ptr _file_writer; - // parquet file writer - ParquetWriterWrapper* _parquet_writer = nullptr; - // Used to buffer the export data of plain text - // TODO(cmy): I simply use a stringstrteam to buffer the data, to avoid calling - // file writer's write() for every single row. - // But this cannot solve the problem of a row of data that is too large. - // For example: bitmap_to_string() may return large volumn of data. - // And the speed is relative low, in my test, is about 6.5MB/s. 
- std::stringstream _plain_text_outstream; - static const size_t OUTSTREAM_BUFFER_SIZE_BYTES; - - // current written bytes, used for split data - int64_t _current_written_bytes = 0; - // the suffix idx of export file name, start at 0 - int _file_idx = 0; - - RuntimeProfile* _parent_profile; // profile from result sink, not owned - // total time cost on append batch operation - RuntimeProfile::Counter* _append_row_batch_timer = nullptr; - // tuple convert timer, child timer of _append_row_batch_timer - RuntimeProfile::Counter* _convert_tuple_timer = nullptr; - // file write timer, child timer of _append_row_batch_timer - RuntimeProfile::Counter* _file_write_timer = nullptr; - // time of closing the file writer - RuntimeProfile::Counter* _writer_close_timer = nullptr; - // number of written rows - RuntimeProfile::Counter* _written_rows_counter = nullptr; - // bytes of written data - RuntimeProfile::Counter* _written_data_bytes = nullptr; - - // _sinker and _output_batch are not owned by FileResultWriter - BufferControlBlock* _sinker = nullptr; - RowBatch* _output_batch = nullptr; - // set to true if the final statistic result is sent - bool _is_result_sent = false; - bool _header_sent = false; -}; - -} // namespace doris diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp index 32430698e4..5f3b2c47ee 100644 --- a/be/src/runtime/plan_fragment_executor.cpp +++ b/be/src/runtime/plan_fragment_executor.cpp @@ -25,10 +25,8 @@ #include #include "exec/data_sink.h" -#include "exec/exchange_node.h" #include "exec/exec_node.h" #include "exec/scan_node.h" -#include "runtime/data_stream_mgr.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/memory/mem_tracker.h" @@ -151,7 +149,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, if (_runtime_state->enable_vectorized_exec()) { static_cast(exch_node)->set_num_senders(num_senders); } else { - static_cast(exch_node)->set_num_senders(num_senders); + return Status::NotSupported("Non-vectorized engine is not supported since Doris 1.3+."); } } @@ -639,12 +637,8 @@ void PlanFragmentExecutor::cancel(const PPlanFragmentCancelReason& reason, const // must close stream_mgr to avoid dead lock in Exchange Node auto env = _runtime_state->exec_env(); auto id = _runtime_state->fragment_instance_id(); - if (_runtime_state->enable_vectorized_exec()) { - env->vstream_mgr()->cancel(id); - } else { - env->stream_mgr()->cancel(id); - env->result_mgr()->cancel(id); - } + DCHECK(_runtime_state->enable_vectorized_exec()); + env->vstream_mgr()->cancel(id); // Cancel the result queue manager used by spark doris connector _exec_env->result_queue_mgr()->update_queue_status(id, Status::Aborted(msg)); } diff --git a/be/src/runtime/result_file_sink.cpp b/be/src/runtime/result_file_sink.cpp deleted file mode 100644 index 91143980cd..0000000000 --- a/be/src/runtime/result_file_sink.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "runtime/result_file_sink.h" - -#include "common/config.h" -#include "exprs/expr.h" -#include "runtime/buffer_control_block.h" -#include "runtime/exec_env.h" -#include "runtime/file_result_writer.h" -#include "runtime/result_buffer_mgr.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "util/uid_util.h" - -namespace doris { - -ResultFileSink::ResultFileSink(const RowDescriptor& row_desc, - const std::vector& t_output_expr, const TResultFileSink& sink) - : DataStreamSender(nullptr, 0, row_desc), _t_output_expr(t_output_expr) { - CHECK(sink.__isset.file_options); - _file_opts.reset(new ResultFileOptions(sink.file_options)); - CHECK(sink.__isset.storage_backend_type); - _storage_type = sink.storage_backend_type; - _is_top_sink = true; - - _name = "ResultFileSink"; - //for impl csv_with_name and csv_with_names_and_types - _header_type = sink.header_type; - _header = sink.header; -} - -ResultFileSink::ResultFileSink(const RowDescriptor& row_desc, - const std::vector& t_output_expr, const TResultFileSink& sink, - const std::vector& destinations, - ObjectPool* pool, int sender_id, DescriptorTbl& descs) - : DataStreamSender(pool, sender_id, row_desc), - _t_output_expr(t_output_expr), - _output_row_descriptor(descs.get_tuple_descriptor(sink.output_tuple_id), false) { - CHECK(sink.__isset.file_options); - _file_opts.reset(new ResultFileOptions(sink.file_options)); - CHECK(sink.__isset.storage_backend_type); - _storage_type = sink.storage_backend_type; - _is_top_sink = false; - DCHECK_EQ(destinations.size(), 1); - _channel_shared_ptrs.emplace_back(new Channel( - this, _output_row_descriptor, destinations[0].brpc_server, - destinations[0].fragment_instance_id, sink.dest_node_id, _buf_size, true, true)); - _channels.push_back(_channel_shared_ptrs.back().get()); - - _name = "ResultFileSink"; - //for impl csv_with_name and csv_with_names_and_types - _header_type = sink.header_type; - _header = sink.header; -} - -ResultFileSink::~ResultFileSink() { - if (_output_batch != nullptr) { - delete _output_batch; - } -} - -Status ResultFileSink::init(const TDataSink& tsink) { - return Status::OK(); -} - -Status ResultFileSink::prepare_exprs(RuntimeState* state) { - // From the thrift expressions create the real exprs. - RETURN_IF_ERROR(Expr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_expr_ctxs)); - // Prepare the exprs to run. 
- RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc)); - return Status::OK(); -} - -Status ResultFileSink::prepare(RuntimeState* state) { - RETURN_IF_ERROR(DataSink::prepare(state)); - std::stringstream title; - title << "DataBufferSender (dst_fragment_instance_id=" - << print_id(state->fragment_instance_id()) << ")"; - // create profile - _profile = state->obj_pool()->add(new RuntimeProfile(title.str())); - // prepare output_expr - RETURN_IF_ERROR(prepare_exprs(state)); - - CHECK(_file_opts.get() != nullptr); - if (_is_top_sink) { - // create sender - RETURN_IF_ERROR(state->exec_env()->result_mgr()->create_sender( - state->fragment_instance_id(), _buf_size, &_sender, - _state->enable_pipeline_exec())); - // create writer - _writer.reset(new (std::nothrow) FileResultWriter( - _file_opts.get(), _storage_type, state->fragment_instance_id(), _output_expr_ctxs, - _profile, _sender.get(), nullptr, state->return_object_data_as_binary())); - } else { - // init channel - _profile = _pool->add(new RuntimeProfile(title.str())); - _state = state; - _serialize_batch_timer = ADD_TIMER(profile(), "SerializeBatchTime"); - _bytes_sent_counter = ADD_COUNTER(profile(), "BytesSent", TUnit::BYTES); - _local_bytes_send_counter = ADD_COUNTER(profile(), "LocalBytesSent", TUnit::BYTES); - _uncompressed_bytes_counter = - ADD_COUNTER(profile(), "UncompressedRowBatchSize", TUnit::BYTES); - // create writer - _output_batch = new RowBatch(_output_row_descriptor, 1024); - _writer.reset(new (std::nothrow) FileResultWriter( - _file_opts.get(), _storage_type, state->fragment_instance_id(), _output_expr_ctxs, - _profile, nullptr, _output_batch, state->return_object_data_as_binary())); - } - _writer->set_header_info(_header_type, _header); - RETURN_IF_ERROR(_writer->init(state)); - for (int i = 0; i < _channels.size(); ++i) { - RETURN_IF_ERROR(_channels[i]->init(state)); - } - return Status::OK(); -} - -Status ResultFileSink::open(RuntimeState* state) { - return Expr::open(_output_expr_ctxs, state); -} - -Status ResultFileSink::send(RuntimeState* state, RowBatch* batch) { - RETURN_IF_ERROR(_writer->append_row_batch(batch)); - return Status::OK(); -} - -Status ResultFileSink::close(RuntimeState* state, Status exec_status) { - if (_closed) { - return Status::OK(); - } - - Status final_status = exec_status; - // close the writer - if (_writer) { - Status st = _writer->close(); - if (!st.ok() && exec_status.ok()) { - // close file writer failed, should return this error to client - final_status = st; - } - } - if (_is_top_sink) { - // close sender, this is normal path end - if (_sender) { - _sender->update_num_written_rows(_writer == nullptr ? 
0 : _writer->get_written_rows()); - _sender->close(final_status); - } - state->exec_env()->result_mgr()->cancel_at_time( - time(nullptr) + config::result_buffer_cancelled_interval_time, - state->fragment_instance_id()); - } else { - if (final_status.ok()) { - RETURN_IF_ERROR(serialize_batch(_output_batch, _cur_pb_batch, _channels.size())); - for (auto channel : _channels) { - RETURN_IF_ERROR(channel->send_batch(_cur_pb_batch)); - } - } - Status final_st = Status::OK(); - for (int i = 0; i < _channels.size(); ++i) { - Status st = _channels[i]->close(state); - if (!st.ok() && final_st.ok()) { - final_st = st; - } - } - // wait all channels to finish - for (int i = 0; i < _channels.size(); ++i) { - Status st = _channels[i]->close_wait(state); - if (!st.ok() && final_st.ok()) { - final_st = st; - } - } - // release row batch - _output_batch->reset(); - } - - Expr::close(_output_expr_ctxs, state); - - _closed = true; - return Status::OK(); -} - -void ResultFileSink::set_query_statistics(std::shared_ptr statistics) { - if (_is_top_sink) { - _sender->set_query_statistics(statistics); - } else { - _query_statistics = statistics; - } -} - -} // namespace doris diff --git a/be/src/runtime/result_file_sink.h b/be/src/runtime/result_file_sink.h deleted file mode 100644 index 042e80d867..0000000000 --- a/be/src/runtime/result_file_sink.h +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "common/status.h" -#include "gen_cpp/PaloInternalService_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/data_stream_sender.h" -#include "runtime/descriptors.h" - -namespace doris { - -class RowBatch; -class ObjectPool; -class RuntimeState; -class RuntimeProfile; -class BufferControlBlock; -class ExprContext; -class ResultWriter; -struct ResultFileOptions; - -class ResultFileSink : public DataStreamSender { -public: - ResultFileSink(const RowDescriptor& row_desc, const std::vector& select_exprs, - const TResultFileSink& sink); - ResultFileSink(const RowDescriptor& row_desc, const std::vector& select_exprs, - const TResultFileSink& sink, - const std::vector& destinations, ObjectPool* pool, - int sender_id, DescriptorTbl& descs); - virtual ~ResultFileSink(); - virtual Status init(const TDataSink& thrift_sink) override; - virtual Status prepare(RuntimeState* state) override; - virtual Status open(RuntimeState* state) override; - // send data in 'batch' to this backend stream mgr - // Blocks until all rows in batch are placed in the buffer - virtual Status send(RuntimeState* state, RowBatch* batch) override; - // Flush all buffered data and close all existing channels to destination - // hosts. Further send() calls are illegal after calling close(). 
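// A compact sketch (toy types, not the deleted classes) of the "first error
// wins" convention used by ResultFileSink::close() above: every channel is
// still closed and waited on, but only the first non-OK status is kept and
// returned to the caller.
#include <string>
#include <vector>

struct Status {
    bool ok() const { return msg.empty(); }
    std::string msg; // empty means OK
};

struct Channel {
    Status close() { return {}; }      // placeholder for the real close
    Status close_wait() { return {}; } // placeholder for the real wait
};

Status close_all(std::vector<Channel>& channels) {
    Status final_st; // OK by default
    for (auto& ch : channels) { // close every channel, even after an error
        Status st = ch.close();
        if (!st.ok() && final_st.ok()) final_st = st; // remember only the first error
    }
    for (auto& ch : channels) { // then wait for all of them to finish
        Status st = ch.close_wait();
        if (!st.ok() && final_st.ok()) final_st = st;
    }
    return final_st;
}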
- virtual Status close(RuntimeState* state, Status exec_status) override; - virtual RuntimeProfile* profile() override { return _profile; } - - void set_query_statistics(std::shared_ptr statistics) override; - -private: - Status prepare_exprs(RuntimeState* state); - // set file options when sink type is FILE - std::unique_ptr _file_opts; - TStorageBackendType::type _storage_type; - - // Owned by the RuntimeState. - const std::vector& _t_output_expr; - std::vector _output_expr_ctxs; - RowDescriptor _output_row_descriptor; - - std::shared_ptr _sender; - std::shared_ptr _writer; - RowBatch* _output_batch = nullptr; - int _buf_size = 1024; // Allocated from _pool - bool _is_top_sink = true; - std::string _header; - std::string _header_type; -}; - -} // namespace doris diff --git a/be/src/runtime/result_sink.cpp b/be/src/runtime/result_sink.cpp deleted file mode 100644 index 625e495ffa..0000000000 --- a/be/src/runtime/result_sink.cpp +++ /dev/null @@ -1,132 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "runtime/result_sink.h" - -#include "common/config.h" -#include "exprs/expr.h" -#include "runtime/buffer_control_block.h" -#include "runtime/exec_env.h" -#include "runtime/file_result_writer.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/mysql_result_writer.h" -#include "runtime/result_buffer_mgr.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/thread_context.h" -#include "util/uid_util.h" -#include "vec/exprs/vexpr.h" - -namespace doris { - -ResultSink::ResultSink(const RowDescriptor& row_desc, const std::vector& t_output_expr, - const TResultSink& sink, int buffer_size) - : _row_desc(row_desc), _t_output_expr(t_output_expr), _buf_size(buffer_size) { - if (!sink.__isset.type || sink.type == TResultSinkType::MYSQL_PROTOCAL) { - _sink_type = TResultSinkType::MYSQL_PROTOCAL; - } else { - _sink_type = sink.type; - } - - _name = "ResultSink"; -} - -ResultSink::~ResultSink() {} - -Status ResultSink::prepare_exprs(RuntimeState* state) { - // From the thrift expressions create the real exprs. - RETURN_IF_ERROR(Expr::create_expr_trees(state->obj_pool(), _t_output_expr, &_output_expr_ctxs)); - // Prepare the exprs to run. 
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc));
-    return Status::OK();
-}
-
-Status ResultSink::prepare(RuntimeState* state) {
-    RETURN_IF_ERROR(DataSink::prepare(state));
-    std::stringstream title;
-    title << "DataBufferSender (dst_fragment_instance_id="
-          << print_id(state->fragment_instance_id()) << ")";
-    // create profile
-    _profile = state->obj_pool()->add(new RuntimeProfile(title.str()));
-    // prepare output_expr
-    RETURN_IF_ERROR(prepare_exprs(state));
-
-    // create sender
-    RETURN_IF_ERROR(state->exec_env()->result_mgr()->create_sender(
-            state->fragment_instance_id(), _buf_size, &_sender, state->enable_pipeline_exec()));
-
-    // create writer based on sink type
-    switch (_sink_type) {
-    case TResultSinkType::MYSQL_PROTOCAL:
-        _writer.reset(new (std::nothrow) MysqlResultWriter(
-                _sender.get(), _output_expr_ctxs, _profile, state->return_object_data_as_binary()));
-        break;
-    default:
-        return Status::InternalError("Unknown result sink type");
-    }
-
-    RETURN_IF_ERROR(_writer->init(state));
-    return Status::OK();
-}
-
-Status ResultSink::open(RuntimeState* state) {
-    return Expr::open(_output_expr_ctxs, state);
-}
-
-Status ResultSink::send(RuntimeState* state, RowBatch* batch) {
-    // Sending the result does not check the query memory limit. This avoids the
-    // query being cancelled when the limit is reached after the result has
-    // already been produced.
-    STOP_CHECK_THREAD_MEM_TRACKER_LIMIT();
-    return _writer->append_row_batch(batch);
-}
-
-Status ResultSink::close(RuntimeState* state, Status exec_status) {
-    if (_closed) {
-        return Status::OK();
-    }
-
-    Status final_status = exec_status;
-    // close the writer
-    if (_writer) {
-        Status st = _writer->close();
-        if (!st.ok() && exec_status.ok()) {
-            // Closing the writer failed; this error should be returned to the client.
-            final_status = st;
-        }
-    }
-
-    // close the sender; this is the normal end of the path
-    if (_sender) {
-        _sender->update_num_written_rows(_writer == nullptr ? 0 : _writer->get_written_rows());
-        _sender->update_max_peak_memory_bytes();
-        _sender->close(final_status);
-    }
-    state->exec_env()->result_mgr()->cancel_at_time(
-            time(nullptr) + config::result_buffer_cancelled_interval_time,
-            state->fragment_instance_id());
-
-    Expr::close(_output_expr_ctxs, state);
-
-    return DataSink::close(state, exec_status);
-}
-
-void ResultSink::set_query_statistics(std::shared_ptr<QueryStatistics> statistics) {
-    _sender->set_query_statistics(statistics);
-}
-
-} // namespace doris
-/* vim: set ts=4 sw=4 sts=4 tw=100 : */
diff --git a/be/src/runtime/result_sink.h b/be/src/runtime/result_sink.h
deleted file mode 100644
index 1846deff11..0000000000
--- a/be/src/runtime/result_sink.h
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
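// A sketch of the RAII idiom behind STOP_CHECK_THREAD_MEM_TRACKER_LIMIT() in
// ResultSink::send() above: a scoped guard flips a thread-local "check the
// limit" flag off and restores it on scope exit. The flag name and guard class
// here are assumptions for illustration; Doris's real macro may be implemented
// differently.
#include <cassert>

thread_local bool g_check_mem_limit = true; // assumed thread-local flag

class ScopedStopMemCheck {
public:
    ScopedStopMemCheck() : _saved(g_check_mem_limit) { g_check_mem_limit = false; }
    ~ScopedStopMemCheck() { g_check_mem_limit = _saved; } // restore on scope exit
    ScopedStopMemCheck(const ScopedStopMemCheck&) = delete;
    ScopedStopMemCheck& operator=(const ScopedStopMemCheck&) = delete;

private:
    bool _saved;
};

void send_result_rows() {
    ScopedStopMemCheck guard; // limit checks stay off until this function returns
    assert(!g_check_mem_limit);
    // ... serialize and ship the already-computed result batch ...
}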
- -#pragma once - -#include "common/status.h" -#include "exec/data_sink.h" -#include "gen_cpp/PaloInternalService_types.h" -#include "gen_cpp/PlanNodes_types.h" - -namespace doris { - -class ObjectPool; -class RowBatch; -class ObjectPool; -class RuntimeState; -class RuntimeProfile; -class BufferControlBlock; -class ExprContext; -class ResultWriter; -struct ResultFileOptions; - -namespace vectorized { -class VExprContext; -} - -class ResultSink : public DataSink { -public: - ResultSink(const RowDescriptor& row_desc, const std::vector& select_exprs, - const TResultSink& sink, int buffer_size); - virtual ~ResultSink(); - virtual Status prepare(RuntimeState* state) override; - virtual Status open(RuntimeState* state) override; - // send data in 'batch' to this backend stream mgr - // Blocks until all rows in batch are placed in the buffer - virtual Status send(RuntimeState* state, RowBatch* batch) override; - // Flush all buffered data and close all existing channels to destination - // hosts. Further send() calls are illegal after calling close(). - virtual Status close(RuntimeState* state, Status exec_status) override; - virtual RuntimeProfile* profile() override { return _profile; } - - void set_query_statistics(std::shared_ptr statistics) override; - -private: - Status prepare_exprs(RuntimeState* state); - TResultSinkType::type _sink_type; - - // Owned by the RuntimeState. - const RowDescriptor& _row_desc; - - // Owned by the RuntimeState. - const std::vector& _t_output_expr; - std::vector _output_expr_ctxs; - - std::shared_ptr _sender; - std::shared_ptr _writer; - RuntimeProfile* _profile; // Allocated from _pool - int _buf_size; // Allocated from _pool -}; - -} // namespace doris diff --git a/be/src/runtime/sorted_run_merger.cc b/be/src/runtime/sorted_run_merger.cc deleted file mode 100644 index 31ae06f47a..0000000000 --- a/be/src/runtime/sorted_run_merger.cc +++ /dev/null @@ -1,341 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/sorted-run-merger.cc -// and modified by Doris - -#include "runtime/sorted_run_merger.h" - -#include -#include - -#include "exprs/expr.h" -#include "runtime/descriptors.h" -#include "runtime/row_batch.h" -#include "runtime/sorter.h" -#include "runtime/thread_context.h" -#include "runtime/tuple_row.h" -#include "util/debug_util.h" -#include "util/defer_op.h" -#include "util/runtime_profile.h" - -using std::vector; - -namespace doris { - -// BatchedRowSupplier returns individual rows in a batch obtained from a sorted input -// run (a RunBatchSupplier). Used as the heap element in the min heap maintained by the -// merger. 
-// next() advances the row supplier to the next row in the input batch and retrieves -// the next batch from the input if the current input batch is exhausted. Transfers -// ownership from the current input batch to an output batch if requested. -class SortedRunMerger::BatchedRowSupplier { -public: - // Construct an instance from a sorted input run. - BatchedRowSupplier(SortedRunMerger* parent, const RunBatchSupplier& sorted_run) - : _sorted_run(sorted_run), - _input_row_batch(nullptr), - _input_row_batch_index(-1), - _parent(parent) {} - - virtual ~BatchedRowSupplier() = default; - - // Retrieves the first batch of sorted rows from the run. - virtual Status init(bool* done) { - *done = false; - RETURN_IF_ERROR(_sorted_run(&_input_row_batch)); - if (_input_row_batch == nullptr) { - *done = true; - return Status::OK(); - } - RETURN_IF_ERROR(next(nullptr, done)); - return Status::OK(); - } - - // Increment the current row index. If the current input batch is exhausted fetch the - // next one from the sorted run. Transfer ownership to transfer_batch if not nullptr. - virtual Status next(RowBatch* transfer_batch, bool* done) { - DCHECK(_input_row_batch != nullptr); - ++_input_row_batch_index; - if (_input_row_batch_index < _input_row_batch->num_rows()) { - *done = false; - } else { - ScopedTimer timer(_parent->_get_next_batch_timer); - if (transfer_batch != nullptr) { - _input_row_batch->transfer_resource_ownership(transfer_batch); - } - - RETURN_IF_ERROR(_sorted_run(&_input_row_batch)); - DCHECK(_input_row_batch == nullptr || _input_row_batch->num_rows() > 0); - *done = _input_row_batch == nullptr; - _input_row_batch_index = 0; - } - return Status::OK(); - } - - TupleRow* current_row() const { return _input_row_batch->get_row(_input_row_batch_index); } - - RowBatch* get_row_batch() const { return _input_row_batch; } - -protected: - friend class SortedRunMerger; - - // The run from which this object supplies rows. - RunBatchSupplier _sorted_run; - - // The current input batch being processed. - RowBatch* _input_row_batch; - - // Index into _input_row_batch of the current row being processed. - int _input_row_batch_index; - - // The parent merger instance. - SortedRunMerger* _parent; -}; - -class SortedRunMerger::ParallelBatchedRowSupplier : public SortedRunMerger::BatchedRowSupplier { -public: - // Construct an instance from a sorted input run. - ParallelBatchedRowSupplier(SortedRunMerger* parent, const RunBatchSupplier& sorted_run) - : BatchedRowSupplier(parent, sorted_run), _input_row_batch_backup(nullptr) {} - - ~ParallelBatchedRowSupplier() { - // when have the limit clause need to wait the _pull_task_thread join terminate - _cancel = true; - _backup_ready = false; - _batch_prepared_cv.notify_one(); - _pull_task_thread.join(); - - delete _input_row_batch; - delete _input_row_batch_backup; - } - - // Retrieves the first batch of sorted rows from the run. - Status init(bool* done) override { - *done = false; - _pull_task_thread = - std::thread(&SortedRunMerger::ParallelBatchedRowSupplier::process_sorted_run_task, - this, thread_context()->thread_mem_tracker_mgr->limiter_mem_tracker()); - - RETURN_IF_ERROR(next(nullptr, done)); - return Status::OK(); - } - - // Increment the current row index. If the current input batch is exhausted fetch the - // next one from the sorted run. Transfer ownership to transfer_batch if not nullptr. 
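// ParallelBatchedRowSupplier overlaps fetching with merging: a background
// thread fills a backup batch while the merger drains the current one, and a
// condition variable coordinates the handoff between next() below and
// process_sorted_run_task(). A self-contained sketch of that single-slot
// producer/consumer exchange (toy generic type; the real class additionally
// handles cancellation and error propagation):
#include <condition_variable>
#include <mutex>
#include <optional>

template <typename T>
class SingleSlotExchange {
public:
    // Producer: wait until the slot is empty, then fill it.
    void put(T value) {
        std::unique_lock<std::mutex> lock(_mu);
        _cv.wait(lock, [this] { return !_slot.has_value(); });
        _slot = std::move(value);
        _cv.notify_one();
    }

    // Consumer: wait until the slot is full, then drain it.
    T take() {
        std::unique_lock<std::mutex> lock(_mu);
        _cv.wait(lock, [this] { return _slot.has_value(); });
        T value = std::move(*_slot);
        _slot.reset();
        _cv.notify_one();
        return value;
    }

private:
    std::mutex _mu;
    std::condition_variable _cv;
    std::optional<T> _slot;
};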
-    Status next(RowBatch* transfer_batch, bool* done) override {
-        ++_input_row_batch_index;
-        if (_input_row_batch && _input_row_batch_index < _input_row_batch->num_rows()) {
-            *done = false;
-        } else {
-            ScopedTimer<MonotonicStopWatch> timer(_parent->_get_next_batch_timer);
-            if (_input_row_batch && transfer_batch != nullptr) {
-                _input_row_batch->transfer_resource_ownership(transfer_batch);
-            }
-            // release the memory of the child merge
-            delete _input_row_batch;
-
-            std::unique_lock<std::mutex> lock(_mutex);
-            _batch_prepared_cv.wait(lock, [this]() { return _backup_ready.load(); });
-
-            // switch _input_row_batch_backup to _input_row_batch
-            _input_row_batch = _input_row_batch_backup;
-            _input_row_batch_index = 0;
-            _input_row_batch_backup = nullptr;
-            _backup_ready = false;
-            DCHECK(_input_row_batch == nullptr || _input_row_batch->num_rows() > 0);
-
-            *done = _input_row_batch == nullptr;
-            _batch_prepared_cv.notify_one();
-        }
-        return Status::OK();
-    }
-
-private:
-    // The backup row batch, filled from _sorted_run by the background pull task.
-    RowBatch* _input_row_batch_backup;
-
-    std::atomic_bool _backup_ready {false};
-
-    std::atomic_bool _cancel {false};
-
-    std::thread _pull_task_thread;
-
-    Status _status_backup;
-
-    std::mutex _mutex;
-
-    // signals a new batch, or the eos/cancelled condition
-    std::condition_variable _batch_prepared_cv;
-
-    void process_sorted_run_task(const std::shared_ptr<MemTrackerLimiter>& mem_tracker) {
-        SCOPED_ATTACH_TASK(mem_tracker);
-        std::unique_lock<std::mutex> lock(_mutex);
-        while (true) {
-            _batch_prepared_cv.wait(lock, [this]() { return !_backup_ready.load(); });
-            if (_cancel) {
-                break;
-            }
-
-            // pull the next merged batch from the sorted run
-            _status_backup = _sorted_run(&_input_row_batch_backup);
-            _backup_ready = true;
-            Defer defer_op {[this]() { _batch_prepared_cv.notify_one(); }};
-
-            if (!_status_backup.ok() || _input_row_batch_backup == nullptr || _cancel) {
-                if (!_status_backup.ok()) _input_row_batch_backup = nullptr;
-                break;
-            }
-        }
-    }
-};
-
-void SortedRunMerger::heapify(int parent_index) {
-    int left_index = 2 * parent_index + 1;
-    int right_index = left_index + 1;
-    if (left_index >= _min_heap.size()) {
-        return;
-    }
-    int least_child = 0;
-    // Find the least child of the parent.
-    if (right_index >= _min_heap.size() ||
-        _compare_less_than(_min_heap[left_index]->current_row(),
-                           _min_heap[right_index]->current_row())) {
-        least_child = left_index;
-    } else {
-        least_child = right_index;
-    }
-
-    // If the parent is out of place, swap it with the least child and invoke
-    // heapify recursively.
-    if (_compare_less_than(_min_heap[least_child]->current_row(),
-                           _min_heap[parent_index]->current_row())) {
-        iter_swap(_min_heap.begin() + least_child, _min_heap.begin() + parent_index);
-        heapify(least_child);
-    }
-}
-
-SortedRunMerger::SortedRunMerger(const TupleRowComparator& compare_less_than,
-                                 RowDescriptor* row_desc, RuntimeProfile* profile,
-                                 bool deep_copy_input)
-        : _compare_less_than(compare_less_than),
-          _input_row_desc(row_desc),
-          _deep_copy_input(deep_copy_input) {
-    _get_next_timer = ADD_TIMER(profile, "MergeGetNext");
-    _get_next_batch_timer = ADD_TIMER(profile, "MergeGetNextBatch");
-}
-
-Status SortedRunMerger::prepare(const vector<RunBatchSupplier>& input_runs, bool parallel) {
-    DCHECK_EQ(_min_heap.size(), 0);
-    _min_heap.reserve(input_runs.size());
-    for (const RunBatchSupplier& input_run : input_runs) {
-        BatchedRowSupplier* new_elem =
-                _pool.add(parallel ?
new ParallelBatchedRowSupplier(this, input_run) - : new BatchedRowSupplier(this, input_run)); - DCHECK(new_elem != nullptr); - bool empty = false; - RETURN_IF_ERROR(new_elem->init(&empty)); - if (!empty) { - _min_heap.push_back(new_elem); - } - } - - // Construct the min heap from the sorted runs. - const int last_parent = (_min_heap.size() / 2) - 1; - for (int i = last_parent; i >= 0; --i) { - heapify(i); - } - return Status::OK(); -} - -void SortedRunMerger::transfer_all_resources(class doris::RowBatch* transfer_resource_batch) { - for (BatchedRowSupplier* batched_row_supplier : _min_heap) { - auto row_batch = batched_row_supplier->get_row_batch(); - if (row_batch != nullptr) { - row_batch->transfer_resource_ownership(transfer_resource_batch); - } - } -} - -Status SortedRunMerger::get_next(RowBatch* output_batch, bool* eos) { - ScopedTimer timer(_get_next_timer); - if (_min_heap.empty()) { - *eos = true; - return Status::OK(); - } - - while (!output_batch->at_capacity()) { - BatchedRowSupplier* min = _min_heap[0]; - int output_row_index = output_batch->add_row(); - TupleRow* output_row = output_batch->get_row(output_row_index); - if (_deep_copy_input) { - min->current_row()->deep_copy(output_row, _input_row_desc->tuple_descriptors(), - output_batch->tuple_data_pool(), false); - } else { - // Simply copy tuple pointers if deep_copy is false. - memcpy(output_row, min->current_row(), - _input_row_desc->tuple_descriptors().size() * sizeof(Tuple*)); - } - - output_batch->commit_last_row(); - - bool min_run_complete = false; - // Advance to the next element in min. output_batch is supplied to transfer - // resource ownership if the input batch in min is exhausted. - RETURN_IF_ERROR(min->next(_deep_copy_input ? nullptr : output_batch, &min_run_complete)); - if (min_run_complete) { - // Remove the element from the heap. - iter_swap(_min_heap.begin(), _min_heap.end() - 1); - _min_heap.pop_back(); - if (_min_heap.empty()) break; - } - - heapify(0); - } - - *eos = _min_heap.empty(); - return Status::OK(); -} - -ChildSortedRunMerger::ChildSortedRunMerger(const TupleRowComparator& compare_less_than, - RowDescriptor* row_desc, RuntimeProfile* profile, - uint32_t row_batch_size, bool deep_copy_input) - : SortedRunMerger(compare_less_than, row_desc, profile, deep_copy_input), - _eos(false), - _row_batch_size(row_batch_size) { - _get_next_timer = ADD_TIMER(profile, "ChildMergeGetNext"); - _get_next_batch_timer = ADD_TIMER(profile, "ChildMergeGetNextBatch"); -} - -Status ChildSortedRunMerger::get_batch(RowBatch** output_batch) { - *output_batch = nullptr; - if (_eos) { - return Status::OK(); - } - - _current_row_batch.reset(new RowBatch(*_input_row_desc, _row_batch_size)); - - bool eos = false; - RETURN_IF_ERROR(get_next(_current_row_batch.get(), &eos)); - *output_batch = - UNLIKELY(_current_row_batch->num_rows() == 0) ? nullptr : _current_row_batch.release(); - _eos = eos; - - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/runtime/sorted_run_merger.h b/be/src/runtime/sorted_run_merger.h deleted file mode 100644 index 4326692522..0000000000 --- a/be/src/runtime/sorted_run_merger.h +++ /dev/null @@ -1,128 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/sorted-run-merger.h -// and modified by Doris - -#pragma once - -#include - -#include "common/object_pool.h" -#include "util/tuple_row_compare.h" - -namespace doris { - -class RowBatch; -class RowDescriptor; -class RuntimeProfile; - -// SortedRunMerger is used to merge multiple sorted runs of tuples. A run is a sorted -// sequence of row batches, which are fetched from a RunBatchSupplier function object. -// Merging is implemented using a binary min-heap that maintains the run with the next -// tuple in sorted order at the top of the heap. -// -// Merged batches of rows are retrieved from SortedRunMerger via calls to get_next(). -// The merger is constructed with a boolean flag deep_copy_input. -// If true, sorted output rows are deep copied into the data pool of the output batch. -// If false, get_next() only copies tuple pointers (TupleRows) into the output batch, -// and transfers resource ownership from the input batches to the output batch when -// an input batch is processed. -class SortedRunMerger { -public: - // Function that returns the next batch of rows from an input sorted run. The batch - // is owned by the supplier (i.e. not SortedRunMerger). eos is indicated by an nullptr - // batch being returned. - typedef std::function RunBatchSupplier; - - SortedRunMerger(const TupleRowComparator& compare_less_than, RowDescriptor* row_desc, - RuntimeProfile* profile, bool deep_copy_input); - - virtual ~SortedRunMerger() = default; - - // Prepare this merger to merge and return rows from the sorted runs in 'input_runs'. - // Retrieves the first batch from each run and sets up the binary heap implementing - // the priority queue. - Status prepare(const std::vector& input_runs, bool parallel = false); - - // Return the next batch of sorted rows from this merger. - Status get_next(RowBatch* output_batch, bool* eos); - - // Only Child class implement this Method, Return the next batch of sorted rows from this merger. - virtual Status get_batch(RowBatch** output_batch) { - return Status::InternalError("no support method get_batch(RowBatch** output_batch)"); - } - - // Called to finalize a merge when deep_copy is false. Transfers resources from - // all input batches to the specified output batch. - void transfer_all_resources(RowBatch* transfer_resource_batch); - -protected: - class BatchedRowSupplier; - class ParallelBatchedRowSupplier; - - // Assuming the element at parent_index is the only out of place element in the heap, - // restore the heap property (i.e. swap elements so parent <= children). - void heapify(int parent_index); - - // The binary min-heap used to merge rows from the sorted input runs. Since the heap is - // stored in a 0-indexed array, the 0-th element is the minimum element in the heap, - // and the children of the element at index i are 2*i+1 and 2*i+2. 
The heap property is
-    // that the row of the parent element is <= the rows of the child elements, according to
-    // the comparator _compare_less_than.
-    // The BatchedRowSupplier objects used in the _min_heap are owned by this
-    // SortedRunMerger instance.
-    std::vector<BatchedRowSupplier*> _min_heap;
-
-    // Row comparator. Returns true if lhs < rhs.
-    TupleRowComparator _compare_less_than;
-
-    // Descriptor for the rows provided by the input runs. Owned by the exec node through
-    // which this merger was created.
-    RowDescriptor* _input_row_desc;
-
-    // True if rows must be deep copied into the output batch.
-    bool _deep_copy_input;
-
-    // Pool of BatchedRowSupplier instances.
-    ObjectPool _pool;
-
-    // Times calls to get_next().
-    RuntimeProfile::Counter* _get_next_timer;
-
-    // Times calls to get the next batch of rows from the input run.
-    RuntimeProfile::Counter* _get_next_batch_timer;
-};
-
-class ChildSortedRunMerger : public SortedRunMerger {
-public:
-    ChildSortedRunMerger(const TupleRowComparator& compare_less_than, RowDescriptor* row_desc,
-                         RuntimeProfile* profile, uint32_t row_batch_size, bool deep_copy_input);
-
-    Status get_batch(RowBatch** output_batch) override;
-
-private:
-    // Owning pointer, to prevent a memory leak through get_batch(RowBatch**).
-    std::unique_ptr<RowBatch> _current_row_batch;
-
-    // True once the data in the merger is exhausted.
-    bool _eos = false;
-
-    uint32_t _row_batch_size;
-};
-
-} // namespace doris
diff --git a/be/src/runtime/spill_sorter.cc b/be/src/runtime/spill_sorter.cc
deleted file mode 100644
index 97ec436956..0000000000
--- a/be/src/runtime/spill_sorter.cc
+++ /dev/null
@@ -1,1323 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "runtime/spill_sorter.h"
-
-#include
-#include
-
-#include "runtime/buffered_block_mgr2.h"
-#include "runtime/row_batch.h"
-#include "runtime/runtime_state.h"
-#include "runtime/sorted_run_merger.h"
-#include "util/debug_util.h"
-#include "util/runtime_profile.h"
-
-using std::deque;
-using std::string;
-using std::vector;
-
-using std::bind;
-using std::function;
-using std::mem_fn;
-using std::unique_ptr;
-
-namespace doris {
-
-// Number of pinned blocks required for a merge.
-const int BLOCKS_REQUIRED_FOR_MERGE = 3;
-
-// Error message when pinning fixed or variable length blocks failed.
-// TODO: Add the node id that initiated the sort
-const string PIN_FAILED_ERROR_MSG_1 = "Failed to pin block for ";
-const string PIN_FAILED_ERROR_MSG_2 =
-        "-length data needed "
-        "for sorting. Reducing query concurrency or increasing the memory limit may help "
-        "this query to complete successfully.";
-
-const string MEM_ALLOC_FAILED_ERROR_MSG_1 = "Failed to allocate block for ";
-const string MEM_ALLOC_FAILED_ERROR_MSG_2 =
-        "-length "
-        "data needed for sorting. 
Reducing query concurrency or increasing the " - "memory limit may help this query to complete successfully."; - -static std::string get_pin_failed_error_msg(const std::string& block_type) { - std::stringstream error_msg; - error_msg << PIN_FAILED_ERROR_MSG_1 << block_type << PIN_FAILED_ERROR_MSG_2; - return error_msg.str(); -} - -static std::string get_mem_alloc_failed_error_msg(const std::string& block_type) { - std::stringstream error_msg; - error_msg << MEM_ALLOC_FAILED_ERROR_MSG_1 << block_type << MEM_ALLOC_FAILED_ERROR_MSG_2; - return error_msg.str(); -} - -// A run is a sequence of blocks containing tuples that are or will eventually be in -// sorted order. -// A run may maintain two sequences of blocks - one containing the tuples themselves, -// (i.e. fixed-len slots and ptrs to var-len data), and the other for the var-length -// column data pointed to by those tuples. -// Tuples in a run may be sorted in place (in-memory) and merged using a merger. -class SpillSorter::Run { -public: - // materialize_slots is true for runs constructed from input rows. The input rows are - // materialized into single sort tuples using the expressions in - // _sort_tuple_slot_expr_ctxs. For intermediate merges, the tuples are already - // materialized so materialize_slots is false. - Run(SpillSorter* parent, TupleDescriptor* sort_tuple_desc, bool materialize_slots); - - ~Run() { delete_all_blocks(); } - - // Initialize the run for input rows by allocating the minimum number of required - // blocks - one block for fixed-len data added to _fixed_len_blocks, one for the - // initially unsorted var-len data added to _var_len_blocks, and one to copy sorted - // var-len data into (_var_len_copy_block). - Status init(); - - // Add a batch of input rows to the current run. Returns the number - // of rows actually added in num_processed. If the run is full (no more blocks can - // be allocated), num_processed may be less than the number of rows in the batch. - // If _materialize_slots is true, materializes the input rows using the expressions - // in _sorter->_sort_tuple_slot_expr_ctxs, else just copies the input rows. - template - Status add_batch(RowBatch* batch, int start_index, int* num_processed); - - // Attaches all fixed-len and var-len blocks to the given row batch. - void transfer_resources(RowBatch* row_batch); - - // Unpins all the blocks in a sorted run. Var-length column data is copied into new - // blocks in sorted order. Pointers in the original tuples are converted to offsets - // from the beginning of the sequence of var-len data blocks. - Status unpin_all_blocks(); - - // Deletes all blocks. - void delete_all_blocks(); - - // Interface for merger - get the next batch of rows from this run. The callee (Run) - // still owns the returned batch. Calls get_next(RowBatch*, bool*). - Status get_next_batch(RowBatch** sorted_batch); - -private: - friend class SpillSorter; - friend class TupleSorter; - - // Fill output_batch with rows from this run. If convert_offset_to_ptr is true, offsets - // in var-length slots are converted back to pointers. Only row pointers are copied - // into output_batch. - // If this run was unpinned, one block (2 if there are var-len slots) is pinned while - // rows are filled into output_batch. The block is unpinned before the next block is - // pinned. At most 1 (2) block(s) will be pinned at any time. - // If the run was pinned, the blocks are not unpinned (SpillSorter holds on to the memory). 
- // In either case, all rows in output_batch will have their fixed and var-len data from - // the same block. - // TODO: If we leave the last run to be merged in memory, the fixed-len blocks can be - // unpinned as they are consumed. - template - Status get_next(RowBatch* output_batch, bool* eos); - - // Check if a run can be extended by allocating additional blocks from the block - // manager. Always true when building a sorted run in an intermediate merge, because - // the current block(s) can be unpinned before getting the next free block (so a block - // is always available) - bool can_extend_run() const; - - // Collect the non-null var-len (e.g. STRING) slots from 'src' in var_slots and return - // the total length of all var_len slots in total_var_len. - void collect_non_null_varslots(Tuple* src, vector* var_len_values, - int* total_var_len); - - // Check if the current run can be extended by a block. Add the newly allocated block - // to block_sequence, or set added to false if the run could not be extended. - // If the run is sorted (produced by an intermediate merge), unpin the last block in - // block_sequence before allocating and adding a new block - the run can always be - // extended in this case. If the run is unsorted, check _max_blocks_in_unsorted_run - // to see if a block can be added to the run. Also updates the sort bytes counter. - Status try_add_block(vector* block_sequence, bool* added); - - // Prepare to read a sorted run. Pins the first block(s) in the run if the run was - // previously unpinned. - Status prepare_read(); - - // Copy the StringValue data in var_values to dest in order and update the StringValue - // ptrs to point to the copied data. - void copy_var_len_data(char* dest, const vector& var_values); - - // Copy the StringValue in var_values to dest in order. Update the StringValue ptrs to - // contain an offset to the copied data. Parameter 'offset' is the offset for the first - // StringValue. - void copy_var_len_data_convert_offset(char* dest, int64_t offset, - const vector& var_values); - - // Returns true if we have var-len slots and there are var-len blocks. - bool has_var_len_blocks() const { return _has_var_len_slots && !_var_len_blocks.empty(); } - - // Parent sorter object. - const SpillSorter* _sorter; - - // Materialized sort tuple. Input rows are materialized into 1 tuple (with descriptor - // _sort_tuple_desc) before sorting. - const TupleDescriptor* _sort_tuple_desc; - - // Sizes of sort tuple and block. - const int _sort_tuple_size; - const int _block_size; - - const bool _has_var_len_slots; - - // True if the sort tuple must be materialized from the input batch in add_batch(). - // _materialize_slots is true for runs being constructed from input batches, and - // is false for runs being constructed from intermediate merges. - const bool _materialize_slots; - - // True if the run is sorted. Set to true after an in-memory sort, and initialized to - // true for runs resulting from merges. - bool _is_sorted; - - // True if all blocks in the run are pinned. - bool _is_pinned; - - // Sequence of blocks in this run containing the fixed-length portion of the sort - // tuples comprising this run. The data pointed to by the var-len slots are in - // _var_len_blocks. - // If _is_sorted is true, the tuples in _fixed_len_blocks will be in sorted order. - // _fixed_len_blocks[i] is nullptr iff it has been deleted. 
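// Illustration (not from the deleted file): when a run is unpinned,
// copy_var_len_data_convert_offset() above rewrites var-len pointers as offsets
// from the start of the var-len block sequence, i.e.
// offset = block_index * block_size + offset_within_block. A minimal sketch of
// that mapping and its inverse, applied when blocks are pinned back for reading:
#include <cstddef>
#include <cstdint>

struct VarLenLocation {
    std::size_t block_index;     // which var-len block
    std::size_t offset_in_block; // byte offset inside that block
};

inline int64_t to_global_offset(const VarLenLocation& loc, std::size_t block_size) {
    return static_cast<int64_t>(loc.block_index * block_size + loc.offset_in_block);
}

inline VarLenLocation from_global_offset(int64_t offset, std::size_t block_size) {
    const auto off = static_cast<std::size_t>(offset);
    return {off / block_size, off % block_size};
}
// e.g. with 8MB blocks, offset 25'165'824 maps back to {block 3, offset 0}.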
- vector _fixed_len_blocks; - - // Sequence of blocks in this run containing the var-length data corresponding to the - // var-length columns from _fixed_len_blocks. These are reconstructed to be in sorted - // order in unpin_all_blocks(). - // _var_len_blocks[i] is nullptr iff it has been deleted. - vector _var_len_blocks; - - // If there are var-len slots, an extra pinned block is used to copy out var-len data - // into a new sequence of blocks in sorted order. _var_len_copy_block stores this - // extra allocated block. - BufferedBlockMgr2::Block* _var_len_copy_block; - - // Number of tuples so far in this run. - int64_t _num_tuples; - - // Number of tuples returned via get_next(), maintained for debug purposes. - int64_t _num_tuples_returned; - - // _buffered_batch is used to return TupleRows to the merger when this run is being - // merged. _buffered_batch is returned in calls to get_next_batch(). - unique_ptr _buffered_batch; - - // Members used when a run is read in get_next(). - // The index into the _fixed_len_blocks and _var_len_blocks vectors of the current blocks being - // processed in get_next(). - int _fixed_len_blocks_index; - int _var_len_blocks_index; - - // If true, pin the next fixed and var-len blocks and delete the previous ones - // in the next call to get_next(). Set during the previous call to get_next(). - // Not used if a run is already pinned. - bool _pin_next_fixed_len_block; - bool _pin_next_var_len_block; - - // Offset into the current fixed length data block being processed. - int _fixed_len_block_offset; -}; // class SpillSorter::Run - -// Sorts a sequence of tuples from a run in place using a provided tuple comparator. -// Quick sort is used for sequences of tuples larger that 16 elements, and insertion sort -// is used for smaller sequences. The TupleSorter is initialized with a RuntimeState -// instance to check for cancellation during an in-memory sort. -class SpillSorter::TupleSorter { -public: - TupleSorter(const TupleRowComparator& less_than_comp, int64_t block_size, int tuple_size, - RuntimeState* state); - - ~TupleSorter(); - - // Performs a quicksort for tuples in 'run' followed by an insertion sort to - // finish smaller blocks. - // Returns early if _state->is_cancelled() is true. No status - // is returned - the caller must check for cancellation. - void sort(Run* run); - -private: - static const int INSERTION_THRESHOLD = 16; - - // Helper class used to iterate over tuples in a run during quick sort and insertion sort. - class TupleIterator { - public: - TupleIterator(TupleSorter* parent, int64_t index) - : _parent(parent), _index(index), _current_tuple(nullptr) { - DCHECK_GE(index, 0); - DCHECK_LE(index, _parent->_run->_num_tuples); - // If the run is empty, only _index and _current_tuple are initialized. - if (_parent->_run->_num_tuples == 0) { - return; - } - // If the iterator is initialized to past the end, set up _buffer_start and - // _block_index as if it pointing to the last tuple. Add _tuple_size bytes to - // _current_tuple, so everything is correct when prev() is invoked. 
- int past_end_bytes = 0; - if (UNLIKELY(index >= _parent->_run->_num_tuples)) { - past_end_bytes = parent->_tuple_size; - _index = _parent->_run->_num_tuples; - index = _index - 1; - } - _block_index = index / parent->_block_capacity; - _buffer_start = parent->_run->_fixed_len_blocks[_block_index]->buffer(); - int block_offset = (index % parent->_block_capacity) * parent->_tuple_size; - _current_tuple = _buffer_start + block_offset + past_end_bytes; - } - - ~TupleIterator() {} - - // Sets _current_tuple to point to the next tuple in the run. Increments - // block_index and resets buffer if the next tuple is in the next block. - void next() { - _current_tuple += _parent->_tuple_size; - ++_index; - if (UNLIKELY(_current_tuple > _buffer_start + _parent->_last_tuple_block_offset && - _index < _parent->_run->_num_tuples)) { - // Don't increment block index, etc. past the end. - ++_block_index; - DCHECK_LT(_block_index, _parent->_run->_fixed_len_blocks.size()); - _buffer_start = _parent->_run->_fixed_len_blocks[_block_index]->buffer(); - _current_tuple = _buffer_start; - } - } - - // Sets current_tuple to point to the previous tuple in the run. Decrements - // block_index and resets buffer if the new tuple is in the previous block. - void prev() { - _current_tuple -= _parent->_tuple_size; - --_index; - if (UNLIKELY(_current_tuple < _buffer_start && _index >= 0)) { - --_block_index; - DCHECK_GE(_block_index, 0); - _buffer_start = _parent->_run->_fixed_len_blocks[_block_index]->buffer(); - _current_tuple = _buffer_start + _parent->_last_tuple_block_offset; - } - } - - private: - friend class TupleSorter; - - // Pointer to the tuple sorter. - TupleSorter* _parent; - - // Index of the current tuple in the run. - int64_t _index; - - // Pointer to the current tuple. - uint8_t* _current_tuple; - - // Start of the buffer containing current tuple. - uint8_t* _buffer_start; - - // Index into _run._fixed_len_blocks of the block containing the current tuple. - int _block_index; - }; - - // Size of the tuples in memory. - const int _tuple_size; - - // Number of tuples per block in a run. - const int _block_capacity; - - // Offset in bytes of the last tuple in a block, calculated from block and tuple sizes. - const int _last_tuple_block_offset; - - // Tuple comparator that returns true if lhs < rhs. - const TupleRowComparator _less_than_comp; - - // Runtime state instance to check for cancellation. Not owned. - RuntimeState* const _state; - - // The run to be sorted. - Run* _run; - - // Temporarily allocated space to copy and swap tuples (Both are used in partition()). - // _temp_tuple points to _temp_tuple_buffer. Owned by this TupleSorter instance. - TupleRow* _temp_tuple_row; - uint8_t* _temp_tuple_buffer; - uint8_t* _swap_buffer; - - // Perform an insertion sort for rows in the range [first, last) in a run. - void insertion_sort(const TupleIterator& first, const TupleIterator& last); - - // Partitions the sequence of tuples in the range [first, last) in a run into two - // groups around the mid._current_tuple - i.e. tuples in first group are <= the mid._current_tuple - // and tuples in the second group are >= mid._current_tuple. Tuples are swapped in place to create the - // groups and the index to the first element in the second group is returned. - // Checks _state->is_cancelled() and returns early with an invalid result if true. - TupleIterator partition(TupleIterator first, TupleIterator last, TupleIterator& mid); - - // Select the median of three iterator tuples. 
taking the median tends to help us select better - // pivots that more evenly split the input range. This method makes selection of - // bad pivots very infrequent. - void find_the_median(TupleIterator& first, TupleIterator& last, TupleIterator& mid); - - // Performs a quicksort of rows in the range [first, last) followed by insertion sort - // for smaller groups of elements. - // Checks _state->is_cancelled() and returns early if true. - void sort_helper(TupleIterator first, TupleIterator last); - - // Swaps tuples pointed to by left and right using the swap buffer. - void swap(uint8_t* left, uint8_t* right); -}; // class TupleSorter - -// SpillSorter::Run methods -SpillSorter::Run::Run(SpillSorter* parent, TupleDescriptor* sort_tuple_desc, bool materialize_slots) - : _sorter(parent), - _sort_tuple_desc(sort_tuple_desc), - _sort_tuple_size(sort_tuple_desc->byte_size()), - _block_size(parent->_block_mgr->max_block_size()), - _has_var_len_slots(sort_tuple_desc->has_varlen_slots()), - _materialize_slots(materialize_slots), - _is_sorted(!materialize_slots), - _is_pinned(true), - _var_len_copy_block(nullptr), - _num_tuples(0) {} - -Status SpillSorter::Run::init() { - BufferedBlockMgr2::Block* block = nullptr; - RETURN_IF_ERROR( - _sorter->_block_mgr->get_new_block(_sorter->_block_mgr_client, nullptr, &block)); - if (block == nullptr) { - return Status::MemoryLimitExceeded(get_mem_alloc_failed_error_msg("fixed")); - } - _fixed_len_blocks.push_back(block); - - if (_has_var_len_slots) { - RETURN_IF_ERROR( - _sorter->_block_mgr->get_new_block(_sorter->_block_mgr_client, nullptr, &block)); - if (block == nullptr) { - return Status::MemoryLimitExceeded(get_mem_alloc_failed_error_msg("variable")); - } - _var_len_blocks.push_back(block); - - if (!_is_sorted) { - RETURN_IF_ERROR(_sorter->_block_mgr->get_new_block(_sorter->_block_mgr_client, nullptr, - &_var_len_copy_block)); - if (_var_len_copy_block == nullptr) { - return Status::MemoryLimitExceeded(get_mem_alloc_failed_error_msg("variable")); - } - } - } - if (!_is_sorted) { - _sorter->_initial_runs_counter->update(1); - } - return Status::OK(); -} - -template -Status SpillSorter::Run::add_batch(RowBatch* batch, int start_index, int* num_processed) { - DCHECK(!_fixed_len_blocks.empty()); - *num_processed = 0; - BufferedBlockMgr2::Block* cur_fixed_len_block = _fixed_len_blocks.back(); - - DCHECK_EQ(_materialize_slots, !_is_sorted); - if (!_materialize_slots) { - // If materialize slots is false the run is being constructed for an - // intermediate merge and the sort tuples have already been materialized. - // The input row should have the same schema as the sort tuples. - DCHECK_EQ(batch->row_desc().tuple_descriptors().size(), 1); - DCHECK_EQ(batch->row_desc().tuple_descriptors()[0], _sort_tuple_desc); - } - - // Input rows are copied/materialized into tuples allocated in _fixed_len_blocks. - // The variable length column data are copied into blocks stored in _var_len_blocks. - // Input row processing is split into two loops. - // The inner loop processes as many input rows as will fit in cur_fixed_len_block. - // The outer loop allocates a new block for fixed-len data if the input batch is - // not exhausted. - - // cur_input_index is the index into the input 'batch' of the current input row being - // processed. 
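// Illustration (not from the deleted file): the two-loop copy described in the
// comments above sizes each inner loop by how many whole tuples still fit in
// the current fixed-length block, capped by the rows left in the batch:
#include <algorithm>
#include <cstddef>

std::size_t tuples_that_fit(std::size_t bytes_remaining_in_block,
                            std::size_t tuple_size,
                            std::size_t rows_left_in_batch) {
    const std::size_t block_capacity = bytes_remaining_in_block / tuple_size;
    return std::min(block_capacity, rows_left_in_batch);
}
// e.g. 4096 bytes remaining and 48-byte tuples with 200 rows left:
// min(4096 / 48, 200) == min(85, 200) == 85 tuples copied this round.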
- int cur_input_index = start_index; - vector string_values; - string_values.reserve(_sort_tuple_desc->string_slots().size()); - while (cur_input_index < batch->num_rows()) { - // tuples_remaining is the number of tuples to copy/materialize into - // cur_fixed_len_block. - int tuples_remaining = cur_fixed_len_block->bytes_remaining() / _sort_tuple_size; - tuples_remaining = std::min(batch->num_rows() - cur_input_index, tuples_remaining); - - for (int i = 0; i < tuples_remaining; ++i) { - int total_var_len = 0; - TupleRow* input_row = batch->get_row(cur_input_index); - Tuple* new_tuple = cur_fixed_len_block->allocate(_sort_tuple_size); - if (_materialize_slots) { - new_tuple->materialize_exprs( - input_row, *_sort_tuple_desc, _sorter->_sort_tuple_slot_expr_ctxs, nullptr, - &string_values, &total_var_len); - if (total_var_len > _sorter->_block_mgr->max_block_size()) { - std::stringstream error_msg; - error_msg << "Variable length data in a single tuple larger than block size " - << total_var_len << " > " << _sorter->_block_mgr->max_block_size(); - return Status::InternalError(error_msg.str()); - } - } else { - memcpy(new_tuple, input_row->get_tuple(0), _sort_tuple_size); - if (has_var_len_data) { - collect_non_null_varslots(new_tuple, &string_values, &total_var_len); - } - } - - if (has_var_len_data) { - DCHECK_GT(_var_len_blocks.size(), 0); - BufferedBlockMgr2::Block* cur_var_len_block = _var_len_blocks.back(); - if (cur_var_len_block->bytes_remaining() < total_var_len) { - bool added = false; - RETURN_IF_ERROR(try_add_block(&_var_len_blocks, &added)); - if (added) { - cur_var_len_block = _var_len_blocks.back(); - } else { - // There was not enough space in the last var-len block for this tuple, and - // the run could not be extended. Return the fixed-len allocation and exit. - cur_fixed_len_block->return_allocation(_sort_tuple_size); - return Status::OK(); - } - } - - // Sorting of tuples containing array values is not implemented. The planner - // combined with projection should guarantee that none are in each tuple. - // for(const SlotDescriptor* collection_slot : - // _sort_tuple_desc->collection_slots()) { - // DCHECK(new_tuple->is_null(collection_slot->null_indicator_offset())); - // } - - char* var_data_ptr = cur_var_len_block->allocate(total_var_len); - if (_materialize_slots) { - copy_var_len_data(var_data_ptr, string_values); - } else { - int64_t offset = (_var_len_blocks.size() - 1) * _block_size; - offset += var_data_ptr - reinterpret_cast(cur_var_len_block->buffer()); - copy_var_len_data_convert_offset(var_data_ptr, offset, string_values); - } - } - ++_num_tuples; - ++*num_processed; - ++cur_input_index; - } - // There we already copy the tuple data to Block, So we need to release the mem - // in expr mempool to prevent memory leak - ExprContext::free_local_allocations(_sorter->_sort_tuple_slot_expr_ctxs); - - // If there are still rows left to process, get a new block for the fixed-length - // tuples. If the run is already too long, return. 
-        if (cur_input_index < batch->num_rows()) {
-            bool added;
-            RETURN_IF_ERROR(try_add_block(&_fixed_len_blocks, &added));
-            if (added) {
-                cur_fixed_len_block = _fixed_len_blocks.back();
-            } else {
-                return Status::OK();
-            }
-        }
-    }
-    return Status::OK();
-}
-
-void SpillSorter::Run::transfer_resources(RowBatch* row_batch) {
-    DCHECK(row_batch != nullptr);
-    for (BufferedBlockMgr2::Block* block : _fixed_len_blocks) {
-        if (block != nullptr) {
-            row_batch->add_block(block);
-        }
-    }
-    _fixed_len_blocks.clear();
-    for (BufferedBlockMgr2::Block* block : _var_len_blocks) {
-        if (block != nullptr) {
-            row_batch->add_block(block);
-        }
-    }
-    _var_len_blocks.clear();
-    if (_var_len_copy_block != nullptr) {
-        row_batch->add_block(_var_len_copy_block);
-        _var_len_copy_block = nullptr;
-    }
-}
-
-void SpillSorter::Run::delete_all_blocks() {
-    for (BufferedBlockMgr2::Block* block : _fixed_len_blocks) {
-        if (block != nullptr) {
-            block->del();
-        }
-    }
-    _fixed_len_blocks.clear();
-    for (BufferedBlockMgr2::Block* block : _var_len_blocks) {
-        if (block != nullptr) {
-            block->del();
-        }
-    }
-    _var_len_blocks.clear();
-    if (_var_len_copy_block != nullptr) {
-        _var_len_copy_block->del();
-        _var_len_copy_block = nullptr;
-    }
-}
-
-Status SpillSorter::Run::unpin_all_blocks() {
-    vector<BufferedBlockMgr2::Block*> sorted_var_len_blocks;
-    sorted_var_len_blocks.reserve(_var_len_blocks.size());
-    vector<StringValue*> string_values;
-    int64_t var_data_offset = 0;
-    int total_var_len = 0;
-    string_values.reserve(_sort_tuple_desc->string_slots().size());
-    BufferedBlockMgr2::Block* cur_sorted_var_len_block = nullptr;
-    if (has_var_len_blocks()) {
-        DCHECK(_var_len_copy_block != nullptr);
-        sorted_var_len_blocks.push_back(_var_len_copy_block);
-        cur_sorted_var_len_block = _var_len_copy_block;
-    } else {
-        DCHECK(_var_len_copy_block == nullptr);
-    }
-
-    for (int i = 0; i < _fixed_len_blocks.size(); ++i) {
-        BufferedBlockMgr2::Block* cur_fixed_block = _fixed_len_blocks[i];
-        if (has_var_len_blocks()) {
-            for (int block_offset = 0; block_offset < cur_fixed_block->valid_data_len();
-                 block_offset += _sort_tuple_size) {
-                Tuple* cur_tuple =
-                        reinterpret_cast<Tuple*>(cur_fixed_block->buffer() + block_offset);
-                collect_non_null_varslots(cur_tuple, &string_values, &total_var_len);
-                DCHECK(cur_sorted_var_len_block != nullptr);
-                if (cur_sorted_var_len_block->bytes_remaining() < total_var_len) {
-                    bool added = false;
-                    RETURN_IF_ERROR(try_add_block(&sorted_var_len_blocks, &added));
-                    DCHECK(added);
-                    cur_sorted_var_len_block = sorted_var_len_blocks.back();
-                }
-                char* var_data_ptr = cur_sorted_var_len_block->allocate<char>(total_var_len);
-                var_data_offset = _block_size * (sorted_var_len_blocks.size() - 1) +
-                                  (var_data_ptr -
-                                   reinterpret_cast<char*>(cur_sorted_var_len_block->buffer()));
-                copy_var_len_data_convert_offset(var_data_ptr, var_data_offset, string_values);
-            }
-        }
-        RETURN_IF_ERROR(cur_fixed_block->unpin());
-    }
-
-    if (_has_var_len_slots && _var_len_blocks.size() > 0) {
-        DCHECK_GT(sorted_var_len_blocks.back()->valid_data_len(), 0);
-        RETURN_IF_ERROR(sorted_var_len_blocks.back()->unpin());
-    }
-
-    // Clear _var_len_blocks and replace it with the contents of sorted_var_len_blocks.
-    for (BufferedBlockMgr2::Block* var_block : _var_len_blocks) {
-        var_block->del();
-    }
-    _var_len_blocks.clear();
-    sorted_var_len_blocks.swap(_var_len_blocks);
-    // Set _var_len_copy_block to nullptr since it's now in _var_len_blocks and is no longer
-    // needed.
-    _var_len_copy_block = nullptr;
-    _is_pinned = false;
-    return Status::OK();
-}
-
-Status SpillSorter::Run::prepare_read() {
-    _fixed_len_blocks_index = 0;
-    _fixed_len_block_offset = 0;
-    _var_len_blocks_index = 0;
-    _pin_next_fixed_len_block = _pin_next_var_len_block = false;
-    _num_tuples_returned = 0;
-
-    _buffered_batch.reset(new RowBatch(*_sorter->_output_row_desc, _sorter->_state->batch_size()));
-
-    // If the run is pinned, merge is not invoked, so _buffered_batch is not needed
-    // and the individual blocks do not need to be pinned.
-    if (_is_pinned) {
-        return Status::OK();
-    }
-
-    // Attempt to pin the first fixed and var-length blocks. In either case, pinning may
-    // fail if the number of reserved blocks is oversubscribed, see IMPALA-1590.
-    if (_fixed_len_blocks.size() > 0) {
-        bool pinned = false;
-        RETURN_IF_ERROR(_fixed_len_blocks[0]->pin(&pinned));
-        // Temporary work-around for IMPALA-1868. Fail the query with OOM rather than
-        // DCHECK in case block pin fails.
-        if (!pinned) {
-            return Status::MemoryLimitExceeded(get_pin_failed_error_msg("fixed"));
-        }
-    }
-
-    if (_has_var_len_slots && _var_len_blocks.size() > 0) {
-        bool pinned = false;
-        RETURN_IF_ERROR(_var_len_blocks[0]->pin(&pinned));
-        // Temporary work-around for IMPALA-1590. Fail the query with OOM rather than
-        // DCHECK in case block pin fails.
-        if (!pinned) {
-            return Status::MemoryLimitExceeded(get_pin_failed_error_msg("variable"));
-        }
-    }
-    return Status::OK();
-}
-
-Status SpillSorter::Run::get_next_batch(RowBatch** output_batch) {
-    if (_buffered_batch.get() != nullptr) {
-        _buffered_batch->reset();
-        // Fill more rows into _buffered_batch.
-        bool eos = false;
-        if (_has_var_len_slots && !_is_pinned) {
-            RETURN_IF_ERROR(get_next<true>(_buffered_batch.get(), &eos));
-            if (_buffered_batch->num_rows() == 0 && !eos) {
-                // No rows were filled because get_next() had to read the next var-len block
-                // Call get_next() again.
-                RETURN_IF_ERROR(get_next<true>(_buffered_batch.get(), &eos));
-            }
-        } else {
-            RETURN_IF_ERROR(get_next<false>(_buffered_batch.get(), &eos));
-        }
-        DCHECK(eos || _buffered_batch->num_rows() > 0);
-        if (eos) {
-            // No rows are filled in get_next() on eos, so this is safe.
-            DCHECK_EQ(_buffered_batch->num_rows(), 0);
-            _buffered_batch.reset();
-            // The merge is complete. Delete the last blocks in the run.
-            _fixed_len_blocks.back()->del();
-            _fixed_len_blocks[_fixed_len_blocks.size() - 1] = nullptr;
-            if (has_var_len_blocks()) {
-                _var_len_blocks.back()->del();
-                _var_len_blocks[_var_len_blocks.size() - 1] = nullptr;
-            }
-        }
-    }
-
-    // *output_batch == nullptr indicates eos.
-    *output_batch = _buffered_batch.get();
-    return Status::OK();
-}
-
-template <bool convert_offset_to_ptr>
-Status SpillSorter::Run::get_next(RowBatch* output_batch, bool* eos) {
-    if (_fixed_len_blocks_index == _fixed_len_blocks.size()) {
-        *eos = true;
-        DCHECK_EQ(_num_tuples_returned, _num_tuples);
-        return Status::OK();
-    } else {
-        *eos = false;
-    }
-
-    BufferedBlockMgr2::Block* fixed_len_block = _fixed_len_blocks[_fixed_len_blocks_index];
-
-    if (!_is_pinned) {
-        // Pin the next block and delete the previous if set in the previous call to
-        // get_next().
-        if (_pin_next_fixed_len_block) {
-            _fixed_len_blocks[_fixed_len_blocks_index - 1]->del();
-            _fixed_len_blocks[_fixed_len_blocks_index - 1] = nullptr;
-            bool pinned;
-            RETURN_IF_ERROR(fixed_len_block->pin(&pinned));
-            // Temporary work-around for IMPALA-2344. Fail the query with OOM rather than
-            // DCHECK in case block pin fails.
-            if (!pinned) {
-                return Status::MemoryLimitExceeded(get_pin_failed_error_msg("fixed"));
-            }
-            _pin_next_fixed_len_block = false;
-        }
-        if (_pin_next_var_len_block) {
-            _var_len_blocks[_var_len_blocks_index - 1]->del();
-            _var_len_blocks[_var_len_blocks_index - 1] = nullptr;
-            bool pinned;
-            RETURN_IF_ERROR(_var_len_blocks[_var_len_blocks_index]->pin(&pinned));
-            // Temporary work-around for IMPALA-2344. Fail the query with OOM rather than
-            // DCHECK in case block pin fails.
-            if (!pinned) {
-                return Status::MemoryLimitExceeded(get_pin_failed_error_msg("variable"));
-            }
-            _pin_next_var_len_block = false;
-        }
-    }
-
-    // get_next fills rows into the output batch until a block boundary is reached.
-    DCHECK(fixed_len_block != nullptr);
-    while (!output_batch->at_capacity() &&
-           _fixed_len_block_offset < fixed_len_block->valid_data_len()) {
-        DCHECK(fixed_len_block != nullptr);
-        Tuple* input_tuple =
-                reinterpret_cast<Tuple*>(fixed_len_block->buffer() + _fixed_len_block_offset);
-
-        if (convert_offset_to_ptr) {
-            // Convert the offsets in the var-len slots in input_tuple back to pointers.
-            const vector<SlotDescriptor*>& string_slots = _sort_tuple_desc->string_slots();
-            for (int i = 0; i < string_slots.size(); ++i) {
-                SlotDescriptor* slot_desc = string_slots[i];
-                if (input_tuple->is_null(slot_desc->null_indicator_offset())) {
-                    continue;
-                }
-
-                DCHECK(slot_desc->type().is_string_type());
-                StringValue* value = reinterpret_cast<StringValue*>(
-                        input_tuple->get_slot(slot_desc->tuple_offset()));
-                int64_t data_offset = reinterpret_cast<int64_t>(value->ptr);
-
-                // data_offset is an offset in bytes from the beginning of the first block
-                // in _var_len_blocks. Convert it into an index into _var_len_blocks and an
-                // offset within that block.
-                int block_index = data_offset / _block_size;
-                int block_offset = data_offset % _block_size;
-
-                if (block_index > _var_len_blocks_index) {
-                    // We've reached the block boundary for the current var-len block.
-                    // This tuple will be returned in the next call to get_next().
-                    DCHECK_EQ(block_index, _var_len_blocks_index + 1);
-                    DCHECK_EQ(block_offset, 0);
-                    DCHECK_EQ(i, 0);
-                    _var_len_blocks_index = block_index;
-                    _pin_next_var_len_block = true;
-                    break;
-                } else {
-                    DCHECK_EQ(block_index, _var_len_blocks_index) << "block_index: " << block_index;
-                    // Calculate the address implied by the offset and assign it.
-                    value->ptr = reinterpret_cast<char*>(
-                            _var_len_blocks[_var_len_blocks_index]->buffer() + block_offset);
-                } // if (block_index > _var_len_blocks_index)
-            }     // for (int i = 0; i < string_slots.size(); ++i)
-
-            // The var-len data is in the next block, so end this call to get_next().
-            if (_pin_next_var_len_block) {
-                break;
-            }
-        } // if (convert_offset_to_ptr)
-
-        int output_row_idx = output_batch->add_row();
-        output_batch->get_row(output_row_idx)->set_tuple(0, input_tuple);
-        output_batch->commit_last_row();
-        _fixed_len_block_offset += _sort_tuple_size;
-        ++_num_tuples_returned;
-    }
-
-    // Reached the block boundary, need to move to the next block.
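The offset arithmetic in the loop above is worth seeing in isolation: unpinning encodes each var-len pointer as a byte offset from the start of the first var-len block, and this loop inverts it with one division and one modulo. A tiny self-checking example (the block size is an arbitrary stand-in):

#include <cassert>
#include <cstdint>

int main() {
    const int64_t block_size = 8 * 1024 * 1024; // arbitrary example block size
    // Encoding, as in copy_var_len_data_convert_offset(): block 3, byte 17 within it.
    int64_t data_offset = 3 * block_size + 17;
    // Decoding, as in the loop above.
    int64_t block_index = data_offset / block_size;  // 3
    int64_t block_offset = data_offset % block_size; // 17
    assert(block_index == 3 && block_offset == 17);
    return 0;
}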
-    if (_fixed_len_block_offset >= fixed_len_block->valid_data_len()) {
-        _pin_next_fixed_len_block = true;
-        ++_fixed_len_blocks_index;
-        _fixed_len_block_offset = 0;
-    }
-    return Status::OK();
-}
-
-void SpillSorter::Run::collect_non_null_varslots(Tuple* src, vector<StringValue*>* string_values,
-                                                 int* total_var_len) {
-    string_values->clear();
-    *total_var_len = 0;
-    for (const SlotDescriptor* string_slot : _sort_tuple_desc->string_slots()) {
-        if (!src->is_null(string_slot->null_indicator_offset())) {
-            StringValue* string_val =
-                    reinterpret_cast<StringValue*>(src->get_slot(string_slot->tuple_offset()));
-            string_values->push_back(string_val);
-            *total_var_len += string_val->len;
-        }
-    }
-}
-
-Status SpillSorter::Run::try_add_block(vector<BufferedBlockMgr2::Block*>* block_sequence,
-                                       bool* added) {
-    DCHECK(!block_sequence->empty());
-    BufferedBlockMgr2::Block* last_block = block_sequence->back();
-    if (!_is_sorted) {
-        _sorter->_sorted_data_size->update(last_block->valid_data_len());
-        last_block = nullptr;
-    } else {
-        // If the run is sorted, we will unpin the last block and extend the run.
-    }
-
-    BufferedBlockMgr2::Block* new_block;
-    RETURN_IF_ERROR(
-            _sorter->_block_mgr->get_new_block(_sorter->_block_mgr_client, last_block, &new_block));
-    if (new_block != nullptr) {
-        *added = true;
-        block_sequence->push_back(new_block);
-    } else {
-        *added = false;
-    }
-    return Status::OK();
-}
-
-void SpillSorter::Run::copy_var_len_data(char* dest, const vector<StringValue*>& string_values) {
-    for (StringValue* string_val : string_values) {
-        memcpy(dest, string_val->ptr, string_val->len);
-        string_val->ptr = dest;
-        dest += string_val->len;
-    }
-}
-
-void SpillSorter::Run::copy_var_len_data_convert_offset(char* dest, int64_t offset,
-                                                        const vector<StringValue*>& string_values) {
-    for (StringValue* string_val : string_values) {
-        memcpy(dest, string_val->ptr, string_val->len);
-        string_val->ptr = reinterpret_cast<char*>(offset);
-        dest += string_val->len;
-        offset += string_val->len;
-    }
-}
-
-// SpillSorter::TupleSorter methods.
-SpillSorter::TupleSorter::TupleSorter(const TupleRowComparator& comp, int64_t block_size,
-                                      int tuple_size, RuntimeState* state)
-        : _tuple_size(tuple_size),
-          _block_capacity(block_size / tuple_size),
-          _last_tuple_block_offset(tuple_size * ((block_size / tuple_size) - 1)),
-          _less_than_comp(comp),
-          _state(state) {
-    _temp_tuple_buffer = new uint8_t[tuple_size];
-    _temp_tuple_row = reinterpret_cast<TupleRow*>(&_temp_tuple_buffer);
-    _swap_buffer = new uint8_t[tuple_size];
-}
-
-SpillSorter::TupleSorter::~TupleSorter() {
-    delete[] _temp_tuple_buffer;
-    delete[] _swap_buffer;
-}
-
-void SpillSorter::TupleSorter::sort(Run* run) {
-    _run = run;
-    sort_helper(TupleIterator(this, 0), TupleIterator(this, _run->_num_tuples));
-    run->_is_sorted = true;
-}
-
-// Sort the sequence of tuples from [first, last).
-// Begin with a sorted sequence of size 1 [first, first+1).
-// During each pass of the outermost loop, add the next tuple (at position 'i') to
-// the sorted sequence by comparing it to each element of the sorted sequence
-// (reverse order) to find its correct place in the sorted sequence, copying tuples
-// along the way.
-void SpillSorter::TupleSorter::insertion_sort(const TupleIterator& first,
-                                              const TupleIterator& last) {
-    TupleIterator insert_iter = first;
-    insert_iter.next();
-    for (; insert_iter._index < last._index; insert_iter.next()) {
-        // insert_iter points to the tuple after the currently sorted sequence that must
-        // be inserted into the sorted sequence.
Copy to _temp_tuple_row since it may be
-        // overwritten by the one at position 'insert_iter - 1'.
-        memcpy(_temp_tuple_buffer, insert_iter._current_tuple, _tuple_size);
-
-        // 'iter' points to the tuple that _temp_tuple_row will be compared to.
-        // 'copy_to' is where 'iter' should be copied to if it is >= _temp_tuple_row.
-        // 'copy_to' always points to the row after 'iter'.
-        TupleIterator iter = insert_iter;
-        iter.prev();
-        uint8_t* copy_to = insert_iter._current_tuple;
-        while (_less_than_comp(_temp_tuple_row,
-                               reinterpret_cast<TupleRow*>(&iter._current_tuple))) {
-            memcpy(copy_to, iter._current_tuple, _tuple_size);
-            copy_to = iter._current_tuple;
-            // Break if 'iter' has reached the first row, meaning that _temp_tuple_row
-            // will be inserted in position 'first'.
-            if (iter._index <= first._index) {
-                break;
-            }
-            iter.prev();
-        }
-
-        memcpy(copy_to, _temp_tuple_buffer, _tuple_size);
-    }
-}
-
-void SpillSorter::TupleSorter::find_the_median(TupleSorter::TupleIterator& first,
-                                               TupleSorter::TupleIterator& last,
-                                               TupleSorter::TupleIterator& mid) {
-    last.prev();
-    auto f_com_result = _less_than_comp.compare(reinterpret_cast<TupleRow*>(&first._current_tuple),
-                                                reinterpret_cast<TupleRow*>(&mid._current_tuple));
-    auto l_com_result = _less_than_comp.compare(reinterpret_cast<TupleRow*>(&last._current_tuple),
-                                                reinterpret_cast<TupleRow*>(&mid._current_tuple));
-    if (f_com_result == -1 && l_com_result == -1) {
-        if (_less_than_comp(reinterpret_cast<TupleRow*>(&first._current_tuple),
-                            reinterpret_cast<TupleRow*>(&last._current_tuple))) {
-            swap(mid._current_tuple, last._current_tuple);
-        } else {
-            swap(mid._current_tuple, first._current_tuple);
-        }
-    }
-    if (f_com_result == 1 && l_com_result == 1) {
-        if (_less_than_comp(reinterpret_cast<TupleRow*>(&first._current_tuple),
-                            reinterpret_cast<TupleRow*>(&last._current_tuple))) {
-            swap(mid._current_tuple, first._current_tuple);
-        } else {
-            swap(mid._current_tuple, last._current_tuple);
-        }
-    }
-}
-
-SpillSorter::TupleSorter::TupleIterator SpillSorter::TupleSorter::partition(TupleIterator first,
-                                                                            TupleIterator last,
-                                                                            TupleIterator& mid) {
-    find_the_median(first, last, mid);
-
-    // Copy mid._current_tuple into _temp_tuple_buffer since it points to a tuple within
-    // [first, last).
-    memcpy(_temp_tuple_buffer, mid._current_tuple, _tuple_size);
-    while (true) {
-        // Search for the first and last out-of-place elements, and swap them.
-        while (_less_than_comp(reinterpret_cast<TupleRow*>(&first._current_tuple),
-                               _temp_tuple_row)) {
-            first.next();
-        }
-        while (_less_than_comp(_temp_tuple_row,
-                               reinterpret_cast<TupleRow*>(&last._current_tuple))) {
-            last.prev();
-        }
-
-        if (first._index >= last._index) {
-            break;
-        }
-        // Swap first and last tuples.
-        swap(first._current_tuple, last._current_tuple);
-
-        first.next();
-        last.prev();
-    }
-
-    return first;
-}
-
-void SpillSorter::TupleSorter::sort_helper(TupleIterator first, TupleIterator last) {
-    if (UNLIKELY(_state->is_cancelled())) {
-        return;
-    }
-    // Use insertion sort for smaller sequences.
-    while (last._index - first._index > INSERTION_THRESHOLD) {
-        TupleIterator mid(this, first._index + (last._index - first._index) / 2);
-
-        DCHECK(mid._current_tuple != nullptr);
-        // partition() splits the tuples in [first, last) into two groups (<= mid iter
-        // and >= mid iter) in-place. 'cut' is the index of the first tuple in the second group.
-        TupleIterator cut = partition(first, last, mid);
-
-        // Recurse on the smaller partition. This limits stack size to log(n) stack frames.
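The bound comes from always recursing into the side that is at most half the current range and looping on the rest, so at most log2(n) frames are live at once. A generic sketch of the same hybrid driver over an int range (a three-way std::partition is used here so the loop always makes progress; the threshold value is illustrative):

#include <algorithm>

namespace {
const int kInsertionThreshold = 16; // plays the role of INSERTION_THRESHOLD

void insertion_sort(int* first, int* last) {
    for (int* i = first + 1; i < last; ++i) {
        // Rotate *i into its position within the already-sorted prefix.
        std::rotate(std::upper_bound(first, i, *i), i, i + 1);
    }
}

void sort_range(int* first, int* last) {
    while (last - first > kInsertionThreshold) {
        int pivot = first[(last - first) / 2];
        int* mid1 = std::partition(first, last, [pivot](int v) { return v < pivot; });
        int* mid2 = std::partition(mid1, last, [pivot](int v) { return v == pivot; });
        // Recurse on the smaller partition, loop on the larger: stack depth <= log2(n).
        if (last - mid2 < mid1 - first) {
            sort_range(mid2, last);
            last = mid1;
        } else {
            sort_range(first, mid1);
            first = mid2;
        }
    }
    insertion_sort(first, last);
}
} // namespace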
-        if (last._index - cut._index < cut._index - first._index) {
-            sort_helper(cut, last);
-            last = cut;
-        } else {
-            sort_helper(first, cut);
-            first = cut;
-        }
-
-        if (UNLIKELY(_state->is_cancelled())) {
-            return;
-        }
-    }
-
-    insertion_sort(first, last);
-}
-
-inline void SpillSorter::TupleSorter::swap(uint8_t* left, uint8_t* right) {
-    memcpy(_swap_buffer, left, _tuple_size);
-    memcpy(left, right, _tuple_size);
-    memcpy(right, _swap_buffer, _tuple_size);
-}
-
-// SpillSorter methods
-SpillSorter::SpillSorter(const TupleRowComparator& compare_less_than,
-                         const vector<ExprContext*>& slot_materialize_expr_ctxs,
-                         RowDescriptor* output_row_desc, RuntimeProfile* profile,
-                         RuntimeState* state)
-        : _state(state),
-          _compare_less_than(compare_less_than),
-          _in_mem_tuple_sorter(nullptr),
-          _block_mgr(state->block_mgr2()),
-          _block_mgr_client(nullptr),
-          _has_var_len_slots(false),
-          _sort_tuple_slot_expr_ctxs(slot_materialize_expr_ctxs),
-          _output_row_desc(output_row_desc),
-          _unsorted_run(nullptr),
-          _profile(profile),
-          _initial_runs_counter(nullptr),
-          _num_merges_counter(nullptr),
-          _in_mem_sort_timer(nullptr),
-          _sorted_data_size(nullptr),
-          _spilled(false) {}
-
-SpillSorter::~SpillSorter() {
-    // Delete blocks from the block mgr.
-    for (deque<Run*>::iterator it = _sorted_runs.begin(); it != _sorted_runs.end(); ++it) {
-        (*it)->delete_all_blocks();
-    }
-    for (deque<Run*>::iterator it = _merging_runs.begin(); it != _merging_runs.end(); ++it) {
-        (*it)->delete_all_blocks();
-    }
-    if (_unsorted_run != nullptr) {
-        _unsorted_run->delete_all_blocks();
-    }
-    _block_mgr->clear_reservations(_block_mgr_client);
-}
-
-Status SpillSorter::init() {
-    DCHECK(_unsorted_run == nullptr) << "Already initialized";
-    TupleDescriptor* sort_tuple_desc = _output_row_desc->tuple_descriptors()[0];
-    _has_var_len_slots = sort_tuple_desc->has_varlen_slots();
-    _in_mem_tuple_sorter.reset(new TupleSorter(_compare_less_than, _block_mgr->max_block_size(),
-                                               sort_tuple_desc->byte_size(), _state));
-    _unsorted_run = _obj_pool.add(new Run(this, sort_tuple_desc, true));
-
-    _initial_runs_counter = ADD_COUNTER(_profile, "InitialRunsCreated", TUnit::UNIT);
-    _num_merges_counter = ADD_COUNTER(_profile, "TotalMergesPerformed", TUnit::UNIT);
-    _in_mem_sort_timer = ADD_TIMER(_profile, "InMemorySortTime");
-    _sorted_data_size = ADD_COUNTER(_profile, "SortDataSize", TUnit::BYTES);
-
-    int min_blocks_required = BLOCKS_REQUIRED_FOR_MERGE;
-    // Fixed and var-length blocks are separate, so we need BLOCKS_REQUIRED_FOR_MERGE
-    // blocks for both if there is var-length data.
-    if (_output_row_desc->tuple_descriptors()[0]->has_varlen_slots()) {
-        min_blocks_required *= 2;
-    }
-    RETURN_IF_ERROR(_block_mgr->register_client(min_blocks_required, _state, &_block_mgr_client));
-
-    DCHECK(_unsorted_run != nullptr);
-    RETURN_IF_ERROR(_unsorted_run->init());
-    return Status::OK();
-}
-
-Status SpillSorter::add_batch(RowBatch* batch) {
-    DCHECK(_unsorted_run != nullptr);
-    DCHECK(batch != nullptr);
-    int num_processed = 0;
-    int cur_batch_index = 0;
-    while (cur_batch_index < batch->num_rows()) {
-        if (_has_var_len_slots) {
-            RETURN_IF_ERROR(
-                    _unsorted_run->add_batch<true>(batch, cur_batch_index, &num_processed));
-        } else {
-            RETURN_IF_ERROR(
-                    _unsorted_run->add_batch<false>(batch, cur_batch_index, &num_processed));
-        }
-        cur_batch_index += num_processed;
-        if (cur_batch_index < batch->num_rows()) {
-            // The current run is full. Sort it and begin the next one.
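This is the producer half of the external sort in one place: when a run can no longer be extended, it is sorted in memory, unpinned to disk, and a fresh run is started. A toy standalone model of the same fill, sort, spill loop (std::vector stands in for blocks; all names are hypothetical):

#include <algorithm>
#include <vector>

std::vector<std::vector<int>> build_sorted_runs(const std::vector<int>& input,
                                                size_t max_run_size) {
    std::vector<std::vector<int>> sorted_runs; // stand-in for _sorted_runs
    std::vector<int> run;                      // stand-in for _unsorted_run
    for (int v : input) {
        run.push_back(v);
        if (run.size() == max_run_size) { // the run is full
            std::sort(run.begin(), run.end());
            sorted_runs.push_back(std::move(run)); // "spill": unpin_all_blocks() in the real code
            run.clear();                           // begin the next run
        }
    }
    if (!run.empty()) { // analogous to input_done(): sort whatever remains
        std::sort(run.begin(), run.end());
        sorted_runs.push_back(std::move(run));
    }
    return sorted_runs;
}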
- RETURN_IF_ERROR(sort_run()); - RETURN_IF_ERROR(_sorted_runs.back()->unpin_all_blocks()); - _spilled = true; - _unsorted_run = - _obj_pool.add(new Run(this, _output_row_desc->tuple_descriptors()[0], true)); - RETURN_IF_ERROR(_unsorted_run->init()); - } - } - return Status::OK(); -} - -Status SpillSorter::input_done() { - // Sort the tuples accumulated so far in the current run. - RETURN_IF_ERROR(sort_run()); - - if (_sorted_runs.size() == 1) { - // The entire input fit in one run. Read sorted rows in get_next() directly - // from the sorted run. - RETURN_IF_ERROR(_sorted_runs.back()->prepare_read()); - } else { - // At least one merge is necessary. - int blocks_per_run = _has_var_len_slots ? 2 : 1; - int min_buffers_for_merge = _sorted_runs.size() * blocks_per_run; - // Check if the final run needs to be unpinned. - bool unpinned_final = false; - if (_block_mgr->num_free_buffers() < min_buffers_for_merge - blocks_per_run) { - // Number of available buffers is less than the size of the final run and - // the buffers needed to read the remainder of the runs in memory. - // Unpin the final run. - RETURN_IF_ERROR(_sorted_runs.back()->unpin_all_blocks()); - unpinned_final = true; - } else { - // No need to unpin the current run. There is enough memory to stream the - // other runs. - // TODO: revisit. It might be better to unpin some from this run if it means - // we can get double buffering in the other runs. - } - - // For an intermediate merge, intermediate_merge_batch contains deep-copied rows from - // the input runs. If (_unmerged_sorted_runs.size() > max_runs_per_final_merge), - // one or more intermediate merges are required. - // TODO: Attempt to allocate more memory before doing intermediate merges. This may - // be possible if other operators have relinquished memory after the sort has built - // its runs. - if (min_buffers_for_merge > _block_mgr->available_allocated_buffers()) { - DCHECK(unpinned_final); - RETURN_IF_ERROR(merge_intermediate_runs()); - } - - // Create the final merger. - RETURN_IF_ERROR(create_merger(_sorted_runs.size())); - } - return Status::OK(); -} - -Status SpillSorter::get_next(RowBatch* output_batch, bool* eos) { - if (_sorted_runs.size() == 1) { - DCHECK(_sorted_runs.back()->_is_pinned); - // In this case, only TupleRows are copied into output_batch. Sorted tuples are left - // in the pinned blocks in the single sorted run. - RETURN_IF_ERROR(_sorted_runs.back()->get_next(output_batch, eos)); - if (*eos) { - _sorted_runs.back()->transfer_resources(output_batch); - } - } else { - // In this case, rows are deep copied into output_batch. 
- RETURN_IF_ERROR(_merger->get_next(output_batch, eos)); - } - return Status::OK(); -} - -Status SpillSorter::reset() { - _merger.reset(); - _merging_runs.clear(); - _sorted_runs.clear(); - _obj_pool.clear(); - DCHECK(_unsorted_run == nullptr); - _unsorted_run = _obj_pool.add(new Run(this, _output_row_desc->tuple_descriptors()[0], true)); - RETURN_IF_ERROR(_unsorted_run->init()); - return Status::OK(); -} - -Status SpillSorter::sort_run() { - BufferedBlockMgr2::Block* last_block = _unsorted_run->_fixed_len_blocks.back(); - if (last_block->valid_data_len() > 0) { - _sorted_data_size->update(last_block->valid_data_len()); - } else { - last_block->del(); - _unsorted_run->_fixed_len_blocks.pop_back(); - } - if (_has_var_len_slots) { - DCHECK(_unsorted_run->_var_len_copy_block != nullptr); - last_block = _unsorted_run->_var_len_blocks.back(); - if (last_block->valid_data_len() > 0) { - _sorted_data_size->update(last_block->valid_data_len()); - } else { - last_block->del(); - _unsorted_run->_var_len_blocks.pop_back(); - if (_unsorted_run->_var_len_blocks.size() == 0) { - _unsorted_run->_var_len_copy_block->del(); - _unsorted_run->_var_len_copy_block = nullptr; - } - } - } - { - SCOPED_TIMER(_in_mem_sort_timer); - _in_mem_tuple_sorter->sort(_unsorted_run); - RETURN_IF_CANCELLED(_state); - } - _sorted_runs.push_back(_unsorted_run); - _unsorted_run = nullptr; - return Status::OK(); -} - -uint64_t SpillSorter::estimate_merge_mem(uint64_t available_blocks, RowDescriptor* row_desc, - int merge_batch_size) { - bool has_var_len_slots = row_desc->tuple_descriptors()[0]->has_varlen_slots(); - int blocks_per_run = has_var_len_slots ? 2 : 1; - int max_input_runs_per_merge = (available_blocks / blocks_per_run) - 1; - // During a merge, the batches corresponding to the input runs contain only TupleRows. - // (The data itself is in pinned blocks held by the run) - uint64_t input_batch_mem = merge_batch_size * sizeof(Tuple*) * max_input_runs_per_merge; - // Since rows are deep copied into the output batch for the merger, use a pessimistic - // estimate of the memory required. - uint64_t output_batch_mem = RowBatch::AT_CAPACITY_MEM_USAGE; - - return input_batch_mem + output_batch_mem; -} - -Status SpillSorter::merge_intermediate_runs() { - int blocks_per_run = _has_var_len_slots ? 2 : 1; - int max_runs_per_final_merge = _block_mgr->available_allocated_buffers() / blocks_per_run; - - // During an intermediate merge, blocks from the output sorted run will have to be pinned. - int max_runs_per_intermediate_merge = max_runs_per_final_merge - 1; - DCHECK_GT(max_runs_per_intermediate_merge, 1); - // For an intermediate merge, intermediate_merge_batch contains deep-copied rows from - // the input runs. If (_sorted_runs.size() > max_runs_per_final_merge), - // one or more intermediate merges are required. - unique_ptr intermediate_merge_batch; - while (_sorted_runs.size() > max_runs_per_final_merge) { - // An intermediate merge adds one merge to _unmerged_sorted_runs. - // Merging 'runs - (_max_runs_final - 1)' number of runs is sufficient to guarantee - // that the final merge can be performed. - int num_runs_to_merge = - std::min(max_runs_per_intermediate_merge, - _sorted_runs.size() - max_runs_per_intermediate_merge); - RETURN_IF_ERROR(create_merger(num_runs_to_merge)); - RowBatch intermediate_merge_batch(*_output_row_desc, _state->batch_size()); - // merged_run is the new sorted run that is produced by the intermediate merge. 
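To put numbers on the fan-in arithmetic above: suppose 32 buffers are available and the tuples have var-len slots, so each run needs two pinned blocks. Then max_runs_per_final_merge is 32 / 2 = 16, and an intermediate merge can combine at most 15 runs. The same figures drive estimate_merge_mem(): with a 1024-row merge batch, the input batches cost roughly merge_batch_size * sizeof(Tuple*) * max_input_runs bytes. A sketch of that calculation (values are illustrative):

#include <cstdint>
#include <iostream>

int main() {
    const uint64_t available_blocks = 32;  // illustrative
    const uint64_t merge_batch_size = 1024;
    const int blocks_per_run = 2;          // var-len slots present
    const uint64_t max_input_runs = available_blocks / blocks_per_run - 1; // 15
    const uint64_t input_batch_mem = merge_batch_size * sizeof(void*) * max_input_runs;
    std::cout << input_batch_mem << " bytes\n"; // 122880 on a 64-bit build
    return 0;
}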
-        Run* merged_run =
-                _obj_pool.add(new Run(this, _output_row_desc->tuple_descriptors()[0], false));
-        RETURN_IF_ERROR(merged_run->init());
-        bool eos = false;
-        while (!eos) {
-            // Copy rows into the new run until done.
-            int num_copied = 0;
-            RETURN_IF_CANCELLED(_state);
-            RETURN_IF_ERROR(_merger->get_next(&intermediate_merge_batch, &eos));
-            Status ret_status;
-            if (_has_var_len_slots) {
-                ret_status = merged_run->add_batch<true>(&intermediate_merge_batch, 0, &num_copied);
-            } else {
-                ret_status =
-                        merged_run->add_batch<false>(&intermediate_merge_batch, 0, &num_copied);
-            }
-            if (!ret_status.ok()) return ret_status;
-
-            DCHECK_EQ(num_copied, intermediate_merge_batch.num_rows());
-            intermediate_merge_batch.reset();
-        }
-
-        BufferedBlockMgr2::Block* last_block = merged_run->_fixed_len_blocks.back();
-        if (last_block->valid_data_len() > 0) {
-            RETURN_IF_ERROR(last_block->unpin());
-        } else {
-            last_block->del();
-            merged_run->_fixed_len_blocks.pop_back();
-        }
-        if (_has_var_len_slots) {
-            last_block = merged_run->_var_len_blocks.back();
-            if (last_block->valid_data_len() > 0) {
-                RETURN_IF_ERROR(last_block->unpin());
-            } else {
-                last_block->del();
-                merged_run->_var_len_blocks.pop_back();
-            }
-        }
-        merged_run->_is_pinned = false;
-        _sorted_runs.push_back(merged_run);
-    }
-
-    return Status::OK();
-}
-
-Status SpillSorter::create_merger(int num_runs) {
-    DCHECK_GT(num_runs, 1);
-
-    // Clean up the runs from the previous merge.
-    for (deque<Run*>::iterator it = _merging_runs.begin(); it != _merging_runs.end(); ++it) {
-        (*it)->delete_all_blocks();
-    }
-    _merging_runs.clear();
-    _merger.reset(new SortedRunMerger(_compare_less_than, _output_row_desc, _profile, true));
-
-    vector<function<Status(RowBatch**)>> merge_runs;
-    merge_runs.reserve(num_runs);
-    for (int i = 0; i < num_runs; ++i) {
-        Run* run = _sorted_runs.front();
-        RETURN_IF_ERROR(run->prepare_read());
-        // Run::get_next_batch() is used by the merger to retrieve a batch of rows to merge
-        // from this run.
-        merge_runs.push_back(
-                bind<Status>(mem_fn(&Run::get_next_batch), run, std::placeholders::_1));
-        _sorted_runs.pop_front();
-        _merging_runs.push_back(run);
-    }
-    RETURN_IF_ERROR(_merger->prepare(merge_runs));
-
-    _num_merges_counter->update(1);
-    return Status::OK();
-}
-
-} // namespace doris
diff --git a/be/src/runtime/spill_sorter.h b/be/src/runtime/spill_sorter.h
deleted file mode 100644
index 20960b7230..0000000000
--- a/be/src/runtime/spill_sorter.h
+++ /dev/null
@@ -1,215 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <deque>
-
-#include "runtime/buffered_block_mgr2.h"
-#include "util/tuple_row_compare.h"
-
-namespace doris {
-
-class SortedRunMerger;
-class RuntimeProfile;
-class RowBatch;
-
-// SpillSorter contains the external sort implementation.
Its purpose is to sort arbitrarily -// large input data sets with a fixed memory budget by spilling data to disk if -// necessary. BufferedBlockMgr2 is used to allocate and manage blocks of data to be -// sorted. -// -// The client API for SpillSorter is as follows: -// add_batch() is used to add input rows to be sorted. Multiple tuples in an input row are -// materialized into a row with a single tuple (the sort tuple) using the materialization -// exprs in _sort_tuple_slot_expr_ctxs. The sort tuples are sorted according to the sort -// parameters and output by the sorter. -// add_batch() can be called multiple times. -// -// input_done() is called to indicate the end of input. If multiple sorted runs were -// created, it triggers intermediate merge steps (if necessary) and creates the final -// merger that returns results via get_next(). -// -// get_next() is used to retrieve sorted rows. It can be called multiple times. -// add_batch(), input_done() and get_next() must be called in that order. -// -// Batches of input rows are collected into a sequence of pinned BufferedBlockMgr2 blocks -// called a run. The maximum size of a run is determined by the maximum available buffers -// in the block manager. After the run is full, it is sorted in memory, unpinned and the -// next run is collected. The variable-length column data (e.g. string slots) in the -// materialized sort tuples are stored in separate sequence of blocks from the tuples -// themselves. -// When the blocks containing tuples in a run are unpinned, the var-len slot pointers are -// converted to offsets from the start of the first var-len data block. When a block is -// read back, these offsets are converted back to pointers. -// The in-memory sorter sorts the fixed-length tuples in-place. The output rows have the -// same schema as the materialized sort tuples. -// -// After the input is consumed, the sorter is left with one or more sorted runs. The -// client calls get_next(output_batch) to retrieve batches of sorted rows. If there are -// multiple runs, the runs are merged using SortedRunMerger to produce a stream of sorted -// tuples. At least one block per run (two if there are var-length slots) must be pinned -// in memory during a merge, so multiple merges may be necessary if the number of runs is -// too large. During a merge, rows from multiple sorted input runs are compared and copied -// into a single larger run. One input batch is created to hold tuple rows for each -// input run, and one batch is created to hold deep copied rows (i.e. ptrs + data) from -// the output of the merge. -// -// If there is a single sorted run (i.e. no merge required), only tuple rows are -// copied into the output batch supplied by get_next, and the data itself is left in -// pinned blocks held by the sorter. -// -// Note that init() must be called right after the constructor. -// -// During a merge, one row batch is created for each input run, and one batch is created -// for the output of the merge (if is not the final merge). It is assumed that the memory -// for these batches have already been accounted for in the memory budget for the sort. -// That is, the memory for these batches does not come out of the block buffer manager. -// -// TODO: Not necessary to actually copy var-len data - instead take ownership of the -// var-length data in the input batch. Copying can be deferred until a run is unpinned. -// TODO: When the first run is constructed, create a sequence of pointers to materialized -// tuples. 
If the input fits in memory, the pointers can be sorted instead of sorting the -// tuples in place. -class SpillSorter { -public: - // sort_tuple_slot_exprs are the slot exprs used to materialize the tuple to be sorted. - // compare_less_than is a comparator for the sort tuples (returns true if lhs < rhs). - // _merge_batch_size is the size of the batches created to provide rows to the merger - // and retrieve rows from an intermediate merger. - SpillSorter(const TupleRowComparator& compare_less_than, - const std::vector& sort_tuple_slot_expr_ctxs, - RowDescriptor* output_row_desc, RuntimeProfile* profile, RuntimeState* state); - - ~SpillSorter(); - - // Initialization code, including registration to the block_mgr and the initialization - // of the _unsorted_run, both of these may fail. - Status init(); - - // Adds a batch of input rows to the current unsorted run. - Status add_batch(RowBatch* batch); - - // Called to indicate there is no more input. Triggers the creation of merger(s) if - // necessary. - Status input_done(); - - // Get the next batch of sorted output rows from the sorter. - Status get_next(RowBatch* batch, bool* eos); - - // Resets all internal state like ExecNode::reset(). - // init() must have been called, add_batch()/get_next()/input_done() - // may or may not have been called. - Status reset(); - - bool is_spilled() { return _spilled; } - // Estimate the memory overhead in bytes for an intermediate merge, based on the - // maximum number of memory buffers available for the sort, the row descriptor for - // the sorted tuples and the batch size used (in rows). - // This is a pessimistic estimate of the memory needed by the sorter in addition to the - // memory used by the block buffer manager. The memory overhead is 0 if the input fits - // in memory. Merges incur additional memory overhead because row batches are created - // to hold tuple rows from the input runs, and the merger itself deep-copies - // sort-merged rows into its output batch. - static uint64_t estimate_merge_mem(uint64_t available_blocks, RowDescriptor* row_desc, - int merge_batch_size); - -private: - class Run; - class TupleSorter; - - // Create a SortedRunMerger from the first 'num_runs' sorted runs in _sorted_runs and - // assign it to _merger. The runs to be merged are removed from _sorted_runs. - // The SpillSorter sets the deep_copy_input flag to true for the merger, since the blocks - // containing input run data will be unpinned as input runs are read. - Status create_merger(int num_runs); - - // Repeatedly replaces multiple smaller runs in _sorted_runs with a single larger - // merged run until the number of remaining runs is small enough for a single merge. - // At least 1 (2 if var-len slots) block from each sorted run must be pinned for - // a merge. If the number of sorted runs is too large, merge sets of smaller runs - // into large runs until a final merge can be performed. An intermediate row batch - // containing deep copied rows is used for the output of each intermediate merge. - Status merge_intermediate_runs(); - - // Sorts _unsorted_run and appends it to the list of sorted runs. Deletes any empty - // blocks at the end of the run. Updates the sort bytes counter if necessary. - Status sort_run(); - - // Runtime state instance used to check for cancellation. Not owned. - RuntimeState* const _state; - - // In memory sorter and less-than comparator. 
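Stepping back, the contract spelled out in the class comment reduces to a fixed call order for clients: init(), then add_batch() any number of times, then input_done(), then get_next() until eos. A hedged usage sketch (error handling trimmed; how batches are produced and consumed is left as placeholders, not real Doris helpers):

#include <vector>

Status run_external_sort(SpillSorter* sorter, const std::vector<RowBatch*>& inputs,
                         RowBatch* output_batch) {
    RETURN_IF_ERROR(sorter->init()); // must come right after construction
    for (RowBatch* batch : inputs) {
        RETURN_IF_ERROR(sorter->add_batch(batch)); // may sort and unpin full runs
    }
    RETURN_IF_ERROR(sorter->input_done()); // triggers intermediate merges if needed
    bool eos = false;
    while (!eos) {
        RETURN_IF_ERROR(sorter->get_next(output_batch, &eos));
        // ... hand output_batch to the consumer here ...
        output_batch->reset();
    }
    return Status::OK();
}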
-    TupleRowComparator _compare_less_than;
-    std::unique_ptr<TupleSorter> _in_mem_tuple_sorter;
-
-    // Block manager object used to allocate, pin and release runs. Not owned by SpillSorter.
-    BufferedBlockMgr2* _block_mgr;
-
-    // Handle to block mgr to make allocations from.
-    BufferedBlockMgr2::Client* _block_mgr_client;
-
-    // True if the tuples to be sorted have var-length slots.
-    bool _has_var_len_slots;
-
-    // Expressions used to materialize the sort tuple. Contains one expr per slot in the tuple.
-    std::vector<ExprContext*> _sort_tuple_slot_expr_ctxs;
-
-    // Descriptor for the sort tuple. Input rows are materialized into 1 tuple before
-    // sorting. Not owned by the SpillSorter.
-    RowDescriptor* _output_row_desc;
-
-    /////////////////////////////////////////
-    // BEGIN: Members that must be reset()
-
-    // The current unsorted run that is being collected. Is sorted and added to
-    // _sorted_runs after it is full (i.e. number of blocks allocated == max available
-    // buffers) or after the input is complete. Owned and placed in _obj_pool.
-    // When it is added to _sorted_runs, it is set to nullptr.
-    Run* _unsorted_run;
-
-    // List of sorted runs that have been produced but not merged. _unsorted_run is added
-    // to this list after an in-memory sort. Sorted runs produced by intermediate merges
-    // are also added to this list. Runs are added to the object pool.
-    std::deque<Run*> _sorted_runs;
-
-    // Merger object (intermediate or final) currently used to produce sorted runs.
-    // Only one merge is performed at a time. Will never be used if the input fits in
-    // memory.
-    std::unique_ptr<SortedRunMerger> _merger;
-
-    // Runs that are currently being processed by _merger.
-    // These runs can be deleted when we are done with the current merge.
-    std::deque<Run*> _merging_runs;
-
-    // Pool of owned Run objects. Maintains Run objects across non-freeing reset() calls.
-    ObjectPool _obj_pool;
-
-    // END: Members that must be reset()
-    /////////////////////////////////////////
-
-    // Runtime profile and counters for this sorter instance.
- RuntimeProfile* _profile; - RuntimeProfile::Counter* _initial_runs_counter; - RuntimeProfile::Counter* _num_merges_counter; - RuntimeProfile::Counter* _in_mem_sort_timer; - RuntimeProfile::Counter* _sorted_data_size; - - bool _spilled; -}; - -} // namespace doris diff --git a/be/src/service/backend_service.cpp b/be/src/service/backend_service.cpp index fd34f81487..0853aa1fa7 100644 --- a/be/src/service/backend_service.cpp +++ b/be/src/service/backend_service.cpp @@ -35,7 +35,6 @@ #include "gen_cpp/Types_types.h" #include "gutil/strings/substitute.h" #include "olap/storage_engine.h" -#include "runtime/data_stream_mgr.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/export_task_mgr.h" diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 6cd47e8a1b..8f781c5d25 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -29,7 +29,6 @@ #include "olap/storage_engine.h" #include "olap/tablet.h" #include "runtime/buffer_control_block.h" -#include "runtime/data_stream_mgr.h" #include "runtime/exec_env.h" #include "runtime/fold_constant_executor.h" #include "runtime/fragment_mgr.h" @@ -111,60 +110,18 @@ PInternalServiceImpl::~PInternalServiceImpl() { void PInternalServiceImpl::transmit_data(google::protobuf::RpcController* cntl_base, const PTransmitDataParams* request, PTransmitDataResult* response, - google::protobuf::Closure* done) { - // TODO(zxy) delete in 1.2 version - google::protobuf::Closure* new_done = new NewHttpClosure(done); - brpc::Controller* cntl = static_cast(cntl_base); - attachment_transfer_request_row_batch(request, cntl); - - _transmit_data(cntl_base, request, response, new_done, Status::OK()); -} + google::protobuf::Closure* done) {} void PInternalServiceImpl::transmit_data_by_http(google::protobuf::RpcController* cntl_base, const PEmptyRequest* request, PTransmitDataResult* response, - google::protobuf::Closure* done) { - PTransmitDataParams* new_request = new PTransmitDataParams(); - google::protobuf::Closure* new_done = - new NewHttpClosure(new_request, done); - brpc::Controller* cntl = static_cast(cntl_base); - Status st = attachment_extract_request_contain_tuple(new_request, cntl); - _transmit_data(cntl_base, new_request, response, new_done, st); -} + google::protobuf::Closure* done) {} void PInternalServiceImpl::_transmit_data(google::protobuf::RpcController* cntl_base, const PTransmitDataParams* request, PTransmitDataResult* response, google::protobuf::Closure* done, - const Status& extract_st) { - std::string query_id; - TUniqueId finst_id; - if (request->has_query_id()) { - query_id = print_id(request->query_id()); - finst_id.__set_hi(request->finst_id().hi()); - finst_id.__set_lo(request->finst_id().lo()); - } - VLOG_ROW << "transmit data: fragment_instance_id=" << print_id(request->finst_id()) - << " query_id=" << query_id << " node=" << request->node_id(); - // The response is accessed when done->Run is called in transmit_data(), - // give response a default value to avoid null pointers in high concurrency. 
- Status st; - st.to_protobuf(response->mutable_status()); - if (extract_st.ok()) { - st = _exec_env->stream_mgr()->transmit_data(request, &done); - if (!st.ok()) { - LOG(WARNING) << "transmit_data failed, message=" << st - << ", fragment_instance_id=" << print_id(request->finst_id()) - << ", node=" << request->node_id(); - } - } else { - st = extract_st; - } - if (done != nullptr) { - st.to_protobuf(response->mutable_status()); - done->Run(); - } -} + const Status& extract_st) {} void PInternalServiceImpl::tablet_writer_open(google::protobuf::RpcController* controller, const PTabletWriterOpenRequest* request, diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 6fe40c4bcd..067d7442fa 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -63,7 +63,6 @@ set(UTIL_FILES url_coding.cpp file_utils.cpp mysql_row_buffer.cpp - tuple_row_compare.cpp error_util.cc filesystem_util.cc load_error_hub.cpp diff --git a/be/src/util/tuple_row_compare.cpp b/be/src/util/tuple_row_compare.cpp deleted file mode 100644 index ab71dd1972..0000000000 --- a/be/src/util/tuple_row_compare.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "util/tuple_row_compare.h" - -#include "runtime/runtime_state.h" - -namespace doris {} diff --git a/be/src/util/tuple_row_compare.h b/be/src/util/tuple_row_compare.h deleted file mode 100644 index 7cd3073986..0000000000 --- a/be/src/util/tuple_row_compare.h +++ /dev/null @@ -1,144 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exec/sort_exec_exprs.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/descriptors.h" -#include "runtime/raw_value.h" -#include "runtime/tuple.h" -#include "runtime/tuple_row.h" - -namespace doris { - -class TupleRowComparator { -public: - // Compares two TupleRows based on a set of exprs, in order. - // We use is_asc to determine, for each expr, if it should be ascending or descending - // sort order. 
-    // We use nulls_first to determine, for each expr, if nulls should come before
-    // or after all other values.
-    TupleRowComparator(const std::vector<ExprContext*>& key_expr_ctxs_lhs,
-                       const std::vector<ExprContext*>& key_expr_ctxs_rhs,
-                       const std::vector<bool>& is_asc, const std::vector<bool>& nulls_first)
-            : _key_expr_ctxs_lhs(key_expr_ctxs_lhs),
-              _key_expr_ctxs_rhs(key_expr_ctxs_rhs),
-              _is_asc(is_asc) {
-        // DCHECK_EQ(key_expr_ctxs_lhs.size(), key_expr_ctxs_rhs.size());
-        DCHECK_EQ(key_expr_ctxs_lhs.size(), is_asc.size());
-        DCHECK_EQ(key_expr_ctxs_lhs.size(), nulls_first.size());
-        _nulls_first.reserve(key_expr_ctxs_lhs.size());
-        for (int i = 0; i < key_expr_ctxs_lhs.size(); ++i) {
-            _nulls_first.push_back(nulls_first[i] ? -1 : 1);
-        }
-    }
-
-    TupleRowComparator(const std::vector<ExprContext*>& key_expr_ctxs_lhs,
-                       const std::vector<ExprContext*>& key_expr_ctxs_rhs, bool is_asc,
-                       bool nulls_first)
-            : _key_expr_ctxs_lhs(key_expr_ctxs_lhs),
-              _key_expr_ctxs_rhs(key_expr_ctxs_rhs),
-              _is_asc(key_expr_ctxs_lhs.size(), is_asc),
-              _nulls_first(key_expr_ctxs_lhs.size(), nulls_first ? -1 : 1) {
-        DCHECK_EQ(key_expr_ctxs_lhs.size(), key_expr_ctxs_rhs.size());
-    }
-
-    // 'sort_key_exprs' must have already been prepared.
-    // 'is_asc' determines, for each expr, if it should be ascending or descending sort
-    // order.
-    // 'nulls_first' determines, for each expr, if nulls should come before or after all
-    // other values.
-    TupleRowComparator(const SortExecExprs& sort_key_exprs, const std::vector<bool>& is_asc,
-                       const std::vector<bool>& nulls_first)
-            : _key_expr_ctxs_lhs(sort_key_exprs.lhs_ordering_expr_ctxs()),
-              _key_expr_ctxs_rhs(sort_key_exprs.rhs_ordering_expr_ctxs()),
-              _is_asc(is_asc) {
-        DCHECK_EQ(_key_expr_ctxs_lhs.size(), is_asc.size());
-        DCHECK_EQ(_key_expr_ctxs_lhs.size(), nulls_first.size());
-        _nulls_first.reserve(_key_expr_ctxs_lhs.size());
-        for (int i = 0; i < _key_expr_ctxs_lhs.size(); ++i) {
-            _nulls_first.push_back(nulls_first[i] ? -1 : 1);
-        }
-    }
-
-    TupleRowComparator(const SortExecExprs& sort_key_exprs, bool is_asc, bool nulls_first)
-            : _key_expr_ctxs_lhs(sort_key_exprs.lhs_ordering_expr_ctxs()),
-              _key_expr_ctxs_rhs(sort_key_exprs.rhs_ordering_expr_ctxs()),
-              _is_asc(_key_expr_ctxs_lhs.size(), is_asc),
-              _nulls_first(_key_expr_ctxs_lhs.size(), nulls_first ? -1 : 1) {}
-
-    // Returns a negative value if lhs is less than rhs, a positive value if lhs is greater
-    // than rhs, or 0 if they are equal. All exprs (_key_exprs_lhs and _key_exprs_rhs)
-    // must have been prepared and opened before calling this. i.e. 'sort_key_exprs' in the
-    // constructor must have been opened.
-    int compare(TupleRow* lhs, TupleRow* rhs) const {
-        for (int i = 0; i < _key_expr_ctxs_lhs.size(); ++i) {
-            void* lhs_value = _key_expr_ctxs_lhs[i]->get_value(lhs);
-            void* rhs_value = _key_expr_ctxs_rhs[i]->get_value(rhs);
-
-            // The sort order of NULLs is independent of asc/desc.
-            if (lhs_value == nullptr && rhs_value == nullptr) {
-                continue;
-            }
-            if (lhs_value == nullptr && rhs_value != nullptr) {
-                return _nulls_first[i];
-            }
-            if (lhs_value != nullptr && rhs_value == nullptr) {
-                return -_nulls_first[i];
-            }
-
-            int result =
-                    RawValue::compare(lhs_value, rhs_value, _key_expr_ctxs_lhs[i]->root()->type());
-            if (!_is_asc[i]) {
-                result = -result;
-            }
-            if (result != 0) {
-                return result;
-            }
-            // Otherwise, try the next Expr
-        }
-        return 0; // fully equivalent key
-    }
-
-    // Returns true if lhs is strictly less than rhs.
-    // All exprs (_key_exprs_lhs and _key_exprs_rhs) must have been prepared and opened
-    // before calling this.
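The -1/+1 encoding of _nulls_first is doing quiet work in compare(): a NULL on either side short-circuits with a sign that ignores asc/desc, while only the non-NULL comparison result is flipped by _is_asc. A standalone analogue over std::optional<int> keys (hypothetical, just to show the sign logic):

#include <optional>
#include <vector>

// One sortable key per column; NULL modeled as std::nullopt; nulls_first
// encoded as -1/+1 exactly like _nulls_first above.
int compare_keys(const std::vector<std::optional<int>>& lhs,
                 const std::vector<std::optional<int>>& rhs,
                 const std::vector<bool>& is_asc, const std::vector<int>& nulls_first) {
    for (size_t i = 0; i < lhs.size(); ++i) {
        if (!lhs[i] && !rhs[i]) continue;   // both NULL: equal on this key
        if (!lhs[i]) return nulls_first[i]; // NULL ordering ignores asc/desc
        if (!rhs[i]) return -nulls_first[i];
        int result = (*lhs[i] > *rhs[i]) - (*lhs[i] < *rhs[i]);
        if (!is_asc[i]) result = -result;   // only non-NULL results are flipped
        if (result != 0) return result;
    }
    return 0; // all keys equal
}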
- bool operator()(TupleRow* lhs, TupleRow* rhs) const { - int result = compare(lhs, rhs); - if (result < 0) { - return true; - } - return false; - } - - bool operator()(Tuple* lhs, Tuple* rhs) const { - TupleRow* lhs_row = reinterpret_cast(&lhs); - TupleRow* rhs_row = reinterpret_cast(&rhs); - return (*this)(lhs_row, rhs_row); - } - -private: - const std::vector& _key_expr_ctxs_lhs; - const std::vector& _key_expr_ctxs_rhs; - std::vector _is_asc; - std::vector _nulls_first; - - typedef int (*CompareFn)(ExprContext* const*, ExprContext* const*, TupleRow*, TupleRow*); -}; -} // namespace doris diff --git a/be/src/util/tuple_row_zorder_compare.h b/be/src/util/tuple_row_zorder_compare.h index b27ad14999..337345713e 100644 --- a/be/src/util/tuple_row_zorder_compare.h +++ b/be/src/util/tuple_row_zorder_compare.h @@ -17,7 +17,6 @@ #pragma once -#include "exec/sort_exec_exprs.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "olap/row.h" diff --git a/be/src/exec/mysql_scanner.cpp b/be/src/vec/exec/scan/mysql_scanner.cpp similarity index 98% rename from be/src/exec/mysql_scanner.cpp rename to be/src/vec/exec/scan/mysql_scanner.cpp index 9ff38487a7..d3ffa8ee6a 100644 --- a/be/src/exec/mysql_scanner.cpp +++ b/be/src/vec/exec/scan/mysql_scanner.cpp @@ -23,7 +23,7 @@ #include "common/logging.h" #include "mysql_scanner.h" -namespace doris { +namespace doris::vectorized { MysqlScanner::MysqlScanner(const MysqlScannerParam& param) : _my_param(param), @@ -188,6 +188,6 @@ Status MysqlScanner::_error_status(const std::string& prefix) { return Status::InternalError(msg.str()); } -} // namespace doris +} // namespace doris::vectorized /* vim: set ts=4 sw=4 sts=4 tw=100 noet: */ diff --git a/be/src/exec/mysql_scanner.h b/be/src/vec/exec/scan/mysql_scanner.h similarity index 96% rename from be/src/exec/mysql_scanner.h rename to be/src/vec/exec/scan/mysql_scanner.h index a75155e868..8aa324f1e1 100644 --- a/be/src/exec/mysql_scanner.h +++ b/be/src/vec/exec/scan/mysql_scanner.h @@ -32,7 +32,7 @@ #define __DorisMysqlRes void #endif -namespace doris { +namespace doris::vectorized { struct MysqlScannerParam { std::string host; @@ -72,4 +72,4 @@ private: int _field_num; }; -} // namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/exec/vbroker_scanner.cpp b/be/src/vec/exec/vbroker_scanner.cpp index df9f7e79ce..9fe05c359a 100644 --- a/be/src/vec/exec/vbroker_scanner.cpp +++ b/be/src/vec/exec/vbroker_scanner.cpp @@ -19,9 +19,15 @@ #include +#include "common/consts.h" #include "exec/line_reader.h" +#include "exec/plain_binary_line_reader.h" +#include "exec/plain_text_line_reader.h" #include "exec/text_converter.h" #include "exec/text_converter.hpp" +#include "gen_cpp/internal_service.pb.h" +#include "io/file_factory.h" +#include "util/utf8_check.h" namespace doris::vectorized { @@ -30,13 +36,358 @@ VBrokerScanner::VBrokerScanner(RuntimeState* state, RuntimeProfile* profile, const std::vector& ranges, const std::vector& broker_addresses, const std::vector& pre_filter_texprs, ScannerCounter* counter) - : BrokerScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, - counter) { + : BaseScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, counter), + _cur_file_reader(nullptr), + _cur_line_reader(nullptr), + _cur_decompressor(nullptr), + _cur_line_reader_eof(false), + _skip_lines(0) { + if (params.__isset.column_separator_length && params.column_separator_length > 1) { + _value_separator = params.column_separator_str; + _value_separator_length 
= params.column_separator_length; + } else { + _value_separator.push_back(static_cast(params.column_separator)); + _value_separator_length = 1; + } + if (params.__isset.line_delimiter_length && params.line_delimiter_length > 1) { + _line_delimiter = params.line_delimiter_str; + _line_delimiter_length = params.line_delimiter_length; + } else { + _line_delimiter.push_back(static_cast(params.line_delimiter)); + _line_delimiter_length = 1; + } + _split_values.reserve(sizeof(Slice) * params.src_slot_ids.size()); _text_converter.reset(new (std::nothrow) TextConverter('\\')); _src_block_mem_reuse = true; } -VBrokerScanner::~VBrokerScanner() = default; +VBrokerScanner::~VBrokerScanner() { + close(); +} + +Status VBrokerScanner::open() { + RETURN_IF_ERROR(BaseScanner::open()); // base default function + return Status::OK(); +} + +Status VBrokerScanner::_open_file_reader() { + const TBrokerRangeDesc& range = _ranges[_next_range]; + int64_t start_offset = range.start_offset; + if (start_offset != 0) { + start_offset -= 1; + } + //means first range, skip + if (start_offset == 0 && range.header_type.size() > 0) { + std::string header_type = to_lower(range.header_type); + if (header_type == BeConsts::CSV_WITH_NAMES) { + _skip_lines = 1; + } else if (header_type == BeConsts::CSV_WITH_NAMES_AND_TYPES) { + _skip_lines = 2; + } + } + + if (range.file_type == TFileType::FILE_STREAM) { + RETURN_IF_ERROR(FileFactory::create_pipe_reader(range.load_id, _cur_file_reader_s)); + _real_reader = _cur_file_reader_s.get(); + } else { + RETURN_IF_ERROR(FileFactory::create_file_reader( + range.file_type, _state->exec_env(), _profile, _broker_addresses, + _params.properties, range, start_offset, _cur_file_reader)); + _real_reader = _cur_file_reader.get(); + } + return _real_reader->open(); +} + +Status VBrokerScanner::_create_decompressor(TFileFormatType::type type) { + if (_cur_decompressor != nullptr) { + delete _cur_decompressor; + _cur_decompressor = nullptr; + } + + CompressType compress_type; + switch (type) { + case TFileFormatType::FORMAT_CSV_PLAIN: + case TFileFormatType::FORMAT_JSON: + case TFileFormatType::FORMAT_PROTO: + compress_type = CompressType::UNCOMPRESSED; + break; + case TFileFormatType::FORMAT_CSV_GZ: + compress_type = CompressType::GZIP; + break; + case TFileFormatType::FORMAT_CSV_BZ2: + compress_type = CompressType::BZIP2; + break; + case TFileFormatType::FORMAT_CSV_LZ4FRAME: + compress_type = CompressType::LZ4FRAME; + break; + case TFileFormatType::FORMAT_CSV_LZOP: + compress_type = CompressType::LZOP; + break; + case TFileFormatType::FORMAT_CSV_DEFLATE: + compress_type = CompressType::DEFLATE; + break; + default: { + return Status::InternalError("Unknown format type, cannot inference compress type, type={}", + type); + } + } + RETURN_IF_ERROR(Decompressor::create_decompressor(compress_type, &_cur_decompressor)); + + return Status::OK(); +} + +Status VBrokerScanner::_open_line_reader() { + if (_cur_decompressor != nullptr) { + delete _cur_decompressor; + _cur_decompressor = nullptr; + } + + if (_cur_line_reader != nullptr) { + delete _cur_line_reader; + _cur_line_reader = nullptr; + } + + const TBrokerRangeDesc& range = _ranges[_next_range]; + int64_t size = range.size; + if (range.start_offset != 0) { + if (range.format_type != TFileFormatType::FORMAT_CSV_PLAIN) { + return Status::InternalError("For now we do not support split compressed file"); + } + size += 1; + // not first range will always skip one line + _skip_lines = 1; + } + + // create decompressor. 
+ // _decompressor may be nullptr if this is not a compressed file + RETURN_IF_ERROR(_create_decompressor(range.format_type)); + + _file_format_type = range.format_type; + // open line reader + switch (range.format_type) { + case TFileFormatType::FORMAT_CSV_PLAIN: + case TFileFormatType::FORMAT_CSV_GZ: + case TFileFormatType::FORMAT_CSV_BZ2: + case TFileFormatType::FORMAT_CSV_LZ4FRAME: + case TFileFormatType::FORMAT_CSV_LZOP: + case TFileFormatType::FORMAT_CSV_DEFLATE: + _cur_line_reader = new PlainTextLineReader(_profile, _real_reader, _cur_decompressor, size, + _line_delimiter, _line_delimiter_length); + break; + case TFileFormatType::FORMAT_PROTO: + _cur_line_reader = new PlainBinaryLineReader(_real_reader); + break; + default: { + return Status::InternalError("Unknown format type, cannot init line reader, type={}", + range.format_type); + } + } + + _cur_line_reader_eof = false; + + return Status::OK(); +} + +void VBrokerScanner::close() { + BaseScanner::close(); + if (_cur_decompressor != nullptr) { + delete _cur_decompressor; + _cur_decompressor = nullptr; + } + + if (_cur_line_reader != nullptr) { + delete _cur_line_reader; + _cur_line_reader = nullptr; + } +} + +Status VBrokerScanner::_open_next_reader() { + if (_next_range >= _ranges.size()) { + _scanner_eof = true; + return Status::OK(); + } + + RETURN_IF_ERROR(_open_file_reader()); + RETURN_IF_ERROR(_open_line_reader()); + _next_range++; + + return Status::OK(); +} + +Status VBrokerScanner::_line_to_src_tuple(const Slice& line) { + RETURN_IF_ERROR(_line_split_to_values(line)); + if (!_success) { + return Status::OK(); + } + + for (int i = 0; i < _split_values.size(); ++i) { + auto slot_desc = _src_slot_descs[i]; + const Slice& value = _split_values[i]; + if (slot_desc->is_nullable() && is_null(value)) { + _src_tuple->set_null(slot_desc->null_indicator_offset()); + continue; + } + _src_tuple->set_not_null(slot_desc->null_indicator_offset()); + void* slot = _src_tuple->get_slot(slot_desc->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); + str_slot->ptr = value.data; + str_slot->len = value.size; + } + + const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); + if (range.__isset.num_of_columns_from_file) { + fill_slots_of_columns_from_path(range.num_of_columns_from_file, range.columns_from_path); + } + + return Status::OK(); +} + +void VBrokerScanner::split_line(const Slice& line) { + _split_values.clear(); + if (_file_format_type == TFileFormatType::FORMAT_PROTO) { + PDataRow** ptr = reinterpret_cast(line.data); + PDataRow* row = *ptr; + for (const PDataColumn& col : (row)->col()) { + int len = col.value().size(); + uint8_t* buf = new uint8_t[len]; + memcpy(buf, col.value().c_str(), len); + _split_values.emplace_back(buf, len); + } + delete row; + delete[] ptr; + } else { + const char* value = line.data; + size_t start = 0; // point to the start pos of next col value. + size_t curpos = 0; // point to the start pos of separator matching sequence. + size_t p1 = 0; // point to the current pos of separator matching sequence. + size_t non_space = 0; // point to the last pos of non_space character. + + // Separator: AAAA + // + // p1 + // â–¼ + // AAAA + // 1000AAAA2000AAAA + // â–² â–² + // Start │ + // curpos + + while (curpos < line.size) { + if (curpos + p1 == line.size || *(value + curpos + p1) != _value_separator[p1]) { + // Not match, move forward: + curpos += (p1 == 0 ? 
+void VBrokerScanner::split_line(const Slice& line) {
+    _split_values.clear();
+    if (_file_format_type == TFileFormatType::FORMAT_PROTO) {
+        PDataRow** ptr = reinterpret_cast<PDataRow**>(line.data);
+        PDataRow* row = *ptr;
+        for (const PDataColumn& col : row->col()) {
+            int len = col.value().size();
+            uint8_t* buf = new uint8_t[len];
+            memcpy(buf, col.value().c_str(), len);
+            _split_values.emplace_back(buf, len);
+        }
+        delete row;
+        delete[] ptr;
+    } else {
+        const char* value = line.data;
+        size_t start = 0;     // point to the start pos of next col value.
+        size_t curpos = 0;    // point to the start pos of separator matching sequence.
+        size_t p1 = 0;        // point to the current pos of separator matching sequence.
+        size_t non_space = 0; // point to the last pos of non_space character.
+
+        // Separator: AAAA
+        //
+        //    p1
+        //    ▼
+        //    AAAA
+        // 1000AAAA2000AAAA
+        // ▲   ▲
+        // Start │
+        //     curpos
+
+        while (curpos < line.size) {
+            if (curpos + p1 == line.size || *(value + curpos + p1) != _value_separator[p1]) {
+                // Not match, move forward:
+                curpos += (p1 == 0 ? 1 : p1);
+                p1 = 0;
+            } else {
+                p1++;
+                if (p1 == _value_separator_length) {
+                    // Match a separator
+                    non_space = curpos;
+                    // Trim trailing spaces. Be consistent with hive and trino's behavior.
+                    if (_state->trim_tailing_spaces_for_external_table_query()) {
+                        while (non_space > start && *(value + non_space - 1) == ' ') {
+                            non_space--;
+                        }
+                    }
+                    _split_values.emplace_back(value + start, non_space - start);
+                    start = curpos + _value_separator_length;
+                    curpos = start;
+                    p1 = 0;
+                    non_space = 0;
+                }
+            }
+        }
+
+        CHECK(curpos == line.size) << curpos << " vs " << line.size;
+        non_space = curpos;
+        if (_state->trim_tailing_spaces_for_external_table_query()) {
+            while (non_space > start && *(value + non_space - 1) == ' ') {
+                non_space--;
+            }
+        }
+        _split_values.emplace_back(value + start, non_space - start);
+    }
+}
+
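When `read_by_column_def` is set, `_line_split_to_values` below reconciles the parsed value count with the schema instead of filtering the row: missing trailing columns are padded with zero-length (NULL) values and extras are dropped. A minimal sketch of that pad-and-truncate step (illustrative names; note the real code reuses `_split_values.back()`'s data pointer for the zero-length padding, which assumes at least one value was parsed):

#include <string>
#include <vector>

// slot_count plays the role of _src_slot_descs.size() minus the
// path-derived columns.
void reconcile(std::vector<std::string>& values, size_t slot_count) {
    while (values.size() < slot_count) {
        values.emplace_back(); // empty value becomes NULL for nullable slots
    }
    while (values.size() > slot_count) {
        values.pop_back(); // redundant trailing columns are ignored
    }
}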
"" : std::string(line.data, line.size); + }, + [&]() -> std::string { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, "{}", + "actual column number is more than schema column number."); + fmt::format_to(error_msg, "actual number: {}, column separator: [{}], ", + _split_values.size(), _value_separator); + fmt::format_to(error_msg, "line delimiter: [{}], schema number: {}; ", + _line_delimiter, _src_slot_descs.size()); + return fmt::to_string(error_msg); + }, + &_scanner_eof)); + _counter->num_rows_filtered++; + _success = false; + return Status::OK(); + } + } + + _success = true; + return Status::OK(); +} Status VBrokerScanner::get_next(Block* output_block, bool* eof) { SCOPED_TIMER(_read_timer); @@ -47,7 +398,7 @@ Status VBrokerScanner::get_next(Block* output_block, bool* eof) { while (columns[0]->size() < batch_size && !_scanner_eof) { if (_cur_line_reader == nullptr || _cur_line_reader_eof) { - RETURN_IF_ERROR(open_next_reader()); + RETURN_IF_ERROR(_open_next_reader()); // If there isn't any more reader, break this if (_scanner_eof) { continue; diff --git a/be/src/vec/exec/vbroker_scanner.h b/be/src/vec/exec/vbroker_scanner.h index cbd00f859a..2e26eb58b0 100644 --- a/be/src/vec/exec/vbroker_scanner.h +++ b/be/src/vec/exec/vbroker_scanner.h @@ -17,17 +17,35 @@ #pragma once -#include +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "exec/base_scanner.h" +#include "exec/decompressor.h" +#include "exec/line_reader.h" +#include "exec/text_converter.h" +#include "gen_cpp/PlanNodes_types.h" +#include "gen_cpp/Types_types.h" +#include "io/file_reader.h" +#include "runtime/mem_pool.h" +#include "util/runtime_profile.h" +#include "util/slice.h" namespace doris::vectorized { -class VBrokerScanner final : public BrokerScanner { +class VBrokerScanner final : public BaseScanner { public: VBrokerScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, const std::vector& pre_filter_texprs, ScannerCounter* counter); - ~VBrokerScanner(); + ~VBrokerScanner() override; + + Status open() override; virtual Status get_next(doris::Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override { @@ -36,9 +54,45 @@ public: Status get_next(Block* block, bool* eof) override; + void close() override; + private: + Status _open_file_reader(); + Status _create_decompressor(TFileFormatType::type type); + Status _open_line_reader(); + // Read next buffer from reader + Status _open_next_reader(); + Status _line_to_src_tuple(const Slice& line); + Status _line_split_to_values(const Slice& line); + // Split one text line to values + void split_line(const Slice& line); + std::unique_ptr _text_converter; Status _fill_dest_columns(const Slice& line, std::vector& columns); + + std::string _value_separator; + std::string _line_delimiter; + TFileFormatType::type _file_format_type; + int _value_separator_length; + int _line_delimiter_length; + + // Reader + // _cur_file_reader_s is for stream load pipe reader, + // and _cur_file_reader is for other file reader. 
+ // TODO: refactor this to use only shared_ptr or unique_ptr + std::unique_ptr _cur_file_reader; + std::shared_ptr _cur_file_reader_s; + FileReader* _real_reader; + LineReader* _cur_line_reader; + Decompressor* _cur_decompressor; + bool _cur_line_reader_eof; + + // When we fetch range start from 0, header_type="csv_with_names" skip first line + // When we fetch range start from 0, header_type="csv_with_names_and_types" skip first two line + // When we fetch range doesn't start from 0 will always skip the first line + int _skip_lines; + + std::vector _split_values; }; } // namespace doris::vectorized diff --git a/be/src/vec/exec/vjson_scanner.cpp b/be/src/vec/exec/vjson_scanner.cpp index cf5722f739..dee107f06a 100644 --- a/be/src/vec/exec/vjson_scanner.cpp +++ b/be/src/vec/exec/vjson_scanner.cpp @@ -22,7 +22,9 @@ #include #include "exec/line_reader.h" +#include "exec/plain_text_line_reader.h" #include "exprs/json_functions.h" +#include "io/file_factory.h" #include "runtime/runtime_state.h" #include "vec/data_types/data_type_string.h" @@ -36,8 +38,45 @@ VJsonScanner::VJsonScanner(RuntimeState* state, RuntimeProfile* prof const std::vector& broker_addresses, const std::vector& pre_filter_texprs, ScannerCounter* counter) - : JsonScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, - counter) {} + : BaseScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, counter), + _cur_file_reader(nullptr), + _cur_file_reader_s(nullptr), + _real_reader(nullptr), + _cur_line_reader(nullptr), + _cur_json_reader(nullptr), + _cur_reader_eof(false), + _read_json_by_line(false) { + if (params.__isset.line_delimiter_length && params.line_delimiter_length > 1) { + _line_delimiter = params.line_delimiter_str; + _line_delimiter_length = params.line_delimiter_length; + } else { + _line_delimiter.push_back(static_cast(params.line_delimiter)); + _line_delimiter_length = 1; + } +} + +template +VJsonScanner::~VJsonScanner() { + close(); +} + +template +Status VJsonScanner::open() { + return BaseScanner::open(); +} + +template +void VJsonScanner::close() { + BaseScanner::close(); + if (_cur_json_reader != nullptr) { + delete _cur_json_reader; + _cur_json_reader = nullptr; + } + if (_cur_line_reader != nullptr) { + delete _cur_line_reader; + _cur_line_reader = nullptr; + } +} template Status VJsonScanner::get_next(vectorized::Block* output_block, bool* eof) { @@ -49,7 +88,7 @@ Status VJsonScanner::get_next(vectorized::Block* output_block, bool* // Get one line while (columns[0]->size() < batch_size && !_scanner_eof) { if (_real_reader == nullptr || _cur_reader_eof) { - RETURN_IF_ERROR(open_next_reader()); + RETURN_IF_ERROR(_open_next_reader()); // If there isn't any more reader, break this if (_scanner_eof) { break; @@ -85,30 +124,30 @@ Status VJsonScanner::get_next(vectorized::Block* output_block, bool* } template -Status VJsonScanner::open_next_reader() { +Status VJsonScanner::_open_next_reader() { if (_next_range >= _ranges.size()) { _scanner_eof = true; return Status::OK(); } - RETURN_IF_ERROR(JsonScanner::open_based_reader()); - RETURN_IF_ERROR(open_vjson_reader()); + RETURN_IF_ERROR(_open_based_reader()); + RETURN_IF_ERROR(_open_vjson_reader()); _next_range++; return Status::OK(); } template -Status VJsonScanner::open_vjson_reader() { +Status VJsonScanner::_open_vjson_reader() { if (_cur_vjson_reader != nullptr) { _cur_vjson_reader.reset(); } - std::string json_root = ""; - std::string jsonpath = ""; + std::string json_root; + std::string jsonpath; bool strip_outer_array 
= false; bool num_as_string = false; bool fuzzy_parse = false; - RETURN_IF_ERROR(JsonScanner::get_range_params(jsonpath, json_root, strip_outer_array, - num_as_string, fuzzy_parse)); + RETURN_IF_ERROR( + _get_range_params(jsonpath, json_root, strip_outer_array, num_as_string, fuzzy_parse)); _cur_vjson_reader.reset(new JsonReader(_state, _counter, _profile, strip_outer_array, num_as_string, fuzzy_parse, &_scanner_eof, _read_json_by_line ? nullptr : _real_reader, @@ -118,18 +157,144 @@ Status VJsonScanner::open_vjson_reader() { return Status::OK(); } +template +Status VJsonScanner::_open_based_reader() { + RETURN_IF_ERROR(_open_file_reader()); + if (_read_json_by_line) { + RETURN_IF_ERROR(_open_line_reader()); + } + return Status::OK(); +} + +template +Status VJsonScanner::_open_file_reader() { + const TBrokerRangeDesc& range = _ranges[_next_range]; + int64_t start_offset = range.start_offset; + if (start_offset != 0) { + start_offset -= 1; + } + if (range.__isset.read_json_by_line) { + _read_json_by_line = range.read_json_by_line; + } + + if (range.file_type == TFileType::FILE_STREAM) { + RETURN_IF_ERROR(FileFactory::create_pipe_reader(range.load_id, _cur_file_reader_s)); + _real_reader = _cur_file_reader_s.get(); + } else { + RETURN_IF_ERROR(FileFactory::create_file_reader( + range.file_type, _state->exec_env(), _profile, _broker_addresses, + _params.properties, range, start_offset, _cur_file_reader)); + _real_reader = _cur_file_reader.get(); + } + _cur_reader_eof = false; + return _real_reader->open(); +} + +template +Status VJsonScanner::_open_line_reader() { + if (_cur_line_reader != nullptr) { + delete _cur_line_reader; + _cur_line_reader = nullptr; + } + + const TBrokerRangeDesc& range = _ranges[_next_range]; + int64_t size = range.size; + if (range.start_offset != 0) { + size += 1; + _skip_next_line = true; + } else { + _skip_next_line = false; + } + _cur_line_reader = new PlainTextLineReader(_profile, _real_reader, nullptr, size, + _line_delimiter, _line_delimiter_length); + _cur_reader_eof = false; + return Status::OK(); +} + +template +Status VJsonScanner::_open_json_reader() { + if (_cur_json_reader != nullptr) { + delete _cur_json_reader; + _cur_json_reader = nullptr; + } + + std::string json_root = ""; + std::string jsonpath = ""; + bool strip_outer_array = false; + bool num_as_string = false; + bool fuzzy_parse = false; + + RETURN_IF_ERROR( + _get_range_params(jsonpath, json_root, strip_outer_array, num_as_string, fuzzy_parse)); + if (_read_json_by_line) { + _cur_json_reader = + new JsonReader(_state, _counter, _profile, strip_outer_array, num_as_string, + fuzzy_parse, &_scanner_eof, nullptr, _cur_line_reader); + } else { + _cur_json_reader = new JsonReader(_state, _counter, _profile, strip_outer_array, + num_as_string, fuzzy_parse, &_scanner_eof, _real_reader); + } + + RETURN_IF_ERROR(_cur_json_reader->init(jsonpath, json_root)); + return Status::OK(); +} + +template +Status VJsonScanner::_get_range_params(std::string& jsonpath, std::string& json_root, + bool& strip_outer_array, bool& num_as_string, + bool& fuzzy_parse) { + const TBrokerRangeDesc& range = _ranges[_next_range]; + + if (range.__isset.jsonpaths) { + jsonpath = range.jsonpaths; + } + if (range.__isset.json_root) { + json_root = range.json_root; + } + if (range.__isset.strip_outer_array) { + strip_outer_array = range.strip_outer_array; + } + if (range.__isset.num_as_string) { + num_as_string = range.num_as_string; + } + if (range.__isset.fuzzy_parse) { + fuzzy_parse = range.fuzzy_parse; + } + return Status::OK(); 
+} + VJsonReader::VJsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, bool strip_outer_array, bool num_as_string, bool fuzzy_parse, bool* scanner_eof, FileReader* file_reader, LineReader* line_reader) - : JsonReader(state, counter, profile, strip_outer_array, num_as_string, fuzzy_parse, - scanner_eof, file_reader, line_reader), - _vhandle_json_callback(nullptr) {} + : _vhandle_json_callback(nullptr), + _next_line(0), + _total_lines(0), + _state(state), + _counter(counter), + _profile(profile), + _file_reader(file_reader), + _line_reader(line_reader), + _closed(false), + _strip_outer_array(strip_outer_array), + _num_as_string(num_as_string), + _fuzzy_parse(fuzzy_parse), + _value_allocator(_value_buffer, sizeof(_value_buffer)), + _parse_allocator(_parse_buffer, sizeof(_parse_buffer)), + _origin_json_doc(&_value_allocator, sizeof(_parse_buffer), &_parse_allocator), + _json_doc(nullptr), + _scanner_eof(scanner_eof) { + _bytes_read_counter = ADD_COUNTER(_profile, "BytesRead", TUnit::BYTES); + _read_timer = ADD_TIMER(_profile, "ReadTime"); + _file_read_timer = ADD_TIMER(_profile, "FileReadTime"); +} -VJsonReader::~VJsonReader() {} +VJsonReader::~VJsonReader() { + _close(); +} Status VJsonReader::init(const std::string& jsonpath, const std::string& json_root) { // generate _parsed_jsonpaths and _parsed_json_root - RETURN_IF_ERROR(JsonReader::_parse_jsonpath_and_json_root(jsonpath, json_root)); + RETURN_IF_ERROR(_parse_jsonpath_and_json_root(jsonpath, json_root)); //improve performance if (_parsed_jsonpaths.empty()) { // input is a simple json-string @@ -360,7 +525,7 @@ Status VJsonReader::_write_data_to_column(rapidjson::Value::ConstValueIterator v return Status::OK(); default: // for other type like array or object. we convert it to string to save - json_str = JsonReader::_print_json_value(*value); + json_str = _print_json_value(*value); wbytes = json_str.size(); str_value = json_str.c_str(); break; @@ -500,7 +665,7 @@ Status VJsonReader::_write_columns_by_jsonpath(rapidjson::Value& objectValue, Status VJsonReader::_parse_json(bool* is_empty_row, bool* eof) { size_t size = 0; - Status st = JsonReader::_parse_json_doc(&size, eof); + Status st = _parse_json_doc(&size, eof); // terminate if encounter other errors RETURN_IF_ERROR(st); @@ -534,7 +699,7 @@ Status VJsonReader::_append_error_msg(const rapidjson::Value& objectValue, std:: } RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { return JsonReader::_print_json_value(objectValue); }, + [&]() -> std::string { return _print_json_value(objectValue); }, [&]() -> std::string { return err_msg; }, _scanner_eof)); _counter->num_rows_filtered++; @@ -1122,6 +1287,321 @@ Status VSIMDJsonReader::_write_columns_by_jsonpath(simdjson::ondemand::value val return Status::OK(); } +Status VJsonReader::_parse_jsonpath_and_json_root(const std::string& jsonpath, + const std::string& json_root) { + // parse jsonpath + if (!jsonpath.empty()) { + RETURN_IF_ERROR(_generate_json_paths(jsonpath, &_parsed_jsonpaths)); + } + if (!json_root.empty()) { + JsonFunctions::parse_json_paths(json_root, &_parsed_json_root); + } + return Status::OK(); +} + +Status VJsonReader::_generate_json_paths(const std::string& jsonpath, + std::vector>* vect) { + rapidjson::Document jsonpaths_doc; + if (!jsonpaths_doc.Parse(jsonpath.c_str(), jsonpath.length()).HasParseError()) { + if (!jsonpaths_doc.IsArray()) { + return Status::InvalidArgument("Invalid json path: {}", jsonpath); + } else { + for (int i = 0; i < jsonpaths_doc.Size(); i++) { 
+ const rapidjson::Value& path = jsonpaths_doc[i]; + if (!path.IsString()) { + return Status::InvalidArgument("Invalid json path: {}", jsonpath); + } + std::vector parsed_paths; + JsonFunctions::parse_json_paths(path.GetString(), &parsed_paths); + vect->push_back(std::move(parsed_paths)); + } + return Status::OK(); + } + } else { + return Status::InvalidArgument("Invalid json path: {}", jsonpath); + } +} + +void VJsonReader::_close() { + if (_closed) { + return; + } + _closed = true; +} + +// read one json string from line reader or file reader and parse it to json doc. +// return Status::DataQualityError() if data has quality error. +// return other error if encounter other problems. +// return Status::OK() if parse succeed or reach EOF. +Status VJsonReader::_parse_json_doc(size_t* size, bool* eof) { + // read a whole message + SCOPED_TIMER(_file_read_timer); + const uint8_t* json_str = nullptr; + std::unique_ptr json_str_ptr; + if (_line_reader != nullptr) { + RETURN_IF_ERROR(_line_reader->read_line(&json_str, size, eof)); + } else { + int64_t length = 0; + RETURN_IF_ERROR(_file_reader->read_one_message(&json_str_ptr, &length)); + json_str = json_str_ptr.get(); + *size = length; + if (length == 0) { + *eof = true; + } + } + + _bytes_read_counter += *size; + if (*eof) { + return Status::OK(); + } + + // clear memory here. + _value_allocator.Clear(); + _parse_allocator.Clear(); + bool has_parse_error = false; + // parse jsondata to JsonDoc + + // As the issue: https://github.com/Tencent/rapidjson/issues/1458 + // Now, rapidjson only support uint64_t, So lagreint load cause bug. We use kParseNumbersAsStringsFlag. + if (_num_as_string) { + has_parse_error = + _origin_json_doc + .Parse((char*)json_str, *size) + .HasParseError(); + } else { + has_parse_error = _origin_json_doc.Parse((char*)json_str, *size).HasParseError(); + } + + if (has_parse_error) { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, "Parse json data for JsonDoc failed. code: {}, error info: {}", + _origin_json_doc.GetParseError(), + rapidjson::GetParseError_En(_origin_json_doc.GetParseError())); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return std::string((char*)json_str, *size); }, + [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); + _counter->num_rows_filtered++; + if (*_scanner_eof) { + // Case A: if _scanner_eof is set to true in "append_error_msg_to_file", which means + // we meet enough invalid rows and the scanner should be stopped. + // So we set eof to true and return OK, the caller will stop the process as we meet the end of file. 
+ *eof = true; + return Status::OK(); + } + return Status::DataQualityError(fmt::to_string(error_msg)); + } + + // set json root + if (_parsed_json_root.size() != 0) { + _json_doc = JsonFunctions::get_json_object_from_parsed_json( + _parsed_json_root, &_origin_json_doc, _origin_json_doc.GetAllocator()); + if (_json_doc == nullptr) { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, "{}", "JSON Root not found."); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(_origin_json_doc); }, + [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); + _counter->num_rows_filtered++; + if (*_scanner_eof) { + // Same as Case A + *eof = true; + return Status::OK(); + } + return Status::DataQualityError(fmt::to_string(error_msg)); + } + } else { + _json_doc = &_origin_json_doc; + } + + if (_json_doc->IsArray() && !_strip_outer_array) { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, "{}", + "JSON data is array-object, `strip_outer_array` must be TRUE."); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(_origin_json_doc); }, + [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); + _counter->num_rows_filtered++; + if (*_scanner_eof) { + // Same as Case A + *eof = true; + return Status::OK(); + } + return Status::DataQualityError(fmt::to_string(error_msg)); + } + + if (!_json_doc->IsArray() && _strip_outer_array) { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, "{}", + "JSON data is not an array-object, `strip_outer_array` must be FALSE."); + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(_origin_json_doc); }, + [&]() -> std::string { return fmt::to_string(error_msg); }, _scanner_eof)); + _counter->num_rows_filtered++; + if (*_scanner_eof) { + // Same as Case A + *eof = true; + return Status::OK(); + } + return Status::DataQualityError(fmt::to_string(error_msg)); + } + + return Status::OK(); +} + +std::string VJsonReader::_print_json_value(const rapidjson::Value& value) { + rapidjson::StringBuffer buffer; + buffer.Clear(); + rapidjson::Writer writer(buffer); + value.Accept(writer); + return std::string(buffer.GetString()); +} + +void VJsonReader::_fill_slot(doris::Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, + const uint8_t* value, int32_t len) { + tuple->set_not_null(slot_desc->null_indicator_offset()); + void* slot = tuple->get_slot(slot_desc->tuple_offset()); + StringValue* str_slot = reinterpret_cast(slot); + str_slot->ptr = reinterpret_cast(mem_pool->allocate(len)); + memcpy(str_slot->ptr, value, len); + str_slot->len = len; +} + +Status VJsonReader::_write_data_to_tuple(rapidjson::Value::ConstValueIterator value, + SlotDescriptor* desc, doris::Tuple* tuple, + MemPool* tuple_pool, bool* valid) { + const char* str_value = nullptr; + uint8_t tmp_buf[128] = {0}; + int32_t wbytes = 0; + switch (value->GetType()) { + case rapidjson::Type::kStringType: + str_value = value->GetString(); + _fill_slot(tuple, desc, tuple_pool, (uint8_t*)str_value, strlen(str_value)); + break; + case rapidjson::Type::kNumberType: + if (value->IsUint()) { + wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%u", value->GetUint()); + _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); + } else if (value->IsInt()) { + wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%d", value->GetInt()); + _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes); + } else if (value->IsUint64()) { + 
wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%" PRIu64, value->GetUint64());
+            _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes);
+        } else if (value->IsInt64()) {
+            wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%" PRId64, value->GetInt64());
+            _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes);
+        } else {
+            wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%f", value->GetDouble());
+            _fill_slot(tuple, desc, tuple_pool, tmp_buf, wbytes);
+        }
+        break;
+    case rapidjson::Type::kFalseType:
+        _fill_slot(tuple, desc, tuple_pool, (uint8_t*)"0", 1);
+        break;
+    case rapidjson::Type::kTrueType:
+        _fill_slot(tuple, desc, tuple_pool, (uint8_t*)"1", 1);
+        break;
+    case rapidjson::Type::kNullType:
+        if (desc->is_nullable()) {
+            tuple->set_null(desc->null_indicator_offset());
+        } else {
+            RETURN_IF_ERROR(_state->append_error_msg_to_file(
+                    [&]() -> std::string { return _print_json_value(*value); },
+                    [&]() -> std::string {
+                        fmt::memory_buffer error_msg;
+                        fmt::format_to(error_msg,
+                                       "Json value is null, but the column `{}` is not nullable.",
+                                       desc->col_name());
+                        return fmt::to_string(error_msg);
+                    },
+                    _scanner_eof));
+            _counter->num_rows_filtered++;
+            *valid = false;
+            return Status::OK();
+        }
+        break;
+    default:
+        // For other types like array or object, convert to string before saving.
+        std::string json_str = _print_json_value(*value);
+        _fill_slot(tuple, desc, tuple_pool, (uint8_t*)json_str.c_str(), json_str.length());
+        break;
+    }
+    *valid = true;
+    return Status::OK();
+}
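
Everything `_write_data_to_tuple` above produces is a string: the source tuple only carries varchar-like slots, and casts to the target column types happen later in the load pipeline. A toy version of the same rapidjson dispatch, with illustrative names and no Doris types:

#include <cinttypes>
#include <cstdio>
#include <string>

#include <rapidjson/document.h>

// Render any scalar JSON value the way the slot-filling code above does:
// numbers are printed, booleans become "0"/"1".
std::string to_slot_string(const rapidjson::Value& v) {
    char buf[128];
    switch (v.GetType()) {
    case rapidjson::kStringType:
        return v.GetString();
    case rapidjson::kNumberType:
        if (v.IsInt64()) {
            snprintf(buf, sizeof(buf), "%" PRId64, v.GetInt64());
        } else {
            snprintf(buf, sizeof(buf), "%f", v.GetDouble());
        }
        return buf;
    case rapidjson::kFalseType:
        return "0";
    case rapidjson::kTrueType:
        return "1";
    default:
        return {}; // null/array/object are handled separately above
    }
}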
+ RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(objectValue); }, + [&]() -> std::string { return "Expect json object value"; }, _scanner_eof)); + _counter->num_rows_filtered++; + *valid = false; // current row is invalid + return Status::OK(); + } + + int nullcount = 0; + for (auto v : slot_descs) { + rapidjson::Value::ConstMemberIterator it = objectValue.MemberEnd(); + if (_fuzzy_parse) { + auto idx_it = _name_map.find(v->col_name()); + if (idx_it != _name_map.end() && idx_it->second < objectValue.MemberCount()) { + it = objectValue.MemberBegin() + idx_it->second; + } + } else { + it = objectValue.FindMember( + rapidjson::Value(v->col_name().c_str(), v->col_name().size())); + } + if (it != objectValue.MemberEnd()) { + const rapidjson::Value& value = it->value; + RETURN_IF_ERROR(_write_data_to_tuple(&value, v, tuple, tuple_pool, valid)); + if (!(*valid)) { + return Status::OK(); + } + } else { // not found + if (v->is_nullable()) { + tuple->set_null(v->null_indicator_offset()); + nullcount++; + } else { + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(objectValue); }, + [&]() -> std::string { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, + "The column `{}` is not nullable, but it's not found in " + "jsondata.", + v->col_name()); + return fmt::to_string(error_msg); + }, + _scanner_eof)); + _counter->num_rows_filtered++; + *valid = false; // current row is invalid + break; + } + } + } + + if (nullcount == slot_descs.size()) { + RETURN_IF_ERROR(_state->append_error_msg_to_file( + [&]() -> std::string { return _print_json_value(objectValue); }, + [&]() -> std::string { return "All fields is null, this is a invalid row."; }, + _scanner_eof)); + _counter->num_rows_filtered++; + *valid = false; + return Status::OK(); + } + *valid = true; + return Status::OK(); +} + template class VJsonScanner; template class VJsonScanner; } // namespace doris::vectorized diff --git a/be/src/vec/exec/vjson_scanner.h b/be/src/vec/exec/vjson_scanner.h index bdac179644..710d540d14 100644 --- a/be/src/vec/exec/vjson_scanner.h +++ b/be/src/vec/exec/vjson_scanner.h @@ -32,7 +32,9 @@ #include "common/status.h" #include "exec/base_scanner.h" -#include "exec/json_scanner.h" +#include "exec/line_reader.h" +#include "exprs/json_functions.h" +#include "io/file_reader.h" #include "runtime/descriptors.h" #include "util/runtime_profile.h" @@ -45,28 +47,62 @@ namespace vectorized { class VJsonReader; template -class VJsonScanner : public JsonScanner { +class VJsonScanner : public BaseScanner { public: VJsonScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, const std::vector& pre_filter_texprs, ScannerCounter* counter); + ~VJsonScanner() override; + + // Open this scanner, will initialize information needed + Status open() override; + Status get_next(doris::Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override { return Status::NotSupported("Not Implemented get tuple"); } Status get_next(vectorized::Block* output_block, bool* eof) override; -private: - Status open_vjson_reader(); - Status open_next_reader(); + void close() override; private: + Status _open_vjson_reader(); + Status _open_next_reader(); + + Status _open_file_reader(); + Status _open_line_reader(); + Status _open_json_reader(); + + Status _open_based_reader(); + Status _get_range_params(std::string& jsonpath, std::string& 
json_root, bool& strip_outer_array, + bool& num_as_string, bool& fuzzy_parse); + std::string _jsonpath; + std::string _jsonpath_file; + + std::string _line_delimiter; + int _line_delimiter_length; + + // Reader + // _cur_file_reader_s is for stream load pipe reader, + // and _cur_file_reader is for other file reader. + // TODO: refactor this to use only shared_ptr or unique_ptr + std::unique_ptr _cur_file_reader; + std::shared_ptr _cur_file_reader_s; + FileReader* _real_reader; + LineReader* _cur_line_reader; + JsonReader* _cur_json_reader; + bool _cur_reader_eof; + bool _read_json_by_line; + + // When we fetch range doesn't start from 0, + // we will read to one ahead, and skip the first line + bool _skip_next_line; std::unique_ptr _cur_vjson_reader = nullptr; }; -class VJsonReader : public JsonReader { +class VJsonReader { public: VJsonReader(RuntimeState* state, ScannerCounter* counter, RuntimeProfile* profile, bool strip_outer_array, bool num_as_string, bool fuzzy_parse, bool* scanner_eof, @@ -112,6 +148,53 @@ private: Status _append_error_msg(const rapidjson::Value& objectValue, std::string error_msg, std::string col_name, bool* valid); + + void _fill_slot(doris::Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, + const uint8_t* value, int32_t len); + Status _parse_json_doc(size_t* size, bool* eof); + Status _set_tuple_value(rapidjson::Value& objectValue, doris::Tuple* tuple, + const std::vector& slot_descs, MemPool* tuple_pool, + bool* valid); + Status _write_data_to_tuple(rapidjson::Value::ConstValueIterator value, SlotDescriptor* desc, + doris::Tuple* tuple, MemPool* tuple_pool, bool* valid); + std::string _print_json_value(const rapidjson::Value& value); + + void _close(); + Status _generate_json_paths(const std::string& jsonpath, + std::vector>* vect); + Status _parse_jsonpath_and_json_root(const std::string& jsonpath, const std::string& json_root); + + int _next_line; + int _total_lines; + RuntimeState* _state; + ScannerCounter* _counter; + RuntimeProfile* _profile; + FileReader* _file_reader; + LineReader* _line_reader; + bool _closed; + bool _strip_outer_array; + bool _num_as_string; + bool _fuzzy_parse; + RuntimeProfile::Counter* _bytes_read_counter; + RuntimeProfile::Counter* _read_timer; + RuntimeProfile::Counter* _file_read_timer; + + std::vector> _parsed_jsonpaths; + std::vector _parsed_json_root; + + char _value_buffer[4 * 1024 * 1024]; + char _parse_buffer[512 * 1024]; + + using Document = rapidjson::GenericDocument, rapidjson::MemoryPoolAllocator<>, + rapidjson::MemoryPoolAllocator<>>; + rapidjson::MemoryPoolAllocator<> _value_allocator; + rapidjson::MemoryPoolAllocator<> _parse_allocator; + Document _origin_json_doc; // origin json document object from parsed json string + rapidjson::Value* _json_doc; // _json_doc equals _final_json_doc iff not set `json_root` + std::unordered_map _name_map; + + // point to the _scanner_eof of JsonScanner + bool* _scanner_eof; }; class VSIMDJsonReader { diff --git a/be/src/vec/exec/vmysql_scan_node.h b/be/src/vec/exec/vmysql_scan_node.h index aa364de900..2fd8240956 100644 --- a/be/src/vec/exec/vmysql_scan_node.h +++ b/be/src/vec/exec/vmysql_scan_node.h @@ -19,10 +19,10 @@ #include -#include "exec/mysql_scanner.h" #include "exec/scan_node.h" #include "exec/text_converter.h" #include "runtime/descriptors.h" +#include "vec/exec/scan/mysql_scanner.h" namespace doris { class TextConverter; diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp index c26bfdba21..8813bce081 100644 --- 
a/be/src/vec/exec/vtable_function_node.cpp +++ b/be/src/vec/exec/vtable_function_node.cpp @@ -25,7 +25,7 @@ namespace doris::vectorized { VTableFunctionNode::VTableFunctionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : TableFunctionNode(pool, tnode, descs) {} + : ExecNode(pool, tnode, descs) {} Status VTableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(ExecNode::init(tnode, state)); @@ -51,9 +51,46 @@ Status VTableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) { return Status::OK(); } +Status VTableFunctionNode::_prepare_output_slot_ids(const TPlanNode& tnode) { + // Prepare output slot ids + if (tnode.table_function_node.outputSlotIds.empty()) { + return Status::InternalError("Output slots of table function node is empty"); + } + SlotId max_id = -1; + for (auto slot_id : tnode.table_function_node.outputSlotIds) { + if (slot_id > max_id) { + max_id = slot_id; + } + } + _output_slot_ids = std::vector(max_id + 1, false); + for (auto slot_id : tnode.table_function_node.outputSlotIds) { + _output_slot_ids[slot_id] = true; + } + + return Status::OK(); +} + +bool VTableFunctionNode::_is_inner_and_empty() { + for (int i = 0; i < _fn_num; i++) { + // if any table function is not outer and has empty result, go to next child row + if (!_fns[i]->is_outer() && _fns[i]->current_empty()) { + return true; + } + } + return false; +} + Status VTableFunctionNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - RETURN_IF_ERROR(TableFunctionNode::prepare(state)); + RETURN_IF_ERROR(ExecNode::prepare(state)); + SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); + + _num_rows_filtered_counter = ADD_COUNTER(_runtime_profile, "RowsFiltered", TUnit::UNIT); + + RETURN_IF_ERROR(Expr::prepare(_fn_ctxs, state, _row_descriptor)); + for (auto fn : _fns) { + RETURN_IF_ERROR(fn->prepare()); + } RETURN_IF_ERROR(VExpr::prepare(_vfn_ctxs, state, _row_descriptor)); // get current all output slots @@ -220,4 +257,63 @@ Status VTableFunctionNode::_process_next_child_row() { return Status::OK(); } +// Returns the index of fn of the last eos counted from back to front +// eg: there are 3 functions in `_fns` +// eos: false, true, true +// return: 1 +// +// eos: false, false, true +// return: 2 +// +// eos: false, false, false +// return: -1 +// +// eos: true, true, true +// return: 0 +// +// return: +// 0: all fns are eos +// -1: all fns are not eos +// >0: some of fns are eos +int VTableFunctionNode::_find_last_fn_eos_idx() { + for (int i = _fn_num - 1; i >= 0; --i) { + if (!_fns[i]->eos()) { + if (i == _fn_num - 1) { + return -1; + } else { + return i + 1; + } + } + } + // all eos + return 0; +} + +// Roll to reset the table function. +// Eg: +// There are 3 functions f1, f2 and f3 in `_fns`. +// If `last_eos_idx` is 1, which means f2 and f3 are eos. +// So we need to forward f1, and reset f2 and f3. +bool VTableFunctionNode::_roll_table_functions(int last_eos_idx) { + bool fn_eos = false; + int i = last_eos_idx - 1; + for (; i >= 0; --i) { + _fns[i]->forward(&fn_eos); + if (!fn_eos) { + break; + } + } + if (i == -1) { + // after forward, all functions are eos. + // we should process next child row to get more table function results. 
+ return false; + } + + for (int j = i + 1; j < _fn_num; ++j) { + _fns[j]->reset(); + } + + return true; +} + } // namespace doris::vectorized diff --git a/be/src/vec/exec/vtable_function_node.h b/be/src/vec/exec/vtable_function_node.h index c831e55856..85c0fca5be 100644 --- a/be/src/vec/exec/vtable_function_node.h +++ b/be/src/vec/exec/vtable_function_node.h @@ -17,21 +17,39 @@ #pragma once -#include "exec/table_function_node.h" +#include "exec/exec_node.h" +#include "exprs/expr.h" #include "exprs/table_function/table_function.h" +#include "vec/exprs/vexpr.h" namespace doris::vectorized { -class VTableFunctionNode : public TableFunctionNode { +class VTableFunctionNode : public ExecNode { public: VTableFunctionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); ~VTableFunctionNode() override = default; Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; Status prepare(RuntimeState* state) override; + Status open(RuntimeState* state) override { + START_AND_SCOPE_SPAN(state->get_tracer(), span, "TableFunctionNode::open"); + RETURN_IF_ERROR(alloc_resource(state)); + return _children[0]->open(state); + } Status get_next(RuntimeState* state, Block* block, bool* eos) override; + bool need_more_input_data() { return !_child_block.rows() && !_child_eos; } + void release_resource(doris::RuntimeState* state) override { + Expr::close(_fn_ctxs, state); + vectorized::VExpr::close(_vfn_ctxs, state); + + if (_num_rows_filtered_counter != nullptr) { + COUNTER_SET(_num_rows_filtered_counter, static_cast(_num_rows_filtered)); + } + ExecNode::release_resource(state); + } + Status push(RuntimeState*, vectorized::Block* input_block, bool eos) override { _child_eos = eos; if (input_block->rows() == 0) { @@ -54,7 +72,18 @@ public: Block* get_child_block() { return &_child_block; } private: - Status _process_next_child_row() override; + Status _prepare_output_slot_ids(const TPlanNode& tnode); + bool _is_inner_and_empty(); + + // return: + // 0: all fns are eos + // -1: all fns are not eos + // >0: some of fns are eos + int _find_last_fn_eos_idx(); + + bool _roll_table_functions(int last_eos_idx); + + Status _process_next_child_row(); /* Now the output tuples for table function node is base_table_tuple + tf1 + tf2 + ... But not all slots are used, the real used slots are inside table_function_node.outputSlotIds. 
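
The two helpers above implement an odometer over the table functions: `_find_last_fn_eos_idx` locates the first exhausted function counted from the back, and `_roll_table_functions` forwards the next one toward the front and resets the exhausted ones. A self-contained model of that contract (illustrative `Fn` type; the real TableFunction API differs):

#include <vector>

struct Fn {
    int pos = 0, size = 0; // size == number of values this function produced
    bool eos() const { return pos >= size; }
    void forward(bool* fn_eos) { ++pos; *fn_eos = eos(); }
    void reset() { pos = 0; }
};

// Same contract as _find_last_fn_eos_idx:
//   -1 if no function is eos, 0 if all are, otherwise the index of the
//   first eos function counted from the back.
// e.g. eos pattern {false, true, true} -> 1; {false, false, false} -> -1;
//      {true, true, true} -> 0.
int find_last_eos_idx(const std::vector<Fn>& fns) {
    for (int i = (int)fns.size() - 1; i >= 0; --i) {
        if (!fns[i].eos()) {
            return i == (int)fns.size() - 1 ? -1 : i + 1;
        }
    }
    return 0; // all eos: caller fetches the next child row instead of rolling
}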
@@ -76,13 +105,30 @@ private: return (id < _output_slot_ids.size()) && (_output_slot_ids[id]); } - using TableFunctionNode::get_next; - Status get_expanded_block(RuntimeState* state, Block* output_block, bool* eos); Block _child_block; std::vector _child_slots; std::vector _output_slots; + int64_t _cur_child_offset = 0; + std::shared_ptr _cur_child_batch; + + std::vector _fn_ctxs; + std::vector _vfn_ctxs; + + std::vector _fns; + std::vector _fn_values; + std::vector _fn_value_lengths; + int _fn_num = 0; + + std::vector _output_slot_ids; + + std::vector _child_slot_sizes; + // indicate if child node reach the end + bool _child_eos = false; + + RuntimeProfile::Counter* _num_rows_filtered_counter = nullptr; + uint64_t _num_rows_filtered = 0; }; } // namespace doris::vectorized diff --git a/be/src/vec/runtime/vfile_result_writer.h b/be/src/vec/runtime/vfile_result_writer.h index 31bbc5a55f..37f37ae159 100644 --- a/be/src/vec/runtime/vfile_result_writer.h +++ b/be/src/vec/runtime/vfile_result_writer.h @@ -18,7 +18,6 @@ #pragma once #include "io/file_writer.h" -#include "runtime/file_result_writer.h" #include "vec/runtime/vparquet_writer.h" #include "vec/sink/vresult_sink.h" diff --git a/be/src/vec/runtime/vparquet_writer.cpp b/be/src/vec/runtime/vparquet_writer.cpp index 4a80d65c7b..3431e0ee7a 100644 --- a/be/src/vec/runtime/vparquet_writer.cpp +++ b/be/src/vec/runtime/vparquet_writer.cpp @@ -21,7 +21,6 @@ #include #include -#include "exec/parquet_writer.h" #include "io/file_writer.h" #include "util/mysql_global.h" #include "util/types.h" @@ -36,6 +35,175 @@ namespace doris::vectorized { +ParquetOutputStream::ParquetOutputStream(FileWriter* file_writer) + : _file_writer(file_writer), _cur_pos(0), _written_len(0) { + set_mode(arrow::io::FileMode::WRITE); +} + +ParquetOutputStream::~ParquetOutputStream() { + arrow::Status st = Close(); + if (!st.ok()) { + LOG(WARNING) << "close parquet file error: " << st.ToString(); + } +} + +arrow::Status ParquetOutputStream::Write(const void* data, int64_t nbytes) { + if (_is_closed) { + return arrow::Status::OK(); + } + size_t written_len = 0; + Status st = _file_writer->write(static_cast(data), nbytes, &written_len); + if (!st.ok()) { + return arrow::Status::IOError(st.to_string()); + } + _cur_pos += written_len; + _written_len += written_len; + return arrow::Status::OK(); +} + +arrow::Result ParquetOutputStream::Tell() const { + return _cur_pos; +} + +arrow::Status ParquetOutputStream::Close() { + if (_is_closed) { + return arrow::Status::OK(); + } + Status st = _file_writer->close(); + if (!st.ok()) { + LOG(WARNING) << "close parquet output stream failed: " << st; + return arrow::Status::IOError(st.to_string()); + } + _is_closed = true; + return arrow::Status::OK(); +} + +int64_t ParquetOutputStream::get_written_len() const { + return _written_len; +} + +void ParquetOutputStream::set_written_len(int64_t written_len) { + _written_len = written_len; +} + +void ParquetBuildHelper::build_schema_repetition_type( + parquet::Repetition::type& parquet_repetition_type, + const TParquetRepetitionType::type& column_repetition_type) { + switch (column_repetition_type) { + case TParquetRepetitionType::REQUIRED: { + parquet_repetition_type = parquet::Repetition::REQUIRED; + break; + } + case TParquetRepetitionType::REPEATED: { + parquet_repetition_type = parquet::Repetition::REPEATED; + break; + } + case TParquetRepetitionType::OPTIONAL: { + parquet_repetition_type = parquet::Repetition::OPTIONAL; + break; + } + default: + parquet_repetition_type = 
parquet::Repetition::UNDEFINED; + } +} + +void ParquetBuildHelper::build_schema_data_type(parquet::Type::type& parquet_data_type, + const TParquetDataType::type& column_data_type) { + switch (column_data_type) { + case TParquetDataType::BOOLEAN: { + parquet_data_type = parquet::Type::BOOLEAN; + break; + } + case TParquetDataType::INT32: { + parquet_data_type = parquet::Type::INT32; + break; + } + case TParquetDataType::INT64: { + parquet_data_type = parquet::Type::INT64; + break; + } + case TParquetDataType::INT96: { + parquet_data_type = parquet::Type::INT96; + break; + } + case TParquetDataType::BYTE_ARRAY: { + parquet_data_type = parquet::Type::BYTE_ARRAY; + break; + } + case TParquetDataType::FLOAT: { + parquet_data_type = parquet::Type::FLOAT; + break; + } + case TParquetDataType::DOUBLE: { + parquet_data_type = parquet::Type::DOUBLE; + break; + } + case TParquetDataType::FIXED_LEN_BYTE_ARRAY: { + parquet_data_type = parquet::Type::FIXED_LEN_BYTE_ARRAY; + break; + } + default: + parquet_data_type = parquet::Type::UNDEFINED; + } +} + +void ParquetBuildHelper::build_compression_type( + parquet::WriterProperties::Builder& builder, + const TParquetCompressionType::type& compression_type) { + switch (compression_type) { + case TParquetCompressionType::SNAPPY: { + builder.compression(parquet::Compression::SNAPPY); + break; + } + case TParquetCompressionType::GZIP: { + builder.compression(parquet::Compression::GZIP); + break; + } + case TParquetCompressionType::BROTLI: { + builder.compression(parquet::Compression::BROTLI); + break; + } + case TParquetCompressionType::ZSTD: { + builder.compression(parquet::Compression::ZSTD); + break; + } + case TParquetCompressionType::LZ4: { + builder.compression(parquet::Compression::LZ4); + break; + } + case TParquetCompressionType::LZO: { + builder.compression(parquet::Compression::LZO); + break; + } + case TParquetCompressionType::BZ2: { + builder.compression(parquet::Compression::BZ2); + break; + } + case TParquetCompressionType::UNCOMPRESSED: { + builder.compression(parquet::Compression::UNCOMPRESSED); + break; + } + default: + builder.compression(parquet::Compression::UNCOMPRESSED); + } +} + +void ParquetBuildHelper::build_version(parquet::WriterProperties::Builder& builder, + const TParquetVersion::type& parquet_version) { + switch (parquet_version) { + case TParquetVersion::PARQUET_1_0: { + builder.version(parquet::ParquetVersion::PARQUET_1_0); + break; + } + case TParquetVersion::PARQUET_2_LATEST: { + builder.version(parquet::ParquetVersion::PARQUET_2_LATEST); + break; + } + default: + builder.version(parquet::ParquetVersion::PARQUET_1_0); + } +} + VParquetWriterWrapper::VParquetWriterWrapper(doris::FileWriter* file_writer, const std::vector& output_vexpr_ctxs, const std::vector& parquet_schemas, diff --git a/be/src/vec/runtime/vparquet_writer.h b/be/src/vec/runtime/vparquet_writer.h index 2c5d0e102b..5537885a11 100644 --- a/be/src/vec/runtime/vparquet_writer.h +++ b/be/src/vec/runtime/vparquet_writer.h @@ -34,13 +34,52 @@ #include #include "common/status.h" -#include "exec/parquet_writer.h" #include "vec/core/block.h" #include "vec/exprs/vexpr_context.h" #include "vec/runtime/vfile_result_writer.h" namespace doris::vectorized { +class ParquetOutputStream : public arrow::io::OutputStream { +public: + ParquetOutputStream(FileWriter* file_writer); + ParquetOutputStream(FileWriter* file_writer, const int64_t& written_len); + ~ParquetOutputStream() override; + + arrow::Status Write(const void* data, int64_t nbytes) override; + // return the current 
write position of the stream + arrow::Result Tell() const override; + arrow::Status Close() override; + + bool closed() const override { return _is_closed; } + + int64_t get_written_len() const; + + void set_written_len(int64_t written_len); + +private: + FileWriter* _file_writer; // not owned + int64_t _cur_pos = 0; // current write position + bool _is_closed = false; + int64_t _written_len = 0; +}; + +class ParquetBuildHelper { +public: + static void build_schema_repetition_type( + parquet::Repetition::type& parquet_repetition_type, + const TParquetRepetitionType::type& column_repetition_type); + + static void build_schema_data_type(parquet::Type::type& parquet_data_type, + const TParquetDataType::type& column_data_type); + + static void build_compression_type(parquet::WriterProperties::Builder& builder, + const TParquetCompressionType::type& compression_type); + + static void build_version(parquet::WriterProperties::Builder& builder, + const TParquetVersion::type& parquet_version); +}; + class VFileWriterWrapper { public: VFileWriterWrapper(const std::vector& output_vexpr_ctxs, bool output_object_data) diff --git a/be/src/vec/runtime/vsorted_run_merger.h b/be/src/vec/runtime/vsorted_run_merger.h index e374f2cdc0..974b2f6096 100644 --- a/be/src/vec/runtime/vsorted_run_merger.h +++ b/be/src/vec/runtime/vsorted_run_merger.h @@ -19,8 +19,6 @@ #include -#include "common/object_pool.h" -#include "util/tuple_row_compare.h" #include "vec/core/sort_cursor.h" namespace doris { diff --git a/be/src/vec/sink/vresult_file_sink.cpp b/be/src/vec/sink/vresult_file_sink.cpp index 51c6673fd0..b63ebf160f 100644 --- a/be/src/vec/sink/vresult_file_sink.cpp +++ b/be/src/vec/sink/vresult_file_sink.cpp @@ -20,7 +20,6 @@ #include "common/config.h" #include "runtime/buffer_control_block.h" #include "runtime/exec_env.h" -#include "runtime/file_result_writer.h" #include "runtime/result_buffer_mgr.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" diff --git a/be/src/vec/sink/vresult_file_sink.h b/be/src/vec/sink/vresult_file_sink.h index a5ce85bf4b..33d454f0bc 100644 --- a/be/src/vec/sink/vresult_file_sink.h +++ b/be/src/vec/sink/vresult_file_sink.h @@ -17,8 +17,8 @@ #pragma once -#include "runtime/result_file_sink.h" #include "vec/sink/vdata_stream_sender.h" +#include "vec/sink/vresult_sink.h" namespace doris { namespace vectorized { diff --git a/be/src/vec/sink/vresult_sink.cpp b/be/src/vec/sink/vresult_sink.cpp index c1da47760a..2521636c6a 100644 --- a/be/src/vec/sink/vresult_sink.cpp +++ b/be/src/vec/sink/vresult_sink.cpp @@ -19,7 +19,6 @@ #include "runtime/buffer_control_block.h" #include "runtime/exec_env.h" -#include "runtime/file_result_writer.h" #include "runtime/result_buffer_mgr.h" #include "runtime/runtime_state.h" #include "vec/exprs/vexpr.h" diff --git a/be/src/vec/sink/vresult_sink.h b/be/src/vec/sink/vresult_sink.h index 1e71286118..4b63c48f95 100644 --- a/be/src/vec/sink/vresult_sink.h +++ b/be/src/vec/sink/vresult_sink.h @@ -35,6 +35,75 @@ class ResultSinkOperator; namespace vectorized { class VExprContext; +struct ResultFileOptions { + // [[deprecated]] + bool is_local_file; + std::string file_path; + TFileFormatType::type file_format; + std::string column_separator; + std::string line_delimiter; + size_t max_file_size_bytes = 1 * 1024 * 1024 * 1024; // 1GB + std::vector broker_addresses; + std::map broker_properties; + std::string success_file_name; + std::vector> schema; //not use in outfile with parquet format + std::map file_properties; //not use in outfile with parquet 
format + + std::vector parquet_schemas; + TParquetCompressionType::type parquet_commpression_type; + TParquetVersion::type parquet_version; + bool parquert_disable_dictionary; + //note: use outfile with parquet format, have deprecated 9:schema and 10:file_properties + //But in order to consider the compatibility when upgrading, so add a bool to check + //Now the code version is 1.1.2, so when the version is after 1.2, could remove this code. + bool is_refactor_before_flag = false; + std::string orc_schema; + + ResultFileOptions(const TResultFileSinkOptions& t_opt) { + file_path = t_opt.file_path; + file_format = t_opt.file_format; + column_separator = t_opt.__isset.column_separator ? t_opt.column_separator : "\t"; + line_delimiter = t_opt.__isset.line_delimiter ? t_opt.line_delimiter : "\n"; + max_file_size_bytes = + t_opt.__isset.max_file_size_bytes ? t_opt.max_file_size_bytes : max_file_size_bytes; + + is_local_file = true; + if (t_opt.__isset.broker_addresses) { + broker_addresses = t_opt.broker_addresses; + is_local_file = false; + } + if (t_opt.__isset.broker_properties) { + broker_properties = t_opt.broker_properties; + } + if (t_opt.__isset.success_file_name) { + success_file_name = t_opt.success_file_name; + } + if (t_opt.__isset.schema) { + schema = t_opt.schema; + is_refactor_before_flag = true; + } + if (t_opt.__isset.file_properties) { + file_properties = t_opt.file_properties; + } + if (t_opt.__isset.parquet_schemas) { + is_refactor_before_flag = false; + parquet_schemas = t_opt.parquet_schemas; + } + if (t_opt.__isset.parquet_compression_type) { + parquet_commpression_type = t_opt.parquet_compression_type; + } + if (t_opt.__isset.parquet_disable_dictionary) { + parquert_disable_dictionary = t_opt.parquet_disable_dictionary; + } + if (t_opt.__isset.parquet_version) { + parquet_version = t_opt.parquet_version; + } + if (t_opt.__isset.orc_schema) { + orc_schema = t_opt.orc_schema; + } + } +}; + class VResultSink : public DataSink { public: friend class pipeline::ResultSinkOperator; diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index da1b5dc86c..5a45baca56 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -37,57 +37,8 @@ set(ENV_TEST_FILES ) set(EXEC_TEST_FILES - exec/hash_table_test.cpp - exec/olap_common_test.cpp - exec/json_scanner_test.cpp - exec/json_scanner_with_jsonpath_test.cpp - exec/parquet_scanner_test.cpp - exec/orc_scanner_test.cpp - exec/plain_text_line_reader_uncompressed_test.cpp - exec/plain_text_line_reader_gzip_test.cpp - exec/plain_text_line_reader_bzip_test.cpp - exec/plain_text_line_reader_lz4frame_test.cpp - exec/broker_scanner_test.cpp - exec/broker_scan_node_test.cpp - exec/tablet_info_test.cpp - exec/tablet_sink_test.cpp - exec/buffered_reader_test.cpp - exec/es_http_scan_node_test.cpp - exec/es_predicate_test.cpp - exec/es_query_builder_test.cpp - exec/es_scan_reader_test.cpp - exec/s3_reader_test.cpp - exec/multi_bytes_separator_test.cpp - exec/hdfs_file_reader_test.cpp vec/exec/parquet/parquet_thrift_test.cpp vec/exec/parquet/parquet_reader_test.cpp - # exec/new_olap_scan_node_test.cpp - # exec/pre_aggregation_node_test.cpp - # exec/partitioned_hash_table_test.cpp - # exec/olap_scanner_test.cpp - # exec/olap_meta_reader_test.cpp - # exec/olap_scan_node_test.cpp - # exec/mysql_scan_node_test.cpp - # exec/mysql_scanner_test.cpp - # exec/csv_scanner_test.cpp - # exec/csv_scan_node_test.cpp - # exec/csv_scan_bench_test.cpp - # exec/schema_scan_node_test.cpp - # exec/unix_odbc_test.cpp - # exec/schema_scanner_test.cpp - # 
exec/set_executor_test.cpp - # exec/schema_scanner/schema_authors_scanner_test.cpp - # exec/schema_scanner/schema_columns_scanner_test.cpp - # exec/schema_scanner/schema_create_table_scanner_test.cpp - # exec/schema_scanner/schema_open_tables_scanner_test.cpp - # exec/schema_scanner/schema_schemata_scanner_test.cpp - # exec/schema_scanner/schema_table_names_scanner_test.cpp - # exec/schema_scanner/schema_tables_scanner_test.cpp - # exec/schema_scanner/schema_variables_scanner_test.cpp - # exec/schema_scanner/schema_engines_scanner_test.cpp - # exec/schema_scanner/schema_collations_scanner_test.cpp - # exec/schema_scanner/schema_charsets_scanner_test.cpp - # exec/broker_reader_test.cpp ) if(DEFINED DORIS_WITH_LZO) @@ -198,8 +149,6 @@ set(OLAP_TEST_FILES olap/options_test.cpp olap/common_test.cpp olap/primary_key_index_test.cpp - # olap/memtable_flush_executor_test.cpp - # olap/push_handler_test.cpp olap/tablet_cooldown_test.cpp olap/rowid_conversion_test.cpp olap/remote_rowset_gc_test.cpp @@ -209,7 +158,6 @@ set(OLAP_TEST_FILES set(RUNTIME_TEST_FILES # runtime/buffered_tuple_stream_test.cpp - # runtime/sorter_test.cpp # runtime/buffer_control_block_test.cpp # runtime/result_buffer_mgr_test.cpp # runtime/result_sink_test.cpp @@ -242,7 +190,6 @@ set(RUNTIME_TEST_FILES runtime/small_file_mgr_test.cpp runtime/heartbeat_flags_test.cpp runtime/result_queue_mgr_test.cpp - runtime/memory_scratch_sink_test.cpp runtime/test_env.cc runtime/external_scan_context_mgr_test.cpp runtime/memory/chunk_allocator_test.cpp @@ -290,9 +237,6 @@ set(UTIL_TEST_FILES util/rle_encoding_test.cpp util/tdigest_test.cpp util/block_compression_test.cpp - util/arrow/arrow_row_block_test.cpp - util/arrow/arrow_row_batch_test.cpp - util/arrow/arrow_work_flow_test.cpp util/counter_cond_variable_test.cpp util/frame_of_reference_coding_test.cpp util/bit_stream_utils_test.cpp diff --git a/be/test/exec/broker_reader_test.cpp b/be/test/exec/broker_reader_test.cpp deleted file mode 100644 index 0923b4edb4..0000000000 --- a/be/test/exec/broker_reader_test.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "io/broker_reader.h" - -#include - -#include -#include -#include - -#include "common/status.h" -#include "gen_cpp/PaloBrokerService_types.h" -#include "gen_cpp/TPaloBrokerService.h" -#include "util/cpu_info.h" -#include "util/stopwatch.hpp" - -namespace doris { - -class RuntimeState; - -class BrokerReaderTest : public testing::Test { -public: - BrokerReaderTest() { init(); } - void init(); - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - ExecEnv* _env; - std::map _properties; - std::vector _addresses; -}; - -void BrokerReaderTest::init() { - _properties["username"] = "root"; - _properties["password"] = "passwd"; - TNetworkAddress addr; - addr.__set_hostname("host"); - addr.__set_port(9999); - _addresses.push_back(addr); -} - -TEST_F(BrokerReaderTest, normal) { - std::string path = "hdfs://host:port/dir"; - BrokerReader reader(_env, _addresses, _properties, path, 0); - auto st = reader.open(); - EXPECT_TRUE(st.ok()); - uint8_t buf[128 * 1024]; - MonotonicStopWatch watch; - watch.start(); - bool eof = false; - size_t total_size = 0; - while (!eof) { - size_t buf_len = 128 * 1024; - st = reader.read(buf, &buf_len, &eof); - EXPECT_TRUE(st.ok()); - total_size += buf_len; - } - LOG(INFO) << "get from broker " << total_size << " bytes using " << watch.elapsed_time(); -} - -} // end namespace doris diff --git a/be/test/exec/broker_scan_node_test.cpp b/be/test/exec/broker_scan_node_test.cpp deleted file mode 100644 index 5ca63810f2..0000000000 --- a/be/test/exec/broker_scan_node_test.cpp +++ /dev/null @@ -1,487 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/broker_scan_node.h" - -#include - -#include -#include -#include - -#include "common/object_pool.h" -#include "exprs/cast_functions.h" -#include "gen_cpp/Descriptors_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "io/local_file_reader.h" -#include "runtime/descriptors.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "runtime/user_function_cache.h" - -namespace doris { - -class BrokerScanNodeTest : public testing::Test { -public: - BrokerScanNodeTest() : _runtime_state(TQueryGlobals()) { - init(); - _runtime_state.init_mem_trackers(); - } - void init(); - static void SetUpTestCase() { - UserFunctionCache::instance()->init( - "./be/test/runtime/test_data/user_function_cache/normal"); - CastFunctions::init(); - } - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - void init_desc_table(); - RuntimeState _runtime_state; - ObjectPool _obj_pool; - std::map _slots_map; - TBrokerScanRangeParams _params; - DescriptorTbl* _desc_tbl; - TPlanNode _tnode; -}; - -void BrokerScanNodeTest::init_desc_table() { - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::MYSQL_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - int next_slot_id = 1; - // TSlotDescriptor - // int offset = 1; - // int i = 0; - // k1 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 0; - slot_desc.byteOffset = 0; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k1"; - slot_desc.slotIdx = 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k2 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 4; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k2"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k3 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 8; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k3"; - slot_desc.slotIdx = 3; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k4(partitioned column) - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent 
= 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 12; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k4"; - slot_desc.slotIdx = 4; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - - t_desc_table.__isset.slotDescriptors = true; - { - // TTupleDescriptor dest - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = 16; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - - // source tuple descriptor - // TSlotDescriptor - // int offset = 1; - // int i = 0; - // k1 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 0; - slot_desc.byteOffset = 0; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k1"; - slot_desc.slotIdx = 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k2 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 16; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k2"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k3 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 32; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k3"; - slot_desc.slotIdx = 3; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k4(partitioned column) - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 48; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k4"; - slot_desc.slotIdx = 4; - 
slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - - { - // TTupleDescriptor source - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 1; - t_tuple_desc.byteSize = 64; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - _runtime_state.set_desc_tbl(_desc_tbl); -} - -void BrokerScanNodeTest::init() { - _params.column_separator = ','; - _params.line_delimiter = '\n'; - - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - TTypeDesc varchar_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(5000); - node.__set_scalar_type(scalar_type); - varchar_type.types.push_back(node); - } - - for (int i = 0; i < 4; ++i) { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = int_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttoint"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = int_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_int_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 5 + i; - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(i + 1, expr); - _params.src_slot_ids.push_back(5 + i); - } - // _params.__isset.expr_of_dest_slot = true; - _params.__set_dest_tuple_id(0); - _params.__set_src_tuple_id(1); - - init_desc_table(); - - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.broker_scan_node.tuple_id = 0; - _tnode.__isset.broker_scan_node = true; -} - -TEST_F(BrokerScanNodeTest, normal) { - BrokerScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - scan_node.init(_tnode); - auto status = scan_node.prepare(&_runtime_state); - EXPECT_TRUE(status.ok()); - - // set scan range - std::vector scan_ranges; - - { - TScanRangeParams scan_range_params; - - TBrokerScanRange broker_scan_range; - broker_scan_range.params = _params; - - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 0; - range.size = -1; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - range.splittable = true; - std::vector columns_from_path {"1"}; - range.__set_columns_from_path(columns_from_path); - range.__set_num_of_columns_from_file(3); - broker_scan_range.ranges.push_back(range); - - 
scan_range_params.scan_range.__set_broker_scan_range(broker_scan_range); - - scan_ranges.push_back(scan_range_params); - } - { - TScanRangeParams scan_range_params; - - TBrokerScanRange broker_scan_range; - broker_scan_range.params = _params; - - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 1; - range.size = 7; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - range.splittable = true; - std::vector columns_from_path {"2"}; - range.__set_columns_from_path(columns_from_path); - range.__set_num_of_columns_from_file(3); - broker_scan_range.ranges.push_back(range); - - scan_range_params.scan_range.__set_broker_scan_range(broker_scan_range); - - scan_ranges.push_back(scan_range_params); - } - - scan_node.set_scan_ranges(scan_ranges); - - status = scan_node.open(&_runtime_state); - EXPECT_TRUE(status.ok()); - - // Get batch - RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size()); - - bool eos = false; - status = scan_node.get_next(&_runtime_state, &batch, &eos); - EXPECT_EQ(3, batch.num_rows()); - EXPECT_FALSE(eos); - - batch.reset(); - status = scan_node.get_next(&_runtime_state, &batch, &eos); - EXPECT_EQ(1, batch.num_rows()); - EXPECT_FALSE(eos); - - batch.reset(); - status = scan_node.get_next(&_runtime_state, &batch, &eos); - EXPECT_EQ(0, batch.num_rows()); - EXPECT_TRUE(eos); - - scan_node.close(&_runtime_state); - { - std::stringstream ss; - scan_node.runtime_profile()->pretty_print(&ss); - LOG(INFO) << ss.str(); - } -} - -} // namespace doris diff --git a/be/test/exec/broker_scanner_test.cpp b/be/test/exec/broker_scanner_test.cpp deleted file mode 100644 index e3c784dd34..0000000000 --- a/be/test/exec/broker_scanner_test.cpp +++ /dev/null @@ -1,742 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/broker_scanner.h" - -#include - -#include -#include -#include - -#include "common/object_pool.h" -#include "exprs/cast_functions.h" -#include "gen_cpp/Descriptors_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "io/local_file_reader.h" -#include "runtime/descriptors.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "runtime/user_function_cache.h" - -namespace doris { - -class BrokerScannerTest : public testing::Test { -public: - BrokerScannerTest() : _runtime_state(TQueryGlobals()) { - init(); - _profile = _runtime_state.runtime_profile(); - _runtime_state.init_mem_trackers(); - } - void init(); - - static void SetUpTestCase() { - UserFunctionCache::instance()->init( - "./be/test/runtime/test_data/user_function_cache/normal"); - CastFunctions::init(); - } - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - void init_desc_table(); - void init_params(); - - RuntimeState _runtime_state; - RuntimeProfile* _profile; - ObjectPool _obj_pool; - std::map _slots_map; - TBrokerScanRangeParams _params; - DescriptorTbl* _desc_tbl; - std::vector _addresses; - ScannerCounter _counter; - std::vector _pre_filter; -}; - -void BrokerScannerTest::init_desc_table() { - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::MYSQL_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - int next_slot_id = 1; - // TSlotDescriptor - // int offset = 1; - // int i = 0; - // k1 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 0; - slot_desc.byteOffset = 0; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k1"; - slot_desc.slotIdx = 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k2 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 4; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k2"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k3 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 8; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k3"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - 
t_desc_table.slotDescriptors.push_back(slot_desc); - } - - t_desc_table.__isset.slotDescriptors = true; - { - // TTupleDescriptor dest - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = 12; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - - // source tuple descriptor - // TSlotDescriptor - // int offset = 1; - // int i = 0; - // k1 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 0; - slot_desc.byteOffset = 0; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k1"; - slot_desc.slotIdx = 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k2 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 16; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k2"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - // k3 - { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = 32; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = -1; - slot_desc.colName = "k3"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - - { - // TTupleDescriptor source - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 1; - t_tuple_desc.byteSize = 48; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - _runtime_state.set_desc_tbl(_desc_tbl); -} - -void BrokerScannerTest::init_params() { - _params.column_separator = ','; - _params.line_delimiter = '\n'; - - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - TTypeDesc varchar_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(5000); - node.__set_scalar_type(scalar_type); - varchar_type.types.push_back(node); - } - - for (int i = 0; i < 3; ++i) { - TExprNode cast_expr; - 
cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = int_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttoint"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = int_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_int_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 4 + i; - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(i + 1, expr); - _params.src_slot_ids.push_back(4 + i); - } - // _params.__isset.expr_of_dest_slot = true; - _params.__set_dest_tuple_id(0); - _params.__set_src_tuple_id(1); -} - -void BrokerScannerTest::init() { - init_desc_table(); - init_params(); -} - -TEST_F(BrokerScannerTest, normal) { - std::vector ranges; - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 0; - range.size = -1; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // 1,2,3 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(1, *(int*)tuple->get_slot(0)); - EXPECT_EQ(2, *(int*)tuple->get_slot(4)); - EXPECT_EQ(3, *(int*)tuple->get_slot(8)); - - // 4,5,6 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(4, *(int*)tuple->get_slot(0)); - EXPECT_EQ(5, *(int*)tuple->get_slot(4)); - EXPECT_EQ(6, *(int*)tuple->get_slot(8)); - - // 7, 8, unqualified - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_FALSE(fill_tuple); - - // 8,9,10 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(8, *(int*)tuple->get_slot(0)); - EXPECT_EQ(9, *(int*)tuple->get_slot(4)); - EXPECT_EQ(10, *(int*)tuple->get_slot(8)); - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, normal2) { - std::vector ranges; - - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal2_1.csv"; - range.start_offset = 0; - range.size = 7; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - range.path = "./be/test/exec/test_data/broker_scanner/normal2_2.csv"; - range.start_offset = 0; - range.size = 4; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = 
scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // 1,2,3 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(1, *(int*)tuple->get_slot(0)); - EXPECT_EQ(2, *(int*)tuple->get_slot(4)); - EXPECT_EQ(3, *(int*)tuple->get_slot(8)); - - // 3,4,5 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_TRUE(fill_tuple); - EXPECT_EQ(3, *(int*)tuple->get_slot(0)); - EXPECT_EQ(4, *(int*)tuple->get_slot(4)); - EXPECT_EQ(5, *(int*)tuple->get_slot(8)); - - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(fill_tuple); - EXPECT_FALSE(eof); - - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(fill_tuple); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, normal3) { - std::vector ranges; - - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal2_1.csv"; - range.start_offset = 0; - range.size = 7; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - range.path = "./be/test/exec/test_data/broker_scanner/normal2_2.csv"; - range.start_offset = 0; - range.size = 5; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // 1,2,3 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(1, *(int*)tuple->get_slot(0)); - EXPECT_EQ(2, *(int*)tuple->get_slot(4)); - EXPECT_EQ(3, *(int*)tuple->get_slot(8)); - - // 3,4,5 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_TRUE(fill_tuple); - EXPECT_EQ(3, *(int*)tuple->get_slot(0)); - EXPECT_EQ(4, *(int*)tuple->get_slot(4)); - EXPECT_EQ(5, *(int*)tuple->get_slot(8)); - - // first line of normal2_2.csv is 2,3, which is unqualified - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_FALSE(fill_tuple); - - // 4,5,6 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(4, *(int*)tuple->get_slot(0)); - EXPECT_EQ(5, *(int*)tuple->get_slot(4)); - EXPECT_EQ(6, *(int*)tuple->get_slot(8)); - - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, normal4) { - std::vector ranges; - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 0; - range.size = 7; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // 1,2,3 - st = scanner.get_next(tuple, &tuple_pool, &eof, 
&fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(1, *(int*)tuple->get_slot(0)); - EXPECT_EQ(2, *(int*)tuple->get_slot(4)); - EXPECT_EQ(3, *(int*)tuple->get_slot(8)); - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, normal5) { - std::vector ranges; - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 0; - range.size = 0; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, normal6) { - std::vector ranges; - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 1; - range.size = 7; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // 4,5,6 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(4, *(int*)tuple->get_slot(0)); - EXPECT_EQ(5, *(int*)tuple->get_slot(4)); - EXPECT_EQ(6, *(int*)tuple->get_slot(8)); - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, normal7) { - std::vector ranges; - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 1; - range.size = 6; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, normal8) { - std::vector ranges; - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 7; - range.size = 1; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // 4,5,6 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - 
EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(4, *(int*)tuple->get_slot(0)); - EXPECT_EQ(5, *(int*)tuple->get_slot(4)); - EXPECT_EQ(6, *(int*)tuple->get_slot(8)); - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, normal9) { - std::vector ranges; - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/normal.csv"; - range.start_offset = 8; - range.size = 1; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BrokerScannerTest, multi_bytes_1) { - std::vector ranges; - TBrokerRangeDesc range; - range.path = "./be/test/exec/test_data/broker_scanner/multi_bytes_sep.csv"; - range.start_offset = 0; - range.size = 18; - range.splittable = true; - range.file_type = TFileType::FILE_LOCAL; - range.format_type = TFileFormatType::FORMAT_CSV_PLAIN; - ranges.push_back(range); - - _params.column_separator_str = "AAAA"; - _params.line_delimiter_str = "BB"; - _params.column_separator_length = 4; - _params.line_delimiter_length = 2; - BrokerScanner scanner(&_runtime_state, _profile, _params, ranges, _addresses, _pre_filter, - &_counter); - auto st = scanner.open(); - EXPECT_TRUE(st.ok()); - - MemPool tuple_pool; - Tuple* tuple = (Tuple*)tuple_pool.allocate(20); - bool fill_tuple; - bool eof = false; - // 4,5,6 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(4, *(int*)tuple->get_slot(0)); - EXPECT_EQ(5, *(int*)tuple->get_slot(4)); - EXPECT_EQ(6, *(int*)tuple->get_slot(8)); - // 1,2,3 - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(1, *(int*)tuple->get_slot(0)); - EXPECT_EQ(2, *(int*)tuple->get_slot(4)); - EXPECT_EQ(3, *(int*)tuple->get_slot(8)); - // end of file - st = scanner.get_next(tuple, &tuple_pool, &eof, &fill_tuple); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -} // end namespace doris diff --git a/be/test/exec/buffered_reader_test.cpp b/be/test/exec/buffered_reader_test.cpp deleted file mode 100644 index 940635a7f1..0000000000 --- a/be/test/exec/buffered_reader_test.cpp +++ /dev/null @@ -1,182 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "io/buffered_reader.h" - -#include - -#include "io/local_file_reader.h" -#include "util/stopwatch.hpp" - -namespace doris { -class BufferedReaderTest : public testing::Test { -public: - BufferedReaderTest() {} - -protected: - virtual void SetUp() {} - virtual void TearDown() {} -}; - -TEST_F(BufferedReaderTest, normal_use) { - RuntimeProfile profile("test"); - // buffered_reader_test_file 950 bytes - auto file_reader = new LocalFileReader( - "./be/test/exec/test_data/buffered_reader/buffered_reader_test_file", 0); - BufferedReader reader(&profile, file_reader, 1024); - auto st = reader.open(); - EXPECT_TRUE(st.ok()); - uint8_t buf[1024]; - MonotonicStopWatch watch; - watch.start(); - int64_t read_length = 0; - st = reader.readat(0, 1024, &read_length, buf); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(950, read_length); - LOG(INFO) << "read bytes " << read_length << " using time " << watch.elapsed_time(); -} - -TEST_F(BufferedReaderTest, test_validity) { - RuntimeProfile profile("test"); - // buffered_reader_test_file.txt 45 bytes - auto file_reader = new LocalFileReader( - "./be/test/exec/test_data/buffered_reader/buffered_reader_test_file.txt", 0); - BufferedReader reader(&profile, file_reader, 64); - auto st = reader.open(); - EXPECT_TRUE(st.ok()); - uint8_t buf[10]; - bool eof = false; - int64_t buf_len = 10; - int64_t read_length = 0; - - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("bdfhjlnprt", std::string((char*)buf, read_length).c_str()); - EXPECT_FALSE(eof); - - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("vxzAbCdEfG", std::string((char*)buf, read_length).c_str()); - EXPECT_FALSE(eof); - - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("hIj\n\nMnOpQ", std::string((char*)buf, read_length).c_str()); - EXPECT_FALSE(eof); - - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("rStUvWxYz\n", std::string((char*)buf, read_length).c_str()); - EXPECT_FALSE(eof); - - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("IjKl", std::string((char*)buf, 4).c_str()); - EXPECT_FALSE(eof); - - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BufferedReaderTest, test_seek) { - RuntimeProfile profile("test"); - // buffered_reader_test_file.txt 45 bytes - auto file_reader = new LocalFileReader( - "./be/test/exec/test_data/buffered_reader/buffered_reader_test_file.txt", 0); - BufferedReader reader(&profile, file_reader, 64); - auto st = reader.open(); - EXPECT_TRUE(st.ok()); - uint8_t buf[10]; - bool eof = false; - size_t buf_len = 10; - int64_t read_length = 0; - - // Seek to the end of the file - st = reader.seek(45); - EXPECT_TRUE(st.ok()); - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); - - // Seek to the beginning of the file - st = reader.seek(0); - EXPECT_TRUE(st.ok()); - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("bdfhjlnprt", std::string((char*)buf, read_length).c_str()); - EXPECT_FALSE(eof); - - // Seek to a wrong position - st = reader.seek(-1); - EXPECT_TRUE(st.ok()); - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("bdfhjlnprt", std::string((char*)buf, read_length).c_str()); - EXPECT_FALSE(eof); - - // Seek to a wrong position - st = reader.seek(-1000); - 
EXPECT_TRUE(st.ok()); - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("bdfhjlnprt", std::string((char*)buf, read_length).c_str()); - EXPECT_FALSE(eof); - - // Seek to a wrong position - st = reader.seek(1000); - EXPECT_TRUE(st.ok()); - st = reader.read(buf, buf_len, &read_length, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(BufferedReaderTest, test_miss) { - RuntimeProfile profile("test"); - // buffered_reader_test_file.txt 45 bytes - auto file_reader = new LocalFileReader( - "./be/test/exec/test_data/buffered_reader/buffered_reader_test_file.txt", 0); - BufferedReader reader(&profile, file_reader, 64); - auto st = reader.open(); - EXPECT_TRUE(st.ok()); - uint8_t buf[128]; - int64_t bytes_read; - - st = reader.readat(20, 10, &bytes_read, buf); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("hIj\n\nMnOpQ", std::string((char*)buf, (size_t)bytes_read).c_str()); - EXPECT_EQ(10, bytes_read); - - st = reader.readat(0, 5, &bytes_read, buf); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("bdfhj", std::string((char*)buf, (size_t)bytes_read).c_str()); - EXPECT_EQ(5, bytes_read); - - st = reader.readat(5, 10, &bytes_read, buf); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("lnprtvxzAb", std::string((char*)buf, (size_t)bytes_read).c_str()); - EXPECT_EQ(10, bytes_read); - - // if requested length is larger than the capacity of buffer, do not - // need to copy the character into local buffer. - st = reader.readat(0, 128, &bytes_read, buf); - EXPECT_TRUE(st.ok()); - EXPECT_STREQ("bdfhjlnprt", std::string((char*)buf, 10).c_str()); - EXPECT_EQ(45, bytes_read); -} - -} // end namespace doris diff --git a/be/test/exec/csv_scan_bench_test.cpp b/be/test/exec/csv_scan_bench_test.cpp deleted file mode 100644 index 0fa1a5b4b9..0000000000 --- a/be/test/exec/csv_scan_bench_test.cpp +++ /dev/null @@ -1,325 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include - -#include - -#include "exec/csv_scan_node.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "gperftools/profiler.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "util/debug_util.h" - -namespace doris { - -class CsvScanNodeBenchTest : public testing::Test { -public: - CsvScanNodeBenchTest() {} - ~CsvScanNodeBenchTest() {} - -protected: - virtual void SetUp() { - config::mini_load_download_path = "./test_run"; - - system("mkdir -p ./test_run/test_db_name/test_label"); - system("pwd"); - system("cp -r ./be/test/query/exec/test_data/csv_scanner ./test_run/."); - init(); - } - virtual void TearDown() { system("rm -rf ./test_run"); } - - void init(); - void init_desc_tbl(); - void init_runtime_state(); - -private: - ObjectPool _obj_pool; - TDescriptorTable _t_desc_table; - DescriptorTbl* _desc_tbl; - RuntimeState* _state; - TPlanNode _tnode; -}; // end class CsvScanNodeBenchTest - -void CsvScanNodeBenchTest::init() { - init_desc_tbl(); - init_runtime_state(); -} - -void CsvScanNodeBenchTest::init_runtime_state() { - _state = _obj_pool.add(new RuntimeState("2015-04-27 01:01:01")); - _state->set_desc_tbl(_desc_tbl); -} - -void CsvScanNodeBenchTest::init_desc_tbl() { - // TTableDescriptor - TTableDescriptor t_table_desc; - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = "test"; - t_table_desc.tableName = "test_table_name"; - t_table_desc.dbName = "test_db_name"; - t_table_desc.__isset.olapTable = true; - - _t_desc_table.tableDescriptors.push_back(t_table_desc); - _t_desc_table.__isset.tableDescriptors = true; - - // TSlotDescriptor - std::vector slot_descs; - int offset = 1; - int i = 0; - // UserId - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("column0"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // column 2 - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("column1"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // column 3 - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("column2"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // column 4: varchar - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_VARCHAR)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - 
t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("column3"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(StringValue); - } - ++i; - // Date - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_DATE)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("column4"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(DateTimeValue); - } - ++i; - // DateTime - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_DATETIME)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("column5"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(DateTimeValue); - } - ++i; - // - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_VARCHAR)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("column6"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(StringValue); - } - _t_desc_table.__set_slotDescriptors(slot_descs); - - // TTupleDescriptor - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - _t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, _t_desc_table, &_desc_tbl); - - // node - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::CSV_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.csv_scan_node.tuple_id = 0; - - _tnode.csv_scan_node.__set_column_separator(","); - _tnode.csv_scan_node.__set_line_delimiter("\n"); - - // column_type_mapping - std::map column_type_map; - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::INT); - column_type_map["column0"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::INT); - column_type_map["column1"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::INT); - column_type_map["column2"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::VARCHAR); - column_type_map["column3"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::DATE); - column_type_map["column4"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::DATETIME); - column_type_map["column5"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::VARCHAR); - column_type_map["column6"] = column_type; - } - _tnode.csv_scan_node.__set_column_type_mapping(column_type_map); - - std::vector file_paths; - // file_paths.push_back("./test_run/csv_scanner/csv_file1"); - // file_paths.push_back("./test_run/csv_scanner/csv_file2"); - 
file_paths.push_back("/home/ling/tmp/100_wan_line_data"); - _tnode.csv_scan_node.__set_file_paths(file_paths); - - _tnode.csv_scan_node.__set_column_separator("\t"); - _tnode.csv_scan_node.__set_line_delimiter("\n"); - - std::vector columns; - columns.push_back("column0"); - columns.push_back("column1"); - columns.push_back("column2"); - columns.push_back("column3"); - columns.push_back("column4"); - columns.push_back("column5"); - columns.push_back("column6"); - _tnode.csv_scan_node.__set_columns(columns); - - _tnode.csv_scan_node.__isset.unspecified_columns = true; - _tnode.csv_scan_node.__isset.default_values = true; - _tnode.csv_scan_node.max_filter_ratio = 0.5; - _tnode.__isset.csv_scan_node = true; -} - -TEST_F(CsvScanNodeBenchTest, NormalUse) { - CsvScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(_state); - EXPECT_TRUE(status.ok()); - - bool eos = false; - - while (!eos) { - // RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); - RowBatch row_batch(scan_node._row_descriptor, 1024); - status = scan_node.get_next(_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - // int num = std::min(row_batch.num_rows(), 10); - int num = row_batch.num_rows(); - // EXPECT_TRUE(num > 0); - } - - EXPECT_TRUE(scan_node.close(_state).ok()); - - { - std::stringstream ss; - scan_node.runtime_profile()->pretty_print(&ss); - LOG(WARNING) << ss.str(); - } -} - -} // end namespace doris diff --git a/be/test/exec/csv_scan_node_test.cpp b/be/test/exec/csv_scan_node_test.cpp deleted file mode 100644 index f8d9251f99..0000000000 --- a/be/test/exec/csv_scan_node_test.cpp +++ /dev/null @@ -1,408 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/csv_scan_node.h" - -#include - -#include - -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" -#include "util/cpu_info.h" -#include "util/debug_util.h" -#include "util/disk_info.h" - -namespace doris { - -class CsvScanNodeTest : public testing::Test { -public: - CsvScanNodeTest() {} - ~CsvScanNodeTest() {} - -protected: - virtual void SetUp() { - config::periodic_counter_update_period_ms = 500; - config::storage_root_path = "./data"; - _env.reset(new ExecEnv()); - - system("mkdir -p ./test_run/output/"); - system("pwd"); - system("cp -r ./be/test/exec/test_data/csv_scan_node ./test_run/."); - init(); - } - virtual void TearDown() { - _obj_pool.clear(); - _env.reset(); - // system("rm -rf ./test_run"); - } - - void init(); - void init_desc_tbl(); - void init_runtime_state(); - -private: - ObjectPool _obj_pool; - TDescriptorTable _t_desc_table; - DescriptorTbl* _desc_tbl; - TPlanNode _tnode; - std::unique_ptr _env; - RuntimeState* _state; -}; // end class CsvScanNodeTest - -void CsvScanNodeTest::init() { - _env->init_for_tests(); - init_desc_tbl(); - init_runtime_state(); -} - -void CsvScanNodeTest::init_runtime_state() { - _state = _obj_pool.add(new RuntimeState(TUniqueId(), TQueryOptions(), "", _env.get())); - _state->set_desc_tbl(_desc_tbl); - _state->_load_dir = "./test_run/output/"; - _state->init_mem_trackers(TUniqueId()); -} - -void CsvScanNodeTest::init_desc_tbl() { - // TTableDescriptor - TTableDescriptor t_table_desc; - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = "test"; - t_table_desc.tableName = "test_table_name"; - t_table_desc.dbName = "test_db_name"; - t_table_desc.__isset.olapTable = true; - - _t_desc_table.tableDescriptors.push_back(t_table_desc); - _t_desc_table.__isset.tableDescriptors = true; - - // TSlotDescriptor - std::vector slot_descs; - int offset = 1; - int i = 0; - // int_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(gen_type_desc(TPrimitiveType::INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("int_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // decimal_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - TTypeDesc ttype = gen_type_desc(TPrimitiveType::DECIMALV2); - ttype.types[0].scalar_type.__set_precision(10); - ttype.types[0].scalar_type.__set_scale(5); - t_slot_desc.__set_slotType(ttype); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("decimal_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(DecimalValueV2); - } - ++i; - // date_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(gen_type_desc(TPrimitiveType::DATE)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - 
t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("date_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(DateTimeValue); - } - ++i; - // fix_len_string_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - TTypeDesc ttype = gen_type_desc(TPrimitiveType::CHAR); - ttype.types[0].scalar_type.__set_len(5); - t_slot_desc.__set_slotType(ttype); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("fix_len_string_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(StringValue); - } - _t_desc_table.__set_slotDescriptors(slot_descs); - - // TTupleDescriptor - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - _t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, _t_desc_table, &_desc_tbl); - - // node - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::CSV_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.csv_scan_node.tuple_id = 0; - - _tnode.csv_scan_node.__set_column_separator(","); - _tnode.csv_scan_node.__set_line_delimiter("\n"); - - // column_type_mapping - std::map column_type_map; - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::INT); - column_type_map["int_column"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::DECIMALV2); - column_type.__set_precision(10); - column_type.__set_scale(5); - column_type_map["decimal_column"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::DATE); - column_type_map["date_column"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::BIGINT); - column_type.__set_len(5); - column_type_map["fix_len_string_column"] = column_type; - } - _tnode.csv_scan_node.__set_column_type_mapping(column_type_map); - - std::vector columns; - columns.push_back("int_column"); - columns.push_back("date_column"); - columns.push_back("decimal_column"); - columns.push_back("fix_len_string_column"); - _tnode.csv_scan_node.__set_columns(columns); - - _tnode.csv_scan_node.__isset.unspecified_columns = true; - _tnode.csv_scan_node.__isset.default_values = true; - _tnode.csv_scan_node.max_filter_ratio = 0.5; - _tnode.__isset.csv_scan_node = true; -} - -TEST_F(CsvScanNodeTest, NormalUse) { - std::vector file_paths; - file_paths.push_back("./test_run/csv_scan_node/normal_use"); - _tnode.csv_scan_node.__set_file_paths(file_paths); - - CsvScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(_state); - EXPECT_TRUE(status.ok()); - - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - // int num = std::min(row_batch.num_rows(), 10); - int num = row_batch.num_rows(); - EXPECT_EQ(num, 6); - - for (int i = 0; i < num; ++i) { - TupleRow* row = row_batch.get_row(i); - if (i == 0) { - EXPECT_EQ(std::string("[(1 -12345.67891 2015-04-20 
abc\0\0)]", 35), - print_row(row, scan_node._row_descriptor)); - } - } - } - - EXPECT_TRUE(scan_node.close(_state).ok()); -} - -TEST_F(CsvScanNodeTest, continuousDelim) { - std::vector file_paths; - file_paths.push_back("./test_run/csv_scan_node/continuous_delim"); - _tnode.csv_scan_node.__set_file_paths(file_paths); - - CsvScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(_state); - EXPECT_TRUE(status.ok()); - - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - // int num = std::min(row_batch.num_rows(), 10); - int num = row_batch.num_rows(); - EXPECT_EQ(num, 1); - - for (int i = 0; i < num; ++i) { - TupleRow* row = row_batch.get_row(i); - if (i == 0) { - EXPECT_EQ(std::string("[(1 -12345.67891 2015-04-20 \0\0\0\0\0)]", 35), - print_row(row, scan_node._row_descriptor)); - } - } - } - - EXPECT_TRUE(scan_node.close(_state).ok()); -} - -TEST_F(CsvScanNodeTest, wrong_decimal_format_test) { - std::vector file_paths; - file_paths.push_back("./test_run/csv_scan_node/wrong_decimal_format"); - _tnode.csv_scan_node.__set_file_paths(file_paths); - - CsvScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(_state); - EXPECT_TRUE(status.ok()); - - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - // int num = std::min(row_batch.num_rows(), 10); - int num = row_batch.num_rows(); - EXPECT_EQ(0, num); - } - - // Failed because reach max_filter_ratio - EXPECT_TRUE(!scan_node.close(_state).ok()); -} - -TEST_F(CsvScanNodeTest, fill_fix_len_stringi_test) { - std::vector file_paths; - file_paths.push_back("./test_run/csv_scan_node/fill_string_len"); - _tnode.csv_scan_node.__set_file_paths(file_paths); - - CsvScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(_state); - EXPECT_TRUE(status.ok()); - - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - // int num = std::min(row_batch.num_rows(), 10); - int num = row_batch.num_rows(); - EXPECT_TRUE(num > 0); - - // 1,2015-04-20,12345.67891,abcdefg - for (int i = 0; i < num; ++i) { - TupleRow* row = row_batch.get_row(i); - LOG(WARNING) << "input row[" << i << "]: " << print_row(row, scan_node._row_descriptor); - - if (i == 0) { - EXPECT_EQ(std::string("[(1 12345.67891 2015-04-20 ab\0\0\0)]", 34), - print_row(row, scan_node._row_descriptor)); - Tuple* tuple = row->get_tuple(0); - StringValue* str_slot = - tuple->get_string_slot(_t_desc_table.slotDescriptors[3].byteOffset); - EXPECT_EQ(5, str_slot->len); - } - } - } - - EXPECT_TRUE(scan_node.close(_state).ok()); -} - -TEST_F(CsvScanNodeTest, wrong_fix_len_string_format_test) { - std::vector file_paths; - file_paths.push_back("./test_run/csv_scan_node/wrong_fix_len_string"); - _tnode.csv_scan_node.__set_file_paths(file_paths); - - CsvScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(_state); - 
EXPECT_TRUE(status.ok()); - - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - // int num = std::min(row_batch.num_rows(), 10); - int num = row_batch.num_rows(); - EXPECT_EQ(0, num); - } - - // Failed because reach max_filter_ratio - EXPECT_TRUE(!scan_node.close(_state).ok()); -} - -// To be added test case -// 1. String import -// 2. Do not specify columns with default values -// 3. If there is a column in the file but not in the table, the column is skipped in the import command -// 4. max_filter_ratio - -} // end namespace doris diff --git a/be/test/exec/csv_scanner_test.cpp b/be/test/exec/csv_scanner_test.cpp deleted file mode 100644 index cc879eb69e..0000000000 --- a/be/test/exec/csv_scanner_test.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/csv_scanner.h" - -#include - -namespace doris { - -class CsvScannerTest : public testing::Test { -public: - CsvScannerTest() {} - -protected: - virtual void SetUp() { init(); } - virtual void TearDown() { system("rm -rf ./test_run"); } - - void init(); - - void init_desc_tbl(); - -private: - std::vector _file_paths; -}; - -void CsvScannerTest::init() { - system("mkdir -p ./test_run"); - system("pwd"); - system("cp -r ./be/test/exec/test_data/csv_scanner ./test_run/."); - - _file_paths.push_back("./test_run/csv_scanner/csv_file1"); - _file_paths.push_back("./test_run/csv_scanner/csv_file2"); -} - -TEST_F(CsvScannerTest, normal_use) { - CsvScanner scanner(_file_paths); - Status status = scanner.open(); - EXPECT_TRUE(status.ok()); - - std::string line_str; - bool eos = false; - status = scanner.get_next_row(&line_str, &eos); - EXPECT_TRUE(status.ok()); - - while (!eos) { - status = scanner.get_next_row(&line_str, &eos); - - if (eos) { - break; - } - EXPECT_TRUE(status.ok()); - - LOG(WARNING) << line_str; - } -} - -TEST_F(CsvScannerTest, no_exist_files) { - std::vector no_exist_files; - no_exist_files.push_back("no_exist_files1"); - no_exist_files.push_back("no_exist_files2"); - - CsvScanner scanner(no_exist_files); - Status status = scanner.open(); - // check until 'get_next_row()' - EXPECT_TRUE(status.ok()); - - std::string line_str; - bool eos = false; - status = scanner.get_next_row(&line_str, &eos); - EXPECT_FALSE(status.ok()); -} - -} // end namespace doris diff --git a/be/test/exec/es_http_scan_node_test.cpp b/be/test/exec/es_http_scan_node_test.cpp deleted file mode 100644 index 7d0a63596b..0000000000 --- a/be/test/exec/es_http_scan_node_test.cpp +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/es_http_scan_node.h" - -#include - -#include - -#include "common/object_pool.h" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/tuple_row.h" -#include "util/debug_util.h" -#include "util/runtime_profile.h" - -using std::vector; - -namespace doris { - -// mock -class EsHttpScanNodeTest : public testing::Test { -public: - EsHttpScanNodeTest() : _runtime_state(TQueryGlobals()) { - _runtime_state.init_mem_trackers(); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::ES_TABLE; - t_table_desc.numCols = 1; - t_table_desc.numClusteringCols = 0; - t_table_desc.__isset.esTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - // TSlotDescriptor - int offset = 1; - int i = 0; - // id - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_slotType(TypeDescriptor(TYPE_INT).to_thrift()); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int); - } - - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.__isset.slotDescriptors = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - _runtime_state.set_desc_tbl(_desc_tbl); - - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.es_scan_node.tuple_id = 0; - std::map properties; - _tnode.es_scan_node.__set_properties(properties); - _tnode.__isset.es_scan_node = true; - } - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - TPlanNode _tnode; - ObjectPool _obj_pool; - DescriptorTbl* _desc_tbl; - RuntimeState _runtime_state; -}; - -TEST_F(EsHttpScanNodeTest, normal_use) { - EsHttpScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.init(_tnode, &_runtime_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.prepare(&_runtime_state); - EXPECT_TRUE(status.ok()); - - // scan range - TEsScanRange es_scan_range; - es_scan_range.__set_index("index1"); - es_scan_range.__set_type("docs"); - es_scan_range.__set_shard_id(0); - 
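// Review note: the scan range is built through the Thrift nesting
// TScanRangeParams -> TScanRange -> TEsScanRange. The host "unknown:8200"
// configured below is deliberately unreachable, which is presumably why this
// test expects open() to succeed but close() to return a non-ok status.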
TNetworkAddress es_host; - es_host.__set_hostname("unknown"); - es_host.__set_port(8200); - std::vector es_hosts; - es_hosts.push_back(es_host); - es_scan_range.__set_es_hosts(es_hosts); - TScanRange scan_range; - scan_range.__set_es_scan_range(es_scan_range); - TScanRangeParams scan_range_params; - scan_range_params.__set_scan_range(scan_range); - std::vector scan_ranges; - scan_ranges.push_back(scan_range_params); - - status = scan_node.set_scan_ranges(scan_ranges); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(&_runtime_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.close(&_runtime_state); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/es_predicate_test.cpp b/be/test/exec/es_predicate_test.cpp deleted file mode 100644 index 5a91580b36..0000000000 --- a/be/test/exec/es_predicate_test.cpp +++ /dev/null @@ -1,173 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/es/es_predicate.h" - -#include - -#include -#include - -#include "common/logging.h" -#include "common/status.h" -#include "exec/es/es_query_builder.h" -#include "exprs/binary_predicate.h" -#include "gen_cpp/Exprs_types.h" -#include "rapidjson/document.h" -#include "rapidjson/rapidjson.h" -#include "rapidjson/stringbuffer.h" -#include "rapidjson/writer.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/primitive_type.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" - -namespace doris { - -class RuntimeState; - -class EsPredicateTest : public testing::Test { -public: - EsPredicateTest() : _runtime_state(TQueryGlobals()) { - _runtime_state.init_mem_trackers(); - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::ES_TABLE; - t_table_desc.numCols = 1; - t_table_desc.numClusteringCols = 0; - t_table_desc.__isset.esTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - // TSlotDescriptor - int offset = 1; - int i = 0; - // id - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_slotType(TypeDescriptor(TYPE_INT).to_thrift()); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.colName = "id"; - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int); - } - - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.__isset.slotDescriptors = true; - 
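// Review note: this descriptor table mirrors the one in EsHttpScanNodeTest:
// a single materialized INT slot. The colName "id" set above is significant,
// as it becomes the field name in the generated ES query JSON, e.g.
// {"range":{"id":{"gt":"10"}}} in the expected output below.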
t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - _runtime_state.set_desc_tbl(_desc_tbl); - } - - Status build_expr_context_list(std::vector& conjunct_ctxs); - void init(); - void SetUp() override {} - void TearDown() override {} - -private: - ObjectPool _obj_pool; - DescriptorTbl* _desc_tbl; - RuntimeState _runtime_state; -}; - -Status EsPredicateTest::build_expr_context_list(std::vector& conjunct_ctxs) { - TExpr texpr; - { - TExprNode node0; - node0.opcode = TExprOpcode::GT; - node0.child_type = TPrimitiveType::BIGINT; - node0.node_type = TExprNodeType::BINARY_PRED; - node0.num_children = 2; - node0.__isset.opcode = true; - node0.__isset.child_type = true; - node0.type = gen_type_desc(TPrimitiveType::BOOLEAN); - texpr.nodes.emplace_back(node0); - - TExprNode node1; - node1.node_type = TExprNodeType::SLOT_REF; - node1.type = gen_type_desc(TPrimitiveType::INT); - node1.__isset.slot_ref = true; - node1.num_children = 0; - node1.slot_ref.slot_id = 0; - node1.slot_ref.tuple_id = 0; - node1.output_column = true; - node1.__isset.output_column = true; - texpr.nodes.emplace_back(node1); - - TExprNode node2; - TIntLiteral intLiteral; - intLiteral.value = 10; - node2.node_type = TExprNodeType::INT_LITERAL; - node2.type = gen_type_desc(TPrimitiveType::BIGINT); - node2.__isset.int_literal = true; - node2.int_literal = intLiteral; - texpr.nodes.emplace_back(node2); - } - - std::vector conjuncts; - conjuncts.emplace_back(texpr); - Status status = Expr::create_expr_trees(&_obj_pool, conjuncts, &conjunct_ctxs); - - return status; -} - -TEST_F(EsPredicateTest, normal) { - std::vector conjunct_ctxs; - Status status = build_expr_context_list(conjunct_ctxs); - EXPECT_TRUE(status.ok()); - TupleDescriptor* tuple_desc = _desc_tbl->get_tuple_descriptor(0); - std::vector predicates; - for (int i = 0; i < conjunct_ctxs.size(); ++i) { - EsPredicate* predicate = new EsPredicate(conjunct_ctxs[i], tuple_desc, &_obj_pool); - if (predicate->build_disjuncts_list().ok()) { - predicates.push_back(predicate); - } - } - - rapidjson::Document document; - rapidjson::Value compound_bool_value(rapidjson::kObjectType); - compound_bool_value.SetObject(); - BooleanQueryBuilder::to_query(predicates, &document, &compound_bool_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - compound_bool_value.Accept(writer); - std::string actual_bool_json = buffer.GetString(); - std::string expected_json = - "{\"bool\":{\"filter\":[{\"bool\":{\"should\":[{\"range\":{\"id\":{\"gt\":\"10\"}}}]}}]" - "}}"; - LOG(INFO) << "compound bool query" << actual_bool_json; - EXPECT_STREQ(expected_json.c_str(), actual_bool_json.c_str()); - for (auto predicate : predicates) { - delete predicate; - } -} - -} // end namespace doris diff --git a/be/test/exec/es_query_builder_test.cpp b/be/test/exec/es_query_builder_test.cpp deleted file mode 100644 index 020c36a145..0000000000 --- a/be/test/exec/es_query_builder_test.cpp +++ /dev/null @@ -1,624 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/es/es_query_builder.h" - -#include - -#include -#include - -#include "common/logging.h" -#include "exec/es/es_predicate.h" -#include "rapidjson/document.h" -#include "rapidjson/rapidjson.h" -#include "rapidjson/stringbuffer.h" -#include "rapidjson/writer.h" -#include "runtime/string_value.h" -#include "util/debug/leakcheck_disabler.h" - -namespace doris { - -class BooleanQueryBuilderTest : public testing::Test { -public: - BooleanQueryBuilderTest() {} - virtual ~BooleanQueryBuilderTest() {} -}; - -TEST_F(BooleanQueryBuilderTest, term_query) { - // content = "wyf" - char str[] = "wyf"; - StringValue value(str, 3); - ExtLiteral term_literal(TYPE_VARCHAR, &value); - TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(3); - std::string name = "content"; - ExtBinaryPredicate term_predicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::EQ, - term_literal); - TermQueryBuilder term_query(term_predicate); - rapidjson::Document document; - rapidjson::Value term_value(rapidjson::kObjectType); - term_value.SetObject(); - term_query.to_json(&document, &term_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - term_value.Accept(writer); - std::string actual_json = buffer.GetString(); - //LOG(INFO) << "term query" << actual_json; - EXPECT_STREQ("{\"term\":{\"content\":\"wyf\"}}", actual_json.c_str()); -} - -TEST_F(BooleanQueryBuilderTest, range_query) { - // k >= a - char str[] = "a"; - StringValue value(str, 1); - ExtLiteral term_literal(TYPE_VARCHAR, &value); - TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(1); - std::string name = "k"; - ExtBinaryPredicate range_predicate(TExprNodeType::BINARY_PRED, name, type_desc, TExprOpcode::GE, - term_literal); - RangeQueryBuilder range_query(range_predicate); - rapidjson::Document document; - rapidjson::Value range_value(rapidjson::kObjectType); - range_value.SetObject(); - range_query.to_json(&document, &range_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - range_value.Accept(writer); - std::string actual_json = buffer.GetString(); - //LOG(INFO) << "range query" << actual_json; - EXPECT_STREQ("{\"range\":{\"k\":{\"gte\":\"a\"}}}", actual_json.c_str()); -} - -TEST_F(BooleanQueryBuilderTest, es_query) { - // esquery('random', "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}") - char str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; - int length = (int)strlen(str); - TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(length); - std::string name = "random"; - ExtColumnDesc col_des(name, type_desc); - std::vector cols = {col_des}; - StringValue value(str, length); - ExtLiteral term_literal(TYPE_VARCHAR, &value); - std::vector values = {term_literal}; - std::string function_name = "esquery"; - ExtFunction function_predicate(TExprNodeType::FUNCTION_CALL, function_name, cols, values); - ESQueryBuilder es_query(function_predicate); - rapidjson::Document document; - rapidjson::Value es_query_value(rapidjson::kObjectType); - es_query_value.SetObject(); - es_query.to_json(&document, 
&es_query_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - es_query_value.Accept(writer); - std::string actual_json = buffer.GetString(); - //LOG(INFO) << "es query" << actual_json; - EXPECT_STREQ("{\"bool\":{\"must_not\":{\"exists\":{\"field\":\"f1\"}}}}", actual_json.c_str()); -} - -TEST_F(BooleanQueryBuilderTest, like_query) { - // content like 'a%e%g_' - char str[] = "a%e%g_"; - int length = (int)strlen(str); - LOG(INFO) << "length " << length; - TypeDescriptor type_desc = TypeDescriptor::create_varchar_type(length); - StringValue value(str, length); - ExtLiteral like_literal(TYPE_VARCHAR, &value); - std::string name = "content"; - ExtLikePredicate like_predicate(TExprNodeType::LIKE_PRED, name, type_desc, like_literal); - WildCardQueryBuilder like_query(like_predicate); - rapidjson::Document document; - rapidjson::Value like_query_value(rapidjson::kObjectType); - like_query_value.SetObject(); - like_query.to_json(&document, &like_query_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - like_query_value.Accept(writer); - std::string actual_json = buffer.GetString(); - // LOG(INFO) << "wildcard query" << actual_json; - EXPECT_STREQ("{\"wildcard\":{\"content\":\"a*e*g?\"}}", actual_json.c_str()); -} - -TEST_F(BooleanQueryBuilderTest, terms_in_query) { - // dv in ["2.0", "4.0", "8.0"] - std::string terms_in_field = "dv"; - int terms_in_field_length = terms_in_field.length(); - TypeDescriptor terms_in_col_type_desc = - TypeDescriptor::create_varchar_type(terms_in_field_length); - - char value_1[] = "2.0"; - int value_1_length = (int)strlen(value_1); - StringValue string_value_1(value_1, value_1_length); - ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1); - - char value_2[] = "4.0"; - int value_2_length = (int)strlen(value_2); - StringValue string_value_2(value_2, value_2_length); - ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2); - - char value_3[] = "8.0"; - int value_3_length = (int)strlen(value_3); - StringValue string_value_3(value_3, value_3_length); - ExtLiteral term_literal_3(TYPE_VARCHAR, &string_value_3); - - std::vector terms_values = {term_literal_1, term_literal_2, term_literal_3}; - ExtInPredicate in_predicate(TExprNodeType::IN_PRED, false, terms_in_field, - terms_in_col_type_desc, terms_values); - TermsInSetQueryBuilder terms_query(in_predicate); - rapidjson::Document document; - rapidjson::Value in_query_value(rapidjson::kObjectType); - in_query_value.SetObject(); - terms_query.to_json(&document, &in_query_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - in_query_value.Accept(writer); - std::string actual_json = buffer.GetString(); - //LOG(INFO) << "terms in sets query" << actual_json; - EXPECT_STREQ("{\"terms\":{\"dv\":[\"2.0\",\"4.0\",\"8.0\"]}}", actual_json.c_str()); -} - -TEST_F(BooleanQueryBuilderTest, match_all_query) { - // match all docs - MatchAllQueryBuilder match_all_query; - rapidjson::Document document; - rapidjson::Value match_all_query_value(rapidjson::kObjectType); - match_all_query_value.SetObject(); - match_all_query.to_json(&document, &match_all_query_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - match_all_query_value.Accept(writer); - std::string actual_json = buffer.GetString(); - //LOG(INFO) << "match all query" << actual_json; - EXPECT_STREQ("{\"match_all\":{}}", actual_json.c_str()); -} - -TEST_F(BooleanQueryBuilderTest, exists_query) { - // k1 is not null - // {"exists":{"field":"k1"}} - std::string 
exists_field = "k1"; - int exists_field_length = exists_field.length(); - TypeDescriptor exists_col_type_desc = TypeDescriptor::create_varchar_type(exists_field_length); - ExtIsNullPredicate isNullPredicate(TExprNodeType::IS_NULL_PRED, "k1", exists_col_type_desc, - true); - ExistsQueryBuilder exists_query(isNullPredicate); - rapidjson::Document document; - rapidjson::Value exists_query_value(rapidjson::kObjectType); - exists_query_value.SetObject(); - exists_query.to_json(&document, &exists_query_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - exists_query_value.Accept(writer); - std::string actual_json = buffer.GetString(); - EXPECT_STREQ("{\"exists\":{\"field\":\"k1\"}}", actual_json.c_str()); -} - -TEST_F(BooleanQueryBuilderTest, bool_query) { - // content like 'a%e%g_' - char like_value[] = "a%e%g_"; - int like_value_length = (int)strlen(like_value); - TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length); - StringValue like_term_value(like_value, like_value_length); - ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value); - std::string like_field_name = "content"; - ExtLikePredicate* like_predicate = new ExtLikePredicate( - TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal); - // esquery("random", "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}") - char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; - int es_query_length = (int)strlen(es_query_str); - StringValue value(es_query_str, es_query_length); - TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length); - std::string es_query_field_name = "random"; - ExtColumnDesc es_query_col_des(es_query_field_name, es_query_type_desc); - std::vector es_query_cols = {es_query_col_des}; - StringValue es_query_value(es_query_str, es_query_length); - ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); - std::vector es_query_values = {es_query_term_literal}; - std::string function_name = "esquery"; - ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, - es_query_cols, es_query_values); - // k >= a - char range_value_str[] = "a"; - int range_value_length = (int)strlen(range_value_str); - StringValue range_value(range_value_str, range_value_length); - ExtLiteral range_literal(TYPE_VARCHAR, &range_value); - TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length); - std::string range_field_name = "k"; - ExtBinaryPredicate* range_predicate = - new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, - TExprOpcode::GE, range_literal); - // content = "wyf" - char term_str[] = "wyf"; - int term_value_length = (int)strlen(term_str); - StringValue term_value(term_str, term_value_length); - ExtLiteral term_literal(TYPE_VARCHAR, &term_value); - TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length); - std::string term_field_name = "content"; - ExtBinaryPredicate* term_predicate = - new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, - TExprOpcode::EQ, term_literal); - - // content like 'a%e%g_' or k >= a or content = "wyf" - std::vector or_predicates = {like_predicate, function_predicate, range_predicate, - term_predicate}; - BooleanQueryBuilder bool_query(or_predicates); - rapidjson::Document document; - rapidjson::Value bool_query_value(rapidjson::kObjectType); - bool_query_value.SetObject(); - 
bool_query.to_json(&document, &bool_query_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - bool_query_value.Accept(writer); - std::string actual_json = buffer.GetString(); - std::string expected_json = - "{\"bool\":{\"should\":[{\"wildcard\":{\"content\":\"a*e*g?\"}},{\"bool\":{\"must_" - "not\":{\"exists\":{\"field\":\"f1\"}}}},{\"range\":{\"k\":{\"gte\":\"a\"}}},{\"term\":" - "{\"content\":\"wyf\"}}]}}"; - //LOG(INFO) << "bool query" << actual_json; - EXPECT_STREQ(expected_json.c_str(), actual_json.c_str()); - - delete like_predicate; - delete function_predicate; - delete range_predicate; - delete term_predicate; -} - -TEST_F(BooleanQueryBuilderTest, compound_bool_query) { - // content like "a%e%g_" or esquery(random, '{"bool": {"must_not": {"exists": {"field": "f1"}}}}') - char like_value[] = "a%e%g_"; - int like_value_length = (int)strlen(like_value); - TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length); - StringValue like_term_value(like_value, like_value_length); - ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value); - std::string like_field_name = "content"; - ExtLikePredicate* like_predicate = new ExtLikePredicate( - TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal); - - char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; - int es_query_length = (int)strlen(es_query_str); - StringValue value(es_query_str, es_query_length); - TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length); - std::string es_query_field_name = "random"; - ExtColumnDesc es_query_col_des(es_query_field_name, es_query_type_desc); - std::vector es_query_cols = {es_query_col_des}; - StringValue es_query_value(es_query_str, es_query_length); - ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); - std::vector es_query_values = {es_query_term_literal}; - std::string function_name = "esquery"; - ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, - es_query_cols, es_query_values); - - std::vector bool_predicates_1 = {like_predicate, function_predicate}; - EsPredicate* bool_predicate_1 = new EsPredicate(bool_predicates_1); - - // k >= "a" - char range_value_str[] = "a"; - int range_value_length = (int)strlen(range_value_str); - StringValue range_value(range_value_str, range_value_length); - ExtLiteral range_literal(TYPE_VARCHAR, &range_value); - TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length); - std::string range_field_name = "k"; - ExtBinaryPredicate* range_predicate = - new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, - TExprOpcode::GE, range_literal); - - std::vector bool_predicates_2 = {range_predicate}; - EsPredicate* bool_predicate_2 = new EsPredicate(bool_predicates_2); - - // content != "wyf" - char term_str[] = "wyf"; - int term_value_length = (int)strlen(term_str); - StringValue term_value(term_str, term_value_length); - ExtLiteral term_literal(TYPE_VARCHAR, &term_value); - TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length); - std::string term_field_name = "content"; - ExtBinaryPredicate* term_ne_predicate = - new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, - TExprOpcode::NE, term_literal); - std::vector bool_predicates_3 = {term_ne_predicate}; - EsPredicate* bool_predicate_3 = new EsPredicate(bool_predicates_3); - - // fv not in [8.0, 16.0] - 
std::string terms_in_field = "fv"; - int terms_in_field_length = terms_in_field.length(); - TypeDescriptor terms_in_col_type_desc = - TypeDescriptor::create_varchar_type(terms_in_field_length); - - char value_1[] = "8.0"; - int value_1_length = (int)strlen(value_1); - StringValue string_value_1(value_1, value_1_length); - ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1); - - char value_2[] = "16.0"; - int value_2_length = (int)strlen(value_2); - StringValue string_value_2(value_2, value_2_length); - ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2); - - std::vector terms_values = {term_literal_1, term_literal_2}; - ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, true, terms_in_field, - terms_in_col_type_desc, terms_values); - std::vector bool_predicates_4 = {in_predicate}; - EsPredicate* bool_predicate_4 = new EsPredicate(bool_predicates_4); - - // (content like "a%e%g_" or esquery(random, '{"bool": {"must_not": {"exists": {"field": "f1"}}}}')) and content != "wyf" and fv not in [8.0, 16.0] - std::vector and_bool_predicates = {bool_predicate_1, bool_predicate_2, - bool_predicate_3, bool_predicate_4}; - - rapidjson::Document document; - rapidjson::Value compound_bool_value(rapidjson::kObjectType); - compound_bool_value.SetObject(); - BooleanQueryBuilder::to_query(and_bool_predicates, &document, &compound_bool_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - compound_bool_value.Accept(writer); - std::string actual_bool_json = buffer.GetString(); - std::string expected_json = - "{\"bool\":{\"filter\":[{\"bool\":{\"should\":[{\"wildcard\":{\"content\":\"a*e*g?\"}}," - "{\"bool\":{\"must_not\":{\"exists\":{\"field\":\"f1\"}}}}]}},{\"bool\":{\"should\":[{" - "\"range\":{\"k\":{\"gte\":\"a\"}}}]}},{\"bool\":{\"should\":[{\"bool\":{\"must_not\":[" - "{\"term\":{\"content\":\"wyf\"}}]}}]}},{\"bool\":{\"should\":[{\"bool\":{\"must_not\":" - "[{\"terms\":{\"fv\":[\"8.0\",\"16.0\"]}}]}}]}}]}}"; - //LOG(INFO) << "compound bool query" << actual_bool_json; - EXPECT_STREQ(expected_json.c_str(), actual_bool_json.c_str()); - delete bool_predicate_1; - delete bool_predicate_2; - delete bool_predicate_3; - delete bool_predicate_4; -} - -TEST_F(BooleanQueryBuilderTest, validate_esquery) { - std::string function_name = "esquery"; - char field[] = "random"; - int field_length = (int)strlen(field); - TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(field_length); - ExtColumnDesc es_query_col_des(field, es_query_type_desc); - std::vector es_query_cols = {es_query_col_des}; - char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; - int es_query_length = (int)strlen(es_query_str); - StringValue es_query_value(es_query_str, es_query_length); - ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); - std::vector es_query_values = {es_query_term_literal}; - ExtFunction legal_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, - es_query_values); - auto st = BooleanQueryBuilder::check_es_query(legal_es_query); - EXPECT_TRUE(st.ok()); - char empty_query[] = "{}"; - int empty_query_length = (int)strlen(empty_query); - StringValue empty_query_value(empty_query, empty_query_length); - ExtLiteral empty_query_term_literal(TYPE_VARCHAR, &empty_query_value); - std::vector empty_query_values = {empty_query_term_literal}; - ExtFunction empty_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, - empty_query_values); - st = 
BooleanQueryBuilder::check_es_query(empty_es_query); - EXPECT_STREQ(st.to_string().c_str(), "[INVALID_ARGUMENT]esquery must only one root"); - //LOG(INFO) <<"error msg:" << st1.get_error_msg(); - char malformed_query[] = "{\"bool\": {\"must_not\": {\"exists\": {"; - int malformed_query_length = (int)strlen(malformed_query); - StringValue malformed_query_value(malformed_query, malformed_query_length); - ExtLiteral malformed_query_term_literal(TYPE_VARCHAR, &malformed_query_value); - std::vector malformed_query_values = {malformed_query_term_literal}; - ExtFunction malformed_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, - malformed_query_values); - st = BooleanQueryBuilder::check_es_query(malformed_es_query); - EXPECT_STREQ(st.to_string().c_str(), "[INVALID_ARGUMENT]malformed esquery json"); - char illegal_query[] = "{\"term\": {\"k1\" : \"2\"},\"match\": {\"k1\": \"3\"}}"; - int illegal_query_length = (int)strlen(illegal_query); - StringValue illegal_query_value(illegal_query, illegal_query_length); - ExtLiteral illegal_query_term_literal(TYPE_VARCHAR, &illegal_query_value); - std::vector illegal_query_values = {illegal_query_term_literal}; - ExtFunction illegal_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, - illegal_query_values); - st = BooleanQueryBuilder::check_es_query(illegal_es_query); - EXPECT_STREQ(st.to_string().c_str(), "[INVALID_ARGUMENT]esquery must only one root"); - char illegal_key_query[] = "[\"22\"]"; - int illegal_key_query_length = (int)strlen(illegal_key_query); - StringValue illegal_key_query_value(illegal_key_query, illegal_key_query_length); - ExtLiteral illegal_key_query_term_literal(TYPE_VARCHAR, &illegal_key_query_value); - std::vector illegal_key_query_values = {illegal_key_query_term_literal}; - ExtFunction illegal_key_es_query(TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, - illegal_key_query_values); - st = BooleanQueryBuilder::check_es_query(illegal_key_es_query); - EXPECT_STREQ(st.to_string().c_str(), "[INVALID_ARGUMENT]esquery must be a object"); -} - -TEST_F(BooleanQueryBuilderTest, validate_partial) { - // TODO(yingchun): LSAN will report some errors in this scope, we should improve the code and enable LSAN later. 
- debug::ScopedLeakCheckDisabler disable_lsan; - char like_value[] = "a%e%g_"; - int like_value_length = (int)strlen(like_value); - TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length); - StringValue like_term_value(like_value, like_value_length); - ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value); - std::string like_field_name = "content"; - ExtLikePredicate* like_predicate = new ExtLikePredicate( - TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal); - - // k >= "a" - char range_value_str[] = "a"; - int range_value_length = (int)strlen(range_value_str); - StringValue range_value(range_value_str, range_value_length); - ExtLiteral range_literal(TYPE_VARCHAR, &range_value); - TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length); - std::string range_field_name = "k"; - ExtBinaryPredicate* range_predicate = - new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, - TExprOpcode::GE, range_literal); - - std::vector bool_predicates_1 = {like_predicate, range_predicate}; - EsPredicate* bool_predicate_1 = new EsPredicate(bool_predicates_1); - - // fv not in [8.0, 16.0] - std::string terms_in_field = "fv"; - int terms_in_field_length = terms_in_field.length(); - TypeDescriptor terms_in_col_type_desc = - TypeDescriptor::create_varchar_type(terms_in_field_length); - - char value_1[] = "8.0"; - int value_1_length = (int)strlen(value_1); - StringValue string_value_1(value_1, value_1_length); - ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1); - - char value_2[] = "16.0"; - int value_2_length = (int)strlen(value_2); - StringValue string_value_2(value_2, value_2_length); - ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2); - - std::vector terms_values = {term_literal_1, term_literal_2}; - ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, true, terms_in_field, - terms_in_col_type_desc, terms_values); - std::vector bool_predicates_2 = {in_predicate}; - EsPredicate* bool_predicate_2 = new EsPredicate(bool_predicates_2); - - // content != "wyf" - char term_str[] = "wyf"; - int term_value_length = (int)strlen(term_str); - StringValue term_value(term_str, term_value_length); - ExtLiteral term_literal(TYPE_VARCHAR, &term_value); - TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length); - std::string term_field_name = "content"; - ExtBinaryPredicate* term_ne_predicate = - new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, - TExprOpcode::NE, term_literal); - - char es_query_str[] = "{\"bool\": {\"must_not\": {\"exists\": {\"field\": \"f1\"}}}}"; - int es_query_length = (int)strlen(es_query_str); - StringValue value(es_query_str, es_query_length); - TypeDescriptor es_query_type_desc = TypeDescriptor::create_varchar_type(es_query_length); - std::string es_query_field_name = "random"; - ExtColumnDesc es_query_col_des(es_query_field_name, es_query_type_desc); - std::vector es_query_cols = {es_query_col_des}; - StringValue es_query_value(es_query_str, es_query_length); - ExtLiteral es_query_term_literal(TYPE_VARCHAR, &es_query_value); - std::vector es_query_values = {es_query_term_literal}; - std::string function_name = "esquery"; - ExtFunction* function_predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, function_name, - es_query_cols, es_query_values); - std::vector bool_predicates_3 = {term_ne_predicate, function_predicate}; - EsPredicate* bool_predicate_3 = new 
EsPredicate(bool_predicates_3); - - std::vector and_bool_predicates = {bool_predicate_1, bool_predicate_2, - bool_predicate_3}; - std::vector result; - BooleanQueryBuilder::validate(and_bool_predicates, &result); - std::vector expected = {true, true, true}; - EXPECT_EQ(result, expected); - char illegal_query[] = "{\"term\": {\"k1\" : \"2\"},\"match\": {\"k1\": \"3\"}}"; - int illegal_query_length = (int)strlen(illegal_query); - StringValue illegal_query_value(illegal_query, illegal_query_length); - ExtLiteral illegal_query_term_literal(TYPE_VARCHAR, &illegal_query_value); - std::vector illegal_query_values = {illegal_query_term_literal}; - ExtFunction* illegal_function_preficate = new ExtFunction( - TExprNodeType::FUNCTION_CALL, function_name, es_query_cols, illegal_query_values); - std::vector illegal_bool_predicates_3 = {term_ne_predicate, - illegal_function_preficate}; - EsPredicate* illegal_bool_predicate_3 = new EsPredicate(illegal_bool_predicates_3); - std::vector and_bool_predicates_1 = {bool_predicate_1, bool_predicate_2, - illegal_bool_predicate_3}; - std::vector result1; - BooleanQueryBuilder::validate(and_bool_predicates_1, &result1); - std::vector expected1 = {true, true, false}; - EXPECT_EQ(result1, expected1); -} - -// ( k >= "a" and (fv not in [8.0, 16.0]) or (content != "wyf") ) or content like "a%e%g_" - -TEST_F(BooleanQueryBuilderTest, validate_compound_and) { - // TODO(yingchun): LSAN will report some errors in this scope, we should improve the code and enable LSAN later. - debug::ScopedLeakCheckDisabler disable_lsan; - std::string terms_in_field = "fv"; // fv not in [8.0, 16.0] - int terms_in_field_length = terms_in_field.length(); - TypeDescriptor terms_in_col_type_desc = - TypeDescriptor::create_varchar_type(terms_in_field_length); - - char value_1[] = "8.0"; - int value_1_length = (int)strlen(value_1); - StringValue string_value_1(value_1, value_1_length); - ExtLiteral term_literal_1(TYPE_VARCHAR, &string_value_1); - - char value_2[] = "16.0"; - int value_2_length = (int)strlen(value_2); - StringValue string_value_2(value_2, value_2_length); - ExtLiteral term_literal_2(TYPE_VARCHAR, &string_value_2); - - std::vector terms_values = {term_literal_1, term_literal_2}; - ExtInPredicate* in_predicate = new ExtInPredicate(TExprNodeType::IN_PRED, true, terms_in_field, - terms_in_col_type_desc, terms_values); - - char term_str[] = "wyf"; - int term_value_length = (int)strlen(term_str); - StringValue term_value(term_str, term_value_length); - ExtLiteral term_literal(TYPE_VARCHAR, &term_value); - TypeDescriptor term_type_desc = TypeDescriptor::create_varchar_type(term_value_length); - std::string term_field_name = "content"; - ExtBinaryPredicate* term_ne_predicate = - new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, term_field_name, term_type_desc, - TExprOpcode::NE, term_literal); - - std::vector inner_or_content = {term_ne_predicate, in_predicate}; - - EsPredicate* inner_or_predicate = new EsPredicate(inner_or_content); - - char range_value_str[] = "a"; // k >= "a" - int range_value_length = (int)strlen(range_value_str); - StringValue range_value(range_value_str, range_value_length); - ExtLiteral range_literal(TYPE_VARCHAR, &range_value); - TypeDescriptor range_type_desc = TypeDescriptor::create_varchar_type(range_value_length); - std::string range_field_name = "k"; - ExtBinaryPredicate* range_predicate = - new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, range_field_name, range_type_desc, - TExprOpcode::GE, range_literal); - std::vector range_predicates = 
{range_predicate}; - EsPredicate* left_inner_or_predicate = new EsPredicate(range_predicates); - - std::vector outer_left_predicates_1 = {left_inner_or_predicate, - inner_or_predicate}; - - ExtCompPredicates* comp_predicate = - new ExtCompPredicates(TExprOpcode::COMPOUND_AND, outer_left_predicates_1); - - char like_value[] = "a%e%g_"; - int like_value_length = (int)strlen(like_value); - TypeDescriptor like_type_desc = TypeDescriptor::create_varchar_type(like_value_length); - StringValue like_term_value(like_value, like_value_length); - ExtLiteral like_literal(TYPE_VARCHAR, &like_term_value); - std::string like_field_name = "content"; - ExtLikePredicate* like_predicate = new ExtLikePredicate( - TExprNodeType::LIKE_PRED, like_field_name, like_type_desc, like_literal); - - std::vector or_predicate_vector = {comp_predicate, like_predicate}; - EsPredicate* or_predicate = new EsPredicate(or_predicate_vector); - - std::vector or_predicates = {or_predicate}; - std::vector result1; - BooleanQueryBuilder::validate(or_predicates, &result1); - std::vector expected1 = {true}; - EXPECT_TRUE(result1 == expected1); - - rapidjson::Document document; - rapidjson::Value compound_and_value(rapidjson::kObjectType); - compound_and_value.SetObject(); - BooleanQueryBuilder::to_query(or_predicates, &document, &compound_and_value); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - compound_and_value.Accept(writer); - std::string actual_bool_json = buffer.GetString(); - std::string expected_json = - "{\"bool\":{\"filter\":[{\"bool\":{\"should\":[{\"bool\":{\"filter\":[{\"bool\":{" - "\"should\":[{\"range\":{\"k\":{\"gte\":\"a\"}}}]}},{\"bool\":{\"should\":[{\"bool\":{" - "\"must_not\":[{\"term\":{\"content\":\"wyf\"}}]}},{\"bool\":{\"must_not\":[{\"terms\":" - "{\"fv\":[\"8.0\",\"16.0\"]}}]}}]}}]}},{\"wildcard\":{\"content\":\"a*e*g?\"}}]}}]}}"; - EXPECT_STREQ(expected_json.c_str(), actual_bool_json.c_str()); -} -} // namespace doris diff --git a/be/test/exec/es_scan_reader_test.cpp b/be/test/exec/es_scan_reader_test.cpp deleted file mode 100644 index 9c4185086f..0000000000 --- a/be/test/exec/es_scan_reader_test.cpp +++ /dev/null @@ -1,247 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
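// Review note on the deleted es_scan_reader_test.cpp below: rather than a live
// Elasticsearch cluster, it starts an EvHttpServer with three handlers that
// emulate the scroll API: POST /{index}/{type}/_search opens a scroll and
// encodes the requested batch size into _scroll_id, POST /_search/scroll
// advances the offset (returning hits.total == 0 once it passes 10, which ends
// the scan), and DELETE /_search/scroll acknowledges clearing the scroll
// context. All three handlers require basic auth as user "root".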
- -#include "exec/es/es_scan_reader.h" - -#include - -#include -#include -#include - -#include "common/logging.h" -#include "exec/es/es_scroll_query.h" -#include "http/ev_http_server.h" -#include "http/http_channel.h" -#include "http/http_handler.h" -#include "http/http_request.h" -#include "rapidjson/document.h" -#include "rapidjson/stringbuffer.h" -#include "rapidjson/writer.h" - -namespace doris { - -class RestSearchAction : public HttpHandler { -public: - void handle(HttpRequest* req) override { - std::string user; - std::string passwd; - if (!parse_basic_auth(*req, &user, &passwd) || user != "root") { - HttpChannel::send_basic_challenge(req, "abc"); - return; - } - req->add_output_header(HttpHeaders::CONTENT_TYPE, "application/json"); - if (req->method() == HttpMethod::POST) { - std::string post_body = req->get_request_body(); - rapidjson::Document post_doc; - post_doc.Parse<0>(post_body.c_str()); - int size = 1; - if (post_doc.HasMember("size")) { - rapidjson::Value& size_value = post_doc["size"]; - size = size_value.GetInt(); - } - std::string _scroll_id(std::to_string(size)); - rapidjson::Document search_result; - rapidjson::Document::AllocatorType& allocator = search_result.GetAllocator(); - search_result.SetObject(); - rapidjson::Value scroll_id_value(_scroll_id.c_str(), allocator); - search_result.AddMember("_scroll_id", scroll_id_value, allocator); - - rapidjson::Value outer_hits(rapidjson::kObjectType); - outer_hits.AddMember("total", 10, allocator); - rapidjson::Value inner_hits(rapidjson::kArrayType); - rapidjson::Value source_document(rapidjson::kObjectType); - source_document.AddMember("id", 1, allocator); - rapidjson::Value value_node("1", allocator); - source_document.AddMember("value", value_node, allocator); - inner_hits.PushBack(source_document, allocator); - outer_hits.AddMember("hits", inner_hits, allocator); - search_result.AddMember("hits", outer_hits, allocator); - - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - search_result.Accept(writer); - //send DELETE scroll post request - std::string search_result_json = buffer.GetString(); - HttpChannel::send_reply(req, search_result_json); - } else { - std::string response = "test1"; - HttpChannel::send_reply(req, response); - } - } -}; - -class RestSearchScrollAction : public HttpHandler { -public: - void handle(HttpRequest* req) override { - std::string user; - std::string passwd; - if (!parse_basic_auth(*req, &user, &passwd) || user != "root") { - HttpChannel::send_basic_challenge(req, "abc"); - return; - } - if (req->method() == HttpMethod::POST) { - std::string post_body = req->get_request_body(); - rapidjson::Document post_doc; - post_doc.Parse<0>(post_body.c_str()); - std::string scroll_id; - if (!post_doc.HasMember("scroll_id")) { - HttpChannel::send_reply(req, HttpStatus::NOT_FOUND, "invalid scroll request"); - return; - } else { - rapidjson::Value& scroll_id_value = post_doc["scroll_id"]; - scroll_id = scroll_id_value.GetString(); - int offset = atoi(scroll_id.c_str()); - if (offset > 10) { - rapidjson::Document end_search_result; - rapidjson::Document::AllocatorType& allocator = - end_search_result.GetAllocator(); - end_search_result.SetObject(); - rapidjson::Value scroll_id_value("11", allocator); - end_search_result.AddMember("_scroll_id", scroll_id_value, allocator); - - rapidjson::Value outer_hits(rapidjson::kObjectType); - outer_hits.AddMember("total", 0, allocator); - end_search_result.AddMember("hits", outer_hits, allocator); - rapidjson::StringBuffer buffer; - rapidjson::Writer 
writer(buffer); - end_search_result.Accept(writer); - //send DELETE scroll post request - std::string end_search_result_json = buffer.GetString(); - HttpChannel::send_reply(req, end_search_result_json); - return; - } else { - int start = offset + 1; - rapidjson::Document search_result; - rapidjson::Document::AllocatorType& allocator = search_result.GetAllocator(); - search_result.SetObject(); - rapidjson::Value scroll_id_value(std::to_string(start).c_str(), allocator); - search_result.AddMember("_scroll_id", scroll_id_value, allocator); - - rapidjson::Value outer_hits(rapidjson::kObjectType); - outer_hits.AddMember("total", 1, allocator); - rapidjson::Value inner_hits(rapidjson::kArrayType); - rapidjson::Value source_document(rapidjson::kObjectType); - source_document.AddMember("id", start, allocator); - rapidjson::Value value_node(std::to_string(start).c_str(), allocator); - source_document.AddMember("value", value_node, allocator); - inner_hits.PushBack(source_document, allocator); - outer_hits.AddMember("hits", inner_hits, allocator); - search_result.AddMember("hits", outer_hits, allocator); - - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - search_result.Accept(writer); - //send DELETE scroll post request - std::string search_result_json = buffer.GetString(); - HttpChannel::send_reply(req, search_result_json); - return; - } - } - } - } -}; - -class RestClearScrollAction : public HttpHandler { -public: - void handle(HttpRequest* req) override { - std::string user; - std::string passwd; - if (!parse_basic_auth(*req, &user, &passwd) || user != "root") { - HttpChannel::send_basic_challenge(req, "abc"); - return; - } - if (req->method() == HttpMethod::DELETE) { - std::string post_body = req->get_request_body(); - rapidjson::Document post_doc; - post_doc.Parse<0>(post_body.c_str()); - std::string scroll_id; - if (!post_doc.HasMember("scroll_id")) { - HttpChannel::send_reply(req, HttpStatus::NOT_FOUND, "invalid scroll request"); - return; - } else { - rapidjson::Document clear_scroll_result; - rapidjson::Document::AllocatorType& allocator = clear_scroll_result.GetAllocator(); - clear_scroll_result.SetObject(); - clear_scroll_result.AddMember("succeeded", true, allocator); - clear_scroll_result.AddMember("num_freed", 1, allocator); - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - clear_scroll_result.Accept(writer); - std::string clear_scroll_result_json = buffer.GetString(); - HttpChannel::send_reply(req, clear_scroll_result_json); - return; - } - } - } -}; - -static RestSearchAction rest_search_action = RestSearchAction(); -static RestSearchScrollAction rest_search_scroll_action = RestSearchScrollAction(); -static RestClearScrollAction rest_clear_scroll_action = RestClearScrollAction(); -static EvHttpServer* mock_es_server = nullptr; -static int real_port = 0; - -class MockESServerTest : public testing::Test { -public: - MockESServerTest() {} - ~MockESServerTest() override {} - - static void SetUpTestCase() { - mock_es_server = new EvHttpServer(0); - mock_es_server->register_handler(POST, "/{index}/{type}/_search", &rest_search_action); - mock_es_server->register_handler(POST, "/_search/scroll", &rest_search_scroll_action); - mock_es_server->register_handler(DELETE, "/_search/scroll", &rest_clear_scroll_action); - mock_es_server->start(); - real_port = mock_es_server->get_real_port(); - EXPECT_NE(0, real_port); - } - - static void TearDownTestCase() { delete mock_es_server; } -}; - -TEST_F(MockESServerTest, workflow) { - std::string target = 
"http://127.0.0.1:" + std::to_string(real_port); - std::vector fields = {"id", "value"}; - std::map props; - props[ESScanReader::KEY_INDEX] = "tindex"; - props[ESScanReader::KEY_TYPE] = "doc"; - props[ESScanReader::KEY_USER_NAME] = "root"; - props[ESScanReader::KEY_PASS_WORD] = "root"; - props[ESScanReader::KEY_SHARD] = "0"; - props[ESScanReader::KEY_BATCH_SIZE] = "1"; - std::vector predicates; - std::map docvalue_context; - bool doc_value_mode = false; - props[ESScanReader::KEY_QUERY] = ESScrollQueryBuilder::build(props, fields, predicates, - docvalue_context, &doc_value_mode); - ESScanReader reader(target, props, doc_value_mode); - auto st = reader.open(); - EXPECT_TRUE(st.ok()); - bool eos = false; - std::unique_ptr parser = nullptr; - while (!eos) { - st = reader.get_next(&eos, parser); - EXPECT_TRUE(st.ok()); - if (eos) { - break; - } - } - auto cst = reader.close(); - EXPECT_TRUE(cst.ok()); -} -} // namespace doris diff --git a/be/test/exec/hash_table_test.cpp b/be/test/exec/hash_table_test.cpp deleted file mode 100644 index 3a5ab8e3f0..0000000000 --- a/be/test/exec/hash_table_test.cpp +++ /dev/null @@ -1,380 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/hash_table.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "common/compiler_util.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "exprs/slot_ref.h" -#include "runtime/exec_env.h" -#include "runtime/mem_pool.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/test_env.h" -#include "testutil/test_util.h" -#include "util/cpu_info.h" -#include "util/runtime_profile.h" -#include "util/time.h" - -namespace doris { - -class HashTableTest : public testing::Test { -public: - HashTableTest() { - _mem_pool.reset(new MemPool()); - _state = _pool.add(new RuntimeState(TQueryGlobals())); - _state->init_mem_trackers(); - _state->_exec_env = ExecEnv::GetInstance(); - } - -protected: - RuntimeState* _state; - ObjectPool _pool; - std::shared_ptr _mem_pool; - std::vector _build_expr; - std::vector _probe_expr; - - virtual void SetUp() { - RowDescriptor desc; - Status status; - TypeDescriptor int_desc(TYPE_INT); - - auto build_slot_ref = _pool.add(new SlotRef(int_desc, 0)); - _build_expr.push_back(_pool.add(new ExprContext(build_slot_ref))); - status = Expr::prepare(_build_expr, _state, desc); - EXPECT_TRUE(status.ok()); - - auto probe_slot_ref = _pool.add(new SlotRef(int_desc, 0)); - _probe_expr.push_back(_pool.add(new ExprContext(probe_slot_ref))); - status = Expr::prepare(_probe_expr, _state, desc); - EXPECT_TRUE(status.ok()); - } - - void TearDown() { - Expr::close(_build_expr, _state); - Expr::close(_probe_expr, _state); - } - - TupleRow* create_tuple_row(int32_t val); - - // Wrapper to call private methods on HashTable - // TODO: understand google testing, there must be a more natural way to do this - void resize_table(HashTable* table, int64_t new_size) { table->resize_buckets(new_size); } - - // Do a full table scan on table. All values should be between [min,max). If - // all_unique, then each key(int value) should only appear once. Results are - // stored in results, indexed by the key. Results must have been preallocated to - // be at least max size. 
- void full_scan(HashTable* table, int min, int max, bool all_unique, TupleRow** results, - TupleRow** expected) { - HashTable::Iterator iter = table->begin(); - - while (iter != table->end()) { - TupleRow* row = iter.get_row(); - int32_t val = *reinterpret_cast<int32_t*>(_build_expr[0]->get_value(row)); - EXPECT_GE(val, min); - EXPECT_LT(val, max); - - if (all_unique) { - EXPECT_TRUE(results[val] == nullptr); - } - - EXPECT_EQ(row->get_tuple(0), expected[val]->get_tuple(0)); - results[val] = row; - iter.next(); - } - } - - // Validate that probe_row evaluated over probe_exprs equals build_row - // evaluated over build_exprs - void validate_match(TupleRow* probe_row, TupleRow* build_row) { - EXPECT_TRUE(probe_row != build_row); - int32_t build_val = *reinterpret_cast<int32_t*>(_build_expr[0]->get_value(probe_row)); - int32_t probe_val = *reinterpret_cast<int32_t*>(_probe_expr[0]->get_value(build_row)); - EXPECT_EQ(build_val, probe_val); - } - - struct ProbeTestData { - TupleRow* probe_row; - std::vector<TupleRow*> expected_build_rows; - }; - - void probe_test(HashTable* table, ProbeTestData* data, int num_data, bool scan) { - for (int i = 0; i < num_data; ++i) { - TupleRow* row = data[i].probe_row; - - HashTable::Iterator iter; - iter = table->find(row); - - if (data[i].expected_build_rows.size() == 0) { - EXPECT_TRUE(iter == table->end()); - } else { - if (scan) { - std::map<TupleRow*, bool> matched; - - while (iter != table->end()) { - EXPECT_TRUE(matched.find(iter.get_row()) == matched.end()); - matched[iter.get_row()] = true; - iter.next(); - } - - EXPECT_EQ(matched.size(), data[i].expected_build_rows.size()); - - for (int j = 0; j < data[i].expected_build_rows.size(); ++j) { - EXPECT_TRUE(matched[data[i].expected_build_rows[j]]); - } - } else { - EXPECT_EQ(data[i].expected_build_rows.size(), 1); - EXPECT_EQ(data[i].expected_build_rows[0]->get_tuple(0), - iter.get_row()->get_tuple(0)); - validate_match(row, iter.get_row()); - } - } - } - } -}; - -TupleRow* HashTableTest::create_tuple_row(int32_t val) { - uint8_t* tuple_row_mem = _mem_pool->allocate(sizeof(int32_t*)); - uint8_t* tuple_mem = _mem_pool->allocate(sizeof(int32_t)); - *reinterpret_cast<int32_t*>(tuple_mem) = val; - TupleRow* row = reinterpret_cast<TupleRow*>(tuple_row_mem); - row->set_tuple(0, reinterpret_cast<Tuple*>(tuple_mem)); - return row; -} - -TEST_F(HashTableTest, SetupTest) { - TupleRow* build_row1 = create_tuple_row(1); - TupleRow* build_row2 = create_tuple_row(2); - TupleRow* probe_row3 = create_tuple_row(3); - TupleRow* probe_row4 = create_tuple_row(4); - - int32_t* val_row1 = reinterpret_cast<int32_t*>(_build_expr[0]->get_value(build_row1)); - int32_t* val_row2 = reinterpret_cast<int32_t*>(_build_expr[0]->get_value(build_row2)); - int32_t* val_row3 = reinterpret_cast<int32_t*>(_probe_expr[0]->get_value(probe_row3)); - int32_t* val_row4 = reinterpret_cast<int32_t*>(_probe_expr[0]->get_value(probe_row4)); - - EXPECT_EQ(*val_row1, 1); - EXPECT_EQ(*val_row2, 2); - EXPECT_EQ(*val_row3, 3); - EXPECT_EQ(*val_row4, 4); -} - -// This test inserts the build rows [0, 5) into the hash table. It validates that they -// are all there using a full table scan. It also validates that find() is correct -// by probing for rows that are both present and absent. -// The hash table is rehashed a few times and the scans/finds are tested again.
-TEST_F(HashTableTest, BasicTest) { - TupleRow* build_rows[5]; - TupleRow* scan_rows[5] = {0}; - - for (int i = 0; i < 5; ++i) { - build_rows[i] = create_tuple_row(i); - } - - ProbeTestData probe_rows[10]; - - for (int i = 0; i < 10; ++i) { - probe_rows[i].probe_row = create_tuple_row(i); - - if (i < 5) { - probe_rows[i].expected_build_rows.push_back(build_rows[i]); - } - } - - std::vector<bool> is_null_safe = {false}; - int initial_seed = 1; - int64_t num_buckets = 4; - HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed, - num_buckets); - - for (int i = 0; i < 5; ++i) { - hash_table.insert(build_rows[i]); - } - - EXPECT_EQ(5, hash_table.size()); - - // Do a full table scan and validate returned pointers - full_scan(&hash_table, 0, 5, true, scan_rows, build_rows); - probe_test(&hash_table, probe_rows, 10, false); - - // Resize and scan again - resize_table(&hash_table, 64); - EXPECT_EQ(hash_table.num_buckets(), 64); - EXPECT_EQ(hash_table.size(), 5); - memset(scan_rows, 0, sizeof(scan_rows)); - full_scan(&hash_table, 0, 5, true, scan_rows, build_rows); - probe_test(&hash_table, probe_rows, 10, false); - - // Resize to two and cause some collisions - resize_table(&hash_table, 2); - EXPECT_EQ(hash_table.num_buckets(), 2); - EXPECT_EQ(hash_table.size(), 5); - memset(scan_rows, 0, sizeof(scan_rows)); - full_scan(&hash_table, 0, 5, true, scan_rows, build_rows); - probe_test(&hash_table, probe_rows, 10, false); - - // Resize to one and turn it into a linked list - resize_table(&hash_table, 1); - EXPECT_EQ(hash_table.num_buckets(), 1); - EXPECT_EQ(hash_table.size(), 5); - memset(scan_rows, 0, sizeof(scan_rows)); - full_scan(&hash_table, 0, 5, true, scan_rows, build_rows); - probe_test(&hash_table, probe_rows, 10, false); - hash_table.close(); -} - -// This test makes sure we can scan ranges of buckets -TEST_F(HashTableTest, ScanTest) { - std::vector<bool> is_null_safe = {false}; - int initial_seed = 1; - int64_t num_buckets = 4; - HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed, - num_buckets); - // Add 1 row with val 1, 2 with val 2, etc - std::vector<TupleRow*> build_rows; - ProbeTestData probe_rows[15]; - probe_rows[0].probe_row = create_tuple_row(0); - - for (int val = 1; val <= 10; ++val) { - probe_rows[val].probe_row = create_tuple_row(val); - - for (int i = 0; i < val; ++i) { - TupleRow* row = create_tuple_row(val); - hash_table.insert(row); - build_rows.push_back(row); - probe_rows[val].expected_build_rows.push_back(row); - } - } - - // Add some more probe rows that aren't there - for (int val = 11; val < 15; ++val) { - probe_rows[val].probe_row = create_tuple_row(val); - } - - // Test that all the builds were found - probe_test(&hash_table, probe_rows, 15, true); - - // Resize and try again - resize_table(&hash_table, 128); - EXPECT_EQ(hash_table.num_buckets(), 128); - probe_test(&hash_table, probe_rows, 15, true); - - resize_table(&hash_table, 16); - EXPECT_EQ(hash_table.num_buckets(), 16); - probe_test(&hash_table, probe_rows, 15, true); - - resize_table(&hash_table, 2); - EXPECT_EQ(hash_table.num_buckets(), 2); - probe_test(&hash_table, probe_rows, 15, true); - - hash_table.close(); -} - -// This test continues adding to the hash table to trigger the resize code paths -TEST_F(HashTableTest, GrowTableTest) { - int build_row_val = 0; - int num_to_add = LOOP_LESS_OR_MORE(2, 4); - int expected_size = 0; - - int mem_limit = 1024 * 1024; - std::vector<bool> is_null_safe = {false}; - int initial_seed = 1; - int64_t num_buckets = 4; - HashTable
hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed, - num_buckets); - EXPECT_FALSE(hash_table.mem_tracker()->consumption() > mem_limit); - - for (int i = 0; i < LOOP_LESS_OR_MORE(1, 20); ++i) { - for (int j = 0; j < num_to_add; ++build_row_val, ++j) { - hash_table.insert(create_tuple_row(build_row_val)); - } - - expected_size += num_to_add; - num_to_add *= 2; - EXPECT_EQ(hash_table.size(), expected_size); - } - LOG(INFO) << "consume:" << hash_table.mem_tracker()->consumption() - << ",expected_size:" << expected_size; - - EXPECT_EQ(LOOP_LESS_OR_MORE(0, 1), hash_table.mem_tracker()->consumption() > mem_limit); - - // Validate that we can find the entries - for (int i = 0; i < expected_size * 5; i += 100000) { - TupleRow* probe_row = create_tuple_row(i); - HashTable::Iterator iter = hash_table.find(probe_row); - - if (i < expected_size) { - EXPECT_TRUE(iter != hash_table.end()); - validate_match(probe_row, iter.get_row()); - } else { - EXPECT_TRUE(iter == hash_table.end()); - } - } - hash_table.close(); -} - -// This test continues adding to the hash table to trigger the resize code paths -TEST_F(HashTableTest, GrowTableTest2) { - int build_row_val = 0; - - std::vector<bool> is_null_safe = {false}; - int initial_seed = 1; - int64_t num_buckets = 4; - HashTable hash_table(_build_expr, _probe_expr, 1, false, is_null_safe, initial_seed, - num_buckets); - - LOG(INFO) << time(nullptr); - - // constexpr const int test_size = 5 * 1024 * 1024; - constexpr const int test_size = 5 * 1024 * 100; - - for (int i = 0; i < test_size; ++i) { - hash_table.insert(create_tuple_row(build_row_val++)); - } - - LOG(INFO) << time(nullptr); - - // Validate that we can find the entries - for (int i = 0; i < test_size; ++i) { - TupleRow* probe_row = create_tuple_row(i); - hash_table.find(probe_row); - } - - LOG(INFO) << time(nullptr); - - size_t counter = 0; - auto func = [&](TupleRow* row) { counter++; }; - hash_table.for_each_row(func); - EXPECT_EQ(counter, hash_table.size()); - - hash_table.close(); -} - -} // namespace doris diff --git a/be/test/exec/hdfs_file_reader_test.cpp b/be/test/exec/hdfs_file_reader_test.cpp deleted file mode 100644 index 382c09f14d..0000000000 --- a/be/test/exec/hdfs_file_reader_test.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
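A note on the GrowTableTest loop removed above: the doubling pattern has a closed form. Starting from n0 inserts and doubling for k rounds, the table ends with n0 * (2^k - 1) rows, so the full-test settings (n0 = 4 and k = 20, assuming LOOP_LESS_OR_MORE picks its second argument in that mode) give 4,194,300 rows. A quick standalone check:

#include <cstdint>
#include <iostream>

int main() {
    int64_t num_to_add = 4;    // LOOP_LESS_OR_MORE(2, 4), full-test value
    int64_t expected_size = 0;
    for (int i = 0; i < 20; ++i) {  // LOOP_LESS_OR_MORE(1, 20) iterations
        expected_size += num_to_add;
        num_to_add *= 2;
    }
    std::cout << expected_size << std::endl;  // 4194300 = 4 * (2^20 - 1)
    return 0;
}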
- -#include "io/hdfs_file_reader.h" - -#include - -#include "io/hdfs_reader_writer.h" - -namespace doris { - -class HdfsFileReaderTest : public testing::Test {}; - -TEST_F(HdfsFileReaderTest, test_connect_fail) { - THdfsParams hdfsParams; - hdfsParams.__set_fs_name("hdfs://127.0.0.9:8888"); // An invalid address - hdfsParams.__set_hdfs_kerberos_principal("somebody@TEST.COM"); - hdfsParams.__set_hdfs_kerberos_keytab("/etc/keytab/doris.keytab"); - std::vector confs; - THdfsConf item; - item.key = "dfs.ha.namenodes.service1"; - item.value = "n1,n2"; - confs.push_back(item); - hdfsParams.__set_hdfs_conf(confs); - HdfsFileReader hdfs_file_reader(hdfsParams, "/user/foo/test.data", 0); - Status status = hdfs_file_reader.open(); - EXPECT_EQ(ErrorCode::INTERNAL_ERROR, status.code()); - hdfs_file_reader.close(); -} - -} // end namespace doris diff --git a/be/test/exec/json_scanner_test.cpp b/be/test/exec/json_scanner_test.cpp deleted file mode 100644 index 5fbc3b3c3d..0000000000 --- a/be/test/exec/json_scanner_test.cpp +++ /dev/null @@ -1,630 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include <gtest/gtest.h> -#include <time.h> - -#include <map> -#include <string> -#include <vector> - -#include "common/object_pool.h" -#include "exec/broker_scan_node.h" -#include "exprs/cast_functions.h" -#include "exprs/decimalv2_operators.h" -#include "gen_cpp/Descriptors_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "io/local_file_reader.h" -#include "runtime/descriptors.h" -#include "runtime/exec_env.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "runtime/user_function_cache.h" - -namespace doris { - -class JsonScannerTest : public testing::Test { -public: - JsonScannerTest() : _runtime_state(TQueryGlobals()) { - init(); - _runtime_state.init_mem_trackers(); - _runtime_state._exec_env = ExecEnv::GetInstance(); - } - void init(); - static void SetUpTestCase() { - UserFunctionCache::instance()->init( - "./be/test/runtime/test_data/user_function_cache/normal"); - CastFunctions::init(); - DecimalV2Operators::init(); - } - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - int create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id); - int create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id); - void create_expr_info(); - void init_desc_table(); - RuntimeState _runtime_state; - ObjectPool _obj_pool; - std::map<std::string, SlotDescriptor*> _slots_map; - TBrokerScanRangeParams _params; - DescriptorTbl* _desc_tbl; - TPlanNode _tnode; -}; - -#define TUPLE_ID_DST 0 -#define TUPLE_ID_SRC 1 -#define COLUMN_NUMBERS 6 -#define DST_TUPLE_SLOT_ID_START 1 -#define SRC_TUPLE_SLOT_ID_START 7 -int JsonScannerTest::create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id) { - const char* columnNames[] = {"category", "author", "title", "price", "largeint", "decimal"}; - for (int i = 0; i < COLUMN_NUMBERS; i++) { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = i; - slot_desc.byteOffset = i * 16 + 8; - slot_desc.nullIndicatorByte = i / 8; - slot_desc.nullIndicatorBit = i % 8; - slot_desc.colName = columnNames[i]; - slot_desc.slotIdx = i + 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - - { - // TTupleDescriptor source - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = TUPLE_ID_SRC; - t_tuple_desc.byteSize = COLUMN_NUMBERS * 16 + 8; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - return next_slot_id; -} - -int JsonScannerTest::create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id) { - int32_t byteOffset = 8; - { //category - TSlotDescriptor slot_desc; - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 0; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 0; - slot_desc.colName = "category"; - slot_desc.slotIdx = 1; - slot_desc.isMaterialized = true; - -
t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 16; - { // author - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 1; - slot_desc.colName = "author"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 16; - { // title - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 2; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 2; - slot_desc.colName = "title"; - slot_desc.slotIdx = 3; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 16; - { // price - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::DOUBLE); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 3; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 3; - slot_desc.colName = "price"; - slot_desc.slotIdx = 4; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 8; - { // largeint - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::LARGEINT); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 4; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 4; - slot_desc.colName = "largeint"; - slot_desc.slotIdx = 5; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 16; - { // decimal - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__isset.precision = true; - scalar_type.__isset.scale = true; - scalar_type.__set_precision(-1); - scalar_type.__set_scale(-1); - scalar_type.__set_type(TPrimitiveType::DECIMALV2); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 5; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 5; - slot_desc.colName = "decimal"; - slot_desc.slotIdx = 6; - slot_desc.isMaterialized = true; - -
t_desc_table.slotDescriptors.push_back(slot_desc); - } - - t_desc_table.__isset.slotDescriptors = true; - { - // TTupleDescriptor dest - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = TUPLE_ID_DST; - t_tuple_desc.byteSize = byteOffset + 8; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - return next_slot_id; -} - -void JsonScannerTest::init_desc_table() { - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::BROKER_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - int next_slot_id = 1; - - next_slot_id = create_dst_tuple(t_desc_table, next_slot_id); - - next_slot_id = create_src_tuple(t_desc_table, next_slot_id); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - _runtime_state.set_desc_tbl(_desc_tbl); -} - -void JsonScannerTest::create_expr_info() { - TTypeDesc varchar_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(5000); - node.__set_scalar_type(scalar_type); - varchar_type.types.push_back(node); - } - // category VARCHAR --> VARCHAR - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START; // category id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START); - } - // author VARCHAR --> VARCHAR - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 1; // author id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 1, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 1); - } - // title VARCHAR --> VARCHAR - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 2; // title id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 2, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 2); - } - - // price VARCHAR --> DOUBLE - { - TTypeDesc double_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::DOUBLE); - node.__set_scalar_type(scalar_type); - double_type.types.push_back(node); - } - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = double_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttodouble"; - cast_expr.fn.binary_type =
TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = double_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttodouble(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_double_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 3; // price id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 3, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 3); - } - // largeint VARCHAR --> LargeInt - { - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::LARGEINT); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = int_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttolargeint"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = int_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttolargeint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_large_int_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 4; // largeint id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 4, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 4); - } - // decimal VARCHAR --> Decimal - { - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__isset.precision = true; - scalar_type.__isset.scale = true; - scalar_type.__set_precision(-1); - scalar_type.__set_scale(-1); - scalar_type.__set_type(TPrimitiveType::DECIMALV2); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = int_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttodecimalv2"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = int_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttodecimalv2(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::DecimalV2Operators::cast_to_decimalv2_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; -
slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 5; // decimal id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 5, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 5); - } - // _params.__isset.expr_of_dest_slot = true; - _params.__set_dest_tuple_id(TUPLE_ID_DST); - _params.__set_src_tuple_id(TUPLE_ID_SRC); -} - -void JsonScannerTest::init() { - create_expr_info(); - init_desc_table(); - - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.broker_scan_node.tuple_id = 0; - _tnode.__isset.broker_scan_node = true; -} - -TEST_F(JsonScannerTest, normal_simple_arrayjson) { - BrokerScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - scan_node.init(_tnode); - auto status = scan_node.prepare(&_runtime_state); - EXPECT_TRUE(status.ok()); - - // set scan range - std::vector<TScanRangeParams> scan_ranges; - { - TScanRangeParams scan_range_params; - - TBrokerScanRange broker_scan_range; - broker_scan_range.params = _params; - TBrokerRangeDesc range; - range.start_offset = 0; - range.size = -1; - range.format_type = TFileFormatType::FORMAT_JSON; - range.strip_outer_array = true; - range.__isset.strip_outer_array = true; - range.splittable = true; - range.path = "./be/test/exec/test_data/json_scanner/test_simple2.json"; - range.file_type = TFileType::FILE_LOCAL; - broker_scan_range.ranges.push_back(range); - scan_range_params.scan_range.__set_broker_scan_range(broker_scan_range); - scan_ranges.push_back(scan_range_params); - } - - scan_node.set_scan_ranges(scan_ranges); - status = scan_node.open(&_runtime_state); - EXPECT_TRUE(status.ok()); - - // Get batch - RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size()); - bool eof = false; - status = scan_node.get_next(&_runtime_state, &batch, &eof); - EXPECT_TRUE(status.ok()); - EXPECT_EQ(2, batch.num_rows()); - // Without num_as_string, the too-large largeint becomes null and the decimal value loses precision - auto tuple_str = - batch.get_row(1)->get_tuple(0)->to_string(*scan_node.row_desc().tuple_descriptors()[0]); - EXPECT_TRUE(tuple_str.find("1180591620717411303424") == tuple_str.npos); - EXPECT_TRUE(tuple_str.find("9999999999999.999999") == tuple_str.npos); - EXPECT_FALSE(eof); - batch.reset(); - - status = scan_node.get_next(&_runtime_state, &batch, &eof); - EXPECT_TRUE(status.ok()); - EXPECT_EQ(0, batch.num_rows()); - EXPECT_TRUE(eof); - - // Load the data again with num_as_string - BrokerScanNode scan_node2(&_obj_pool, _tnode, *_desc_tbl); - scan_node2.init(_tnode); - status = scan_node2.prepare(&_runtime_state); - EXPECT_TRUE(status.ok()); - scan_ranges.clear(); - { - TScanRangeParams scan_range_params; - - TBrokerScanRange broker_scan_range; - broker_scan_range.params = _params; - TBrokerRangeDesc range; - range.start_offset = 0; - range.size = -1; - range.format_type = TFileFormatType::FORMAT_JSON; - range.strip_outer_array = true; - range.num_as_string = true; - range.__isset.strip_outer_array = true; - range.__isset.num_as_string = true; - range.splittable = true; - range.path = "./be/test/exec/test_data/json_scanner/test_simple2.json"; - range.file_type = TFileType::FILE_LOCAL; - broker_scan_range.ranges.push_back(range); -
scan_range_params.scan_range.__set_broker_scan_range(broker_scan_range); - scan_ranges.push_back(scan_range_params); - } - scan_node2.set_scan_ranges(scan_ranges); - status = scan_node2.open(&_runtime_state); - EXPECT_TRUE(status.ok()); - - status = scan_node2.get_next(&_runtime_state, &batch, &eof); - EXPECT_TRUE(status.ok()); - EXPECT_EQ(2, batch.num_rows()); - // With num_as_string, the largeint and decimal values load intact - tuple_str = batch.get_row(1)->get_tuple(0)->to_string( - *scan_node2.row_desc().tuple_descriptors()[0]); - EXPECT_FALSE(tuple_str.find("1180591620717411303424") == tuple_str.npos); - EXPECT_FALSE(tuple_str.find("9999999999999.999999") == tuple_str.npos); - - scan_node.close(&_runtime_state); - scan_node2.close(&_runtime_state); - { - std::stringstream ss; - scan_node.runtime_profile()->pretty_print(&ss); - LOG(INFO) << ss.str(); - } -} - -} // namespace doris diff --git a/be/test/exec/json_scanner_with_jsonpath_test.cpp b/be/test/exec/json_scanner_with_jsonpath_test.cpp deleted file mode 100644 index e54113c0bc..0000000000 --- a/be/test/exec/json_scanner_with_jsonpath_test.cpp +++ /dev/null @@ -1,418 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
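The jsonpath variant removed below maps nested fields with paths like "$.keyname.ip". The core of such a path is just the key sequence to walk when flattening nested objects; a minimal decomposition sketch (illustrative only, Doris's real jsonpath support is richer, e.g. array subscripts):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split a simple dotted jsonpath into the object keys it traverses,
// dropping the leading "$" root marker.
std::vector<std::string> split_jsonpath(const std::string& path) {
    std::vector<std::string> keys;
    std::stringstream ss(path);
    std::string part;
    while (std::getline(ss, part, '.')) {
        if (!part.empty() && part != "$") keys.push_back(part);
    }
    return keys;
}

int main() {
    for (const auto& k : split_jsonpath("$.keyname.ip")) {
        std::cout << k << "\n";  // prints: keyname, then ip
    }
    return 0;
}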
- -#include <gtest/gtest.h> -#include <time.h> - -#include <map> -#include <string> -#include <vector> - -#include "common/object_pool.h" -#include "exec/broker_scan_node.h" -#include "exprs/cast_functions.h" -#include "gen_cpp/Descriptors_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "io/local_file_reader.h" -#include "runtime/descriptors.h" -#include "runtime/exec_env.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "runtime/user_function_cache.h" - -namespace doris { - -class JsonScannerWithJsonPathTest : public testing::Test { -public: - JsonScannerWithJsonPathTest() : _runtime_state(TQueryGlobals()) { - init(); - _runtime_state.init_mem_trackers(); - _runtime_state._exec_env = ExecEnv::GetInstance(); - } - void init(); - static void SetUpTestCase() { - UserFunctionCache::instance()->init( - "./be/test/runtime/test_data/user_function_cache/normal"); - CastFunctions::init(); - } - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - int create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id); - int create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id); - void create_expr_info(); - void init_desc_table(); - RuntimeState _runtime_state; - ObjectPool _obj_pool; - std::map<std::string, SlotDescriptor*> _slots_map; - TBrokerScanRangeParams _params; - DescriptorTbl* _desc_tbl; - TPlanNode _tnode; -}; - -#define TUPLE_ID_DST 0 -#define TUPLE_ID_SRC 1 -#define COLUMN_NUMBERS 4 -#define DST_TUPLE_SLOT_ID_START 1 -#define SRC_TUPLE_SLOT_ID_START 5 -int JsonScannerWithJsonPathTest::create_src_tuple(TDescriptorTable& t_desc_table, - int next_slot_id) { - const char* columnNames[] = {"k1", "kind", "ip", "value"}; - for (int i = 0; i < COLUMN_NUMBERS; i++) { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 1; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = i; - slot_desc.byteOffset = i * 16 + 8; - slot_desc.nullIndicatorByte = i / 8; - slot_desc.nullIndicatorBit = i % 8; - slot_desc.colName = columnNames[i]; - slot_desc.slotIdx = i + 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - - { - // TTupleDescriptor source - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = TUPLE_ID_SRC; - t_tuple_desc.byteSize = COLUMN_NUMBERS * 16 + 8; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - return next_slot_id; -} - -int JsonScannerWithJsonPathTest::create_dst_tuple(TDescriptorTable& t_desc_table, - int next_slot_id) { - int32_t byteOffset = 8; - { //k1 - TSlotDescriptor slot_desc; - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 0; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 0; - slot_desc.colName = "k1"; - slot_desc.slotIdx = 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } -
byteOffset += 16; - { //kind - TSlotDescriptor slot_desc; - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 1; - slot_desc.colName = "kind"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 16; - { // ip - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 2; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 2; - slot_desc.colName = "ip"; - slot_desc.slotIdx = 3; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 16; - { // value - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 3; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 3; - slot_desc.colName = "value"; - slot_desc.slotIdx = 4; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 16; - t_desc_table.__isset.slotDescriptors = true; - { - // TTupleDescriptor dest - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = TUPLE_ID_DST; - t_tuple_desc.byteSize = byteOffset + 8; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - return next_slot_id; -} - -void JsonScannerWithJsonPathTest::init_desc_table() { - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::BROKER_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - int next_slot_id = 1; - - next_slot_id = create_dst_tuple(t_desc_table, next_slot_id); - - next_slot_id = create_src_tuple(t_desc_table, next_slot_id); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - _runtime_state.set_desc_tbl(_desc_tbl); -} - -void JsonScannerWithJsonPathTest::create_expr_info() { - TTypeDesc varchar_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(5000); - node.__set_scalar_type(scalar_type); - varchar_type.types.push_back(node); - } - // k1 VARCHAR --> VARCHAR - { - TExprNode slot_ref; - slot_ref.node_type =
TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START; // k1 id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START); - } - // kind VARCHAR --> VARCHAR - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 1; // kind id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 1, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 1); - } - // ip VARCHAR --> VARCHAR - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 2; // ip id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 2, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 2); - } - // value VARCHAR --> VARCHAR - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 3; // value id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 3, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 3); - } - - // _params.__isset.expr_of_dest_slot = true; - _params.__set_dest_tuple_id(TUPLE_ID_DST); - _params.__set_src_tuple_id(TUPLE_ID_SRC); -} - -void JsonScannerWithJsonPathTest::init() { - create_expr_info(); - init_desc_table(); - - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.broker_scan_node.tuple_id = 0; - _tnode.__isset.broker_scan_node = true; -} - -TEST_F(JsonScannerWithJsonPathTest, normal) { - BrokerScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - scan_node.init(_tnode); - auto status = scan_node.prepare(&_runtime_state); - EXPECT_TRUE(status.ok()); - - // set scan range - std::vector<TScanRangeParams> scan_ranges; - { - TScanRangeParams scan_range_params; - - TBrokerScanRange broker_scan_range; - broker_scan_range.params = _params; - TBrokerRangeDesc range; - range.start_offset = 0; - range.size = -1; - range.format_type = TFileFormatType::FORMAT_JSON; - range.splittable = true; - range.__isset.strip_outer_array = true; - range.strip_outer_array = true; - range.__isset.jsonpaths = true; - range.jsonpaths = "[\"$.k1\", \"$.kind\", \"$.keyname.ip\", \"$.keyname.value\"]"; - - range.path = "./be/test/exec/test_data/json_scanner/test_array.json"; - range.file_type = TFileType::FILE_LOCAL; - broker_scan_range.ranges.push_back(range); - scan_range_params.scan_range.__set_broker_scan_range(broker_scan_range); - scan_ranges.push_back(scan_range_params); - } - - scan_node.set_scan_ranges(scan_ranges); - status =
scan_node.open(&_runtime_state); - EXPECT_TRUE(status.ok()); - - // Get batch - RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size()); - bool eof = false; - status = scan_node.get_next(&_runtime_state, &batch, &eof); - EXPECT_TRUE(status.ok()); - EXPECT_EQ(2, batch.num_rows()); - EXPECT_FALSE(eof); - batch.reset(); - - status = scan_node.get_next(&_runtime_state, &batch, &eof); - EXPECT_TRUE(status.ok()); - EXPECT_EQ(0, batch.num_rows()); - EXPECT_TRUE(eof); - - scan_node.close(&_runtime_state); - { - std::stringstream ss; - scan_node.runtime_profile()->pretty_print(&ss); - LOG(INFO) << ss.str(); - } -} - -} // namespace doris diff --git a/be/test/exec/multi_bytes_separator_test.cpp b/be/test/exec/multi_bytes_separator_test.cpp deleted file mode 100644 index d514d3b538..0000000000 --- a/be/test/exec/multi_bytes_separator_test.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include <gtest/gtest.h> - -#include <map> -#include <string> -#include <vector> - -#include "common/object_pool.h" -#include "exec/broker_scanner.h" -#include "exprs/cast_functions.h" -#include "gen_cpp/Descriptors_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "io/local_file_reader.h" -#include "runtime/descriptors.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "runtime/user_function_cache.h" - -namespace doris { - -class MultiBytesSeparatorTest : public testing::Test { -public: - MultiBytesSeparatorTest() : _runtime_state(TQueryGlobals()) {} - -private: - RuntimeState _runtime_state; - -protected: - virtual void SetUp() {} - virtual void TearDown() {} -}; - -TEST_F(MultiBytesSeparatorTest, normal) { - TBrokerScanRangeParams params; - params.column_separator = ','; - params.line_delimiter = '\n'; - params.column_separator_str = "AAAA"; - params.line_delimiter_str = "BBB"; - params.column_separator_length = 4; - params.line_delimiter_length = 3; - - const std::vector<TBrokerRangeDesc> ranges; - const std::vector<TNetworkAddress> broker_addresses; - const std::vector<TExpr> pre_filter_texprs; - BrokerScanner scanner(&_runtime_state, nullptr, params, ranges, broker_addresses, - pre_filter_texprs, nullptr); -

#define private public

 - // 1. - { - std::string line = "AAAA"; - Slice s(line); - scanner.split_line(s); - EXPECT_EQ(2, scanner._split_values.size()); - EXPECT_EQ(0, scanner._split_values[0].size); - EXPECT_EQ(0, scanner._split_values[1].size); - } - - // 2. - { - std::string line = "ABAA"; - Slice s(line); - scanner.split_line(s); - EXPECT_EQ(1, scanner._split_values.size()); - EXPECT_EQ(4, scanner._split_values[0].size); - } - - // 3. - { - std::string line = ""; - Slice s(line); - scanner.split_line(s); - EXPECT_EQ(1, scanner._split_values.size()); - EXPECT_EQ(0, scanner._split_values[0].size); - } - - // 4.
- { - // 1234, AAAB, , AA - std::string line = "1234AAAAAAABAAAAAAAAAA"; - Slice s(line); - scanner.split_line(s); - EXPECT_EQ(4, scanner._split_values.size()); - EXPECT_EQ(4, scanner._split_values[0].size); - EXPECT_EQ(4, scanner._split_values[1].size); - EXPECT_EQ(0, scanner._split_values[2].size); - EXPECT_EQ(2, scanner._split_values[3].size); - } -} - -} // end namespace doris diff --git a/be/test/exec/mysql_scan_node_test.cpp b/be/test/exec/mysql_scan_node_test.cpp deleted file mode 100644 index cc46f87c9a..0000000000 --- a/be/test/exec/mysql_scan_node_test.cpp +++ /dev/null @@ -1,280 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/mysql_scan_node.h" - -#include <gtest/gtest.h> - -#include <string> - -#include "common/object_pool.h" -#include "exec/text_converter.inline.h" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/tuple_row.h" -#include "schema_scan_node.h" -#include "util/debug_util.h" -#include "util/runtime_profile.h" - -using std::vector; - -namespace doris { - -// mock -class MysqlScanNodeTest : public testing::Test { -public: - MysqlScanNodeTest() : _runtim_state("test") { - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::MYSQL_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.mysqlTable.tableName = "table"; - t_table_desc.mysqlTable.mysqlHost = "host"; - t_table_desc.mysqlTable.mysqlPort = "port"; - t_table_desc.mysqlTable.mysqlUser = "user"; - t_table_desc.mysqlTable.mysqlPasswd = "passwd"; - t_table_desc.tableName = "table"; - t_table_desc.dbName = "db"; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - // TSlotDescriptor - int offset = 1; - int i = 0; - // dummy - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(false); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int); - } - // id - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); -
t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int); - } - ++i; - // model - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_slotType(to_thrift(TYPE_STRING)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(0); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(StringValue); - } - ++i; - // price - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_slotType(to_thrift(TYPE_STRING)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(StringValue); - } - ++i; - // grade - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_slotType(to_thrift(TYPE_STRING)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(2); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(StringValue); - } - - t_desc_table.__isset.slotDescriptors = true; - // TTupleDescriptor - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - _runtim_state.set_desc_tbl(_desc_tbl); - - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.mysql_scan_node.tuple_id = 0; - _tnode.mysql_scan_node.table_name = "dim_lbs_device"; - _tnode.mysql_scan_node.columns.push_back("*"); - _tnode.mysql_scan_node.filters.push_back("id = 1"); - _tnode.__isset.mysql_scan_node = true; - } - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - TPlanNode _tnode; - ObjectPool _obj_pool; - DescriptorTbl* _desc_tbl; - RuntimeState _runtim_state; -}; - -TEST_F(MysqlScanNodeTest, normal_use) { - MysqlScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(&_runtim_state); - EXPECT_TRUE(status.ok()); - std::vector<TScanRangeParams> scan_ranges; - status = scan_node.set_scan_ranges(scan_ranges); - EXPECT_TRUE(status.ok()); - std::stringstream out; - scan_node.debug_string(1, &out); - LOG(WARNING) << out.str(); - - status = scan_node.open(&_runtim_state); - EXPECT_TRUE(status.ok()); - RowBatch row_batch(scan_node._row_descriptor, 100); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(&_runtim_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - - if (!eos) { - for (int i = 0; i < row_batch.num_rows(); ++i) { - TupleRow* row = row_batch.get_row(i); - LOG(WARNING) << "input row: " << print_row(row, scan_node._row_descriptor); - } - } - } - - status = scan_node.close(&_runtim_state); - EXPECT_TRUE(status.ok()); -} -TEST_F(MysqlScanNodeTest, Prepare_fail_1) { - MysqlScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - scan_node._tuple_id = 1; - Status status = scan_node.prepare(&_runtim_state); -
EXPECT_FALSE(status.ok()); -} -TEST_F(MysqlScanNodeTest, Prepare_fail_2) { - MysqlScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - TableDescriptor* old = _desc_tbl->_tuple_desc_map[(TupleId)0]->_table_desc; - _desc_tbl->_tuple_desc_map[(TupleId)0]->_table_desc = nullptr; - Status status = scan_node.prepare(&_runtim_state); - EXPECT_FALSE(status.ok()); - _desc_tbl->_tuple_desc_map[(TupleId)0]->_table_desc = old; -} -TEST_F(MysqlScanNodeTest, open_fail_1) { - MysqlScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(&_runtim_state); - EXPECT_TRUE(status.ok()); - scan_node._table_name = "no_such_table"; - status = scan_node.open(&_runtim_state); - EXPECT_FALSE(status.ok()); -} -TEST_F(MysqlScanNodeTest, open_fail_3) { - MysqlScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(&_runtim_state); - EXPECT_TRUE(status.ok()); - scan_node._columns.clear(); - scan_node._columns.push_back("id"); - status = scan_node.open(&_runtim_state); - EXPECT_FALSE(status.ok()); -} -TEST_F(MysqlScanNodeTest, open_fail_2) { - MysqlScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(&_runtim_state); - EXPECT_TRUE(status.ok()); - scan_node._my_param.host = ""; - status = scan_node.open(&_runtim_state); - EXPECT_FALSE(status.ok()); -} -TEST_F(MysqlScanNodeTest, invalid_input) { - MysqlScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(nullptr); - EXPECT_FALSE(status.ok()); - status = scan_node.prepare(&_runtim_state); - EXPECT_TRUE(status.ok()); - status = scan_node.prepare(&_runtim_state); - EXPECT_TRUE(status.ok()); - status = scan_node.open(nullptr); - EXPECT_FALSE(status.ok()); - status = scan_node.open(&_runtim_state); - EXPECT_TRUE(status.ok()); - RowBatch row_batch(scan_node._row_descriptor, 100); - bool eos = false; - status = scan_node.get_next(nullptr, &row_batch, &eos); - EXPECT_FALSE(status.ok()); - - while (!eos) { - status = scan_node.get_next(&_runtim_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - - for (int i = 0; i < row_batch.num_rows(); ++i) { - TupleRow* row = row_batch.get_row(i); - LOG(WARNING) << "input row: " << print_row(row, scan_node._row_descriptor); - } - } -} -TEST_F(MysqlScanNodeTest, no_init) { - MysqlScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.open(&_runtim_state); - EXPECT_FALSE(status.ok()); - RowBatch row_batch(scan_node._row_descriptor, 100); - bool eos = false; - status = scan_node.get_next(&_runtim_state, &row_batch, &eos); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/mysql_scanner_test.cpp b/be/test/exec/mysql_scanner_test.cpp deleted file mode 100644 index 4a95cef2b3..0000000000 --- a/be/test/exec/mysql_scanner_test.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/mysql_scanner.h" - -#include <gtest/gtest.h> - -#include <string> - -#include "common/object_pool.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -class MysqlScannerTest : public testing::Test { -public: - MysqlScannerTest() { - _param.host = "host"; - _param.port = "port"; - _param.user = "user"; - _param.passwd = "passwd"; - _param.db = "db"; - } - -protected: - virtual void SetUp() {} - MysqlScannerParam _param; -}; - -TEST_F(MysqlScannerTest, normal_use) { - MysqlScanner scanner(_param); - Status status = scanner.open(); - EXPECT_TRUE(status.ok()); - std::vector<std::string> fields; - fields.push_back("*"); - std::vector<std::string> filters; - filters.push_back("id = 1"); - status = scanner.query("dim_lbs_device", fields, filters); - EXPECT_TRUE(status.ok()); - bool eos = false; - char** buf; - unsigned long* length; - status = scanner.get_next_row(nullptr, &length, &eos); - EXPECT_FALSE(status.ok()); - - while (!eos) { - status = scanner.get_next_row(&buf, &length, &eos); - - if (eos) { - break; - } - - EXPECT_TRUE(status.ok()); - - for (int i = 0; i < scanner.field_num(); ++i) { - if (buf[i]) { - LOG(WARNING) << buf[i]; - } else { - LOG(WARNING) << "NULL"; - } - } - } -} - -TEST_F(MysqlScannerTest, no_init) { - MysqlScanner scanner(_param); - std::vector<std::string> fields; - fields.push_back("*"); - std::vector<std::string> filters; - filters.push_back("id = 1"); - Status status = scanner.query("dim_lbs_device", fields, filters); - EXPECT_FALSE(status.ok()); - status = scanner.query("select 1"); - EXPECT_FALSE(status.ok()); - bool eos = false; - char** buf; - unsigned long* length; - status = scanner.get_next_row(&buf, &length, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(MysqlScannerTest, query_failed) { - MysqlScanner scanner(_param); - Status status = scanner.open(); - EXPECT_TRUE(status.ok()); - std::vector<std::string> fields; - fields.push_back("*"); - std::vector<std::string> filters; - filters.push_back("id = 1"); - status = scanner.query("no_such_table", fields, filters); - EXPECT_FALSE(status.ok()); -} - -TEST_F(MysqlScannerTest, open_failed) { - MysqlScannerParam invalid_param; - MysqlScanner scanner(invalid_param); - Status status = scanner.open(); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/new_olap_scan_node_test.cpp b/be/test/exec/new_olap_scan_node_test.cpp deleted file mode 100644 index ca78c58f41..0000000000 --- a/be/test/exec/new_olap_scan_node_test.cpp +++ /dev/null @@ -1,388 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License.
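The new_olap_scan_node_test.cpp removed below stages tablet data with system() calls in SetUp and removes it in TearDown. A minimal gtest fixture with the same stage/cleanup shape, using the same paths as the deleted test (the copied data set itself is assumed to exist):

#include <gtest/gtest.h>

#include <cstdlib>

// Stage test data before each test and clean it up afterwards, mirroring
// the deleted TestOlapScanNode's SetUp/TearDown.
class StagedDataTest : public testing::Test {
protected:
    void SetUp() override {
        ASSERT_EQ(0, system("mkdir -p ./testrun"));
        ASSERT_EQ(0, system("cp -r ./testdata/case3 ./testrun/."));
    }
    void TearDown() override { system("rm -rf ./testrun"); }
};

TEST_F(StagedDataTest, data_is_staged) {
    // The staged directory should exist while the test runs.
    EXPECT_EQ(0, system("test -d ./testrun/case3"));
}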
- -#include - -#include - -#include "exec/olap_scan_node.h" -#include "gen_cpp/PlanNodes_types.h" -#include "olap/batch_reader_interface.h" -#include "olap/field.h" -#include "olap/olap_configure.h" -#include "olap/olap_reader.h" -#include "olap/session_manager.h" -#include "runtime/descriptors.h" -#include "runtime/exec_env.h" -#include "runtime/primitive_type.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/tuple_row.h" -#include "util/debug_util.h" -#include "util/runtime_profile.h" - -namespace doris { - -//using namespace testing; -//using namespace olap::storage; -//using namespace doris; -//using namespace std; - -class TestOlapScanNode : public testing::Test { -public: - TestOlapScanNode() : _runtime_stat("test") {} - - void SetUp() { - init_olap(); - init_scan_node(); - } - - void TearDown() { - StorageEngine::get_instance()->clear(); - SessionManager::get_instance()->delete_session_by_fd(123); - - system("rm -rf ./testrun"); - } - - void init_olap() { - system("mkdir -p ./testrun"); - system("cp -r ./testdata/case3 ./testrun/."); - - string tables_root_path = "./testrun/case3"; - memcpy(OLAPConfigure::get_instance()->_tables_root_path, tables_root_path.c_str(), - tables_root_path.size()); - string unused_flag_path = "./testrun/unused_flag"; - memcpy(OLAPConfigure::get_instance()->_unused_flag_path, unused_flag_path.c_str(), - unused_flag_path.size()); - - StorageEngine::get_instance()->_lru_cache = newLRU_cache(10000); - - _tablet_meta = new TabletMeta( - "./testrun/case3/clickuserid_online_userid_type_planid_unitid_winfoid.hdr"); - _tablet_meta->load(); - tablet = new Tablet(_tablet_meta); - tablet->load_indices(); - tablet->_root_path_name = "./testrun/case3"; - - TableDescription description("fc", "clickuserid_online", - "userid_type_planid_unitid_winfoid"); - StorageEngine::get_instance()->add_table(description, tablet); - - // init session manager - SessionManager::get_instance()->init(); - } - - void init_scan_node() { - TUniqueId fragment_id; - TQueryOptions query_options; - query_options.disable_codegen = true; - ExecEnv* exec_env = new ExecEnv(); - _runtime_stat.init(fragment_id, query_options, "test", exec_env); - - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - // TSlotDescriptor - int offset = 1; - int i = 0; - // UserId - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("userid"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // planid - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); 
- t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("planid"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // winfoid - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("winfoid"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // pv - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("pv"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // pay - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("pay"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - - t_desc_table.__isset.slotDescriptors = true; - // TTupleDescriptor - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - _runtime_stat.set_desc_tbl(_desc_tbl); - - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::OLAP_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.tuple_ids.push_back(0); - _tnode.olap_scan_node.tuple_id = 0; - _tnode.olap_scan_node.key_column_name.push_back("userid"); - _tnode.olap_scan_node.key_column_type.push_back(to_thrift(TYPE_INT)); - _tnode.__isset.olap_scan_node = true; - - { - TScanRangeParams param; - TPaloScanRange doris_scan_range; - TNetworkAddress host; - host.__set_hostname("host"); - host.__set_port(port); - doris_scan_range.hosts.push_back(host); - doris_scan_range.__set_schema_hash("1709394"); - doris_scan_range.__set_version("0"); - // Useless but it is required in TPaloScanRange - doris_scan_range.__set_version_hash("0"); - config::olap_index_name = "userid_type_planid_unitid_winfoid"; - doris_scan_range.engine_table_name.push_back("clickuserid_online"); - doris_scan_range.__set_db_name("fc"); - param.scan_range.__set_doris_scan_range(doris_scan_range); - _scan_ranges.push_back(param); - } - } - - void read_data(int version, std::vector* data) { - data->clear(); - - int row[21]; - - for (int i = 0; i <= version; ++i) { - std::stringstream ss; - ss << "./testrun/case3/_fc_dayhour" << i << ".txt"; - fstream f(ss.str()); - - while (true) { - for (int j = 0; j < 21; ++j) { - f >> row[j]; - } - - if (f.eof()) { - break; - } - - std::stringstream str; - 
str << "[("; - str << row[0] << " "; - str << row[2] << " "; - str << row[4] << " "; - str << row[18] << " "; - str << row[20] << ")]"; - data->push_back(str.str()); - VLOG_NOTICE << "Read Row: " << str.str(); - } - } - } - -private: - TabletMeta* _tablet_meta; - Tablet* tablet; - - TPlanNode _tnode; - ObjectPool _obj_pool; - DescriptorTbl* _desc_tbl; - RuntimeState _runtime_stat; - std::vector _scan_ranges; -}; - -TEST_F(TestOlapScanNode, SimpleTest) { - OlapScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(&_runtime_stat); - EXPECT_TRUE(status.ok()); - status = scan_node.open(&_runtime_stat); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(scan_node.set_scan_ranges(_scan_ranges).ok()); - - RowBatch row_batch(scan_node._row_descriptor, _runtime_stat.batch_size()); - int num_rows = 0; - bool eos = false; - - while (!eos) { - row_batch.reset(); - status = scan_node.get_next(&_runtime_stat, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - VLOG_CRITICAL << "num_rows: " << row_batch.num_rows(); - num_rows += row_batch.num_rows(); - } - - EXPECT_EQ(num_rows, 1000); - EXPECT_TRUE(scan_node.close(&_runtime_stat).ok()); -} - -TEST_F(TestOlapScanNode, MultiColumnSingleVersionTest) { - _scan_ranges[0].scan_range.doris_scan_range.__set_version("0"); - // Useless but it is required in TPaloScanRange - _scan_ranges[0].scan_range.doris_scan_range.__set_version_hash("0"); - std::vector data; - read_data(0, &data); - - OlapScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(&_runtime_stat); - EXPECT_TRUE(status.ok()); - status = scan_node.open(&_runtime_stat); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(scan_node.set_scan_ranges(_scan_ranges).ok()); - - RowBatch row_batch(scan_node._row_descriptor, _runtime_stat.batch_size()); - int num_rows = 0; - bool eos = false; - int data_index = 0; - - while (!eos) { - row_batch.reset(); - status = scan_node.get_next(&_runtime_stat, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - - for (int i = 0; i < row_batch.num_rows(); ++i) { - TupleRow* row = row_batch.get_row(i); - VLOG_NOTICE << "input row: " << print_row(row, scan_node._row_descriptor); - EXPECT_LT(data_index, data.size()); - EXPECT_EQ(data[data_index], print_row(row, scan_node._row_descriptor)); - ++data_index; - } - - num_rows += row_batch.num_rows(); - } - - EXPECT_EQ(num_rows, data.size()); - EXPECT_TRUE(scan_node.close(&_runtime_stat).ok()); -} - -TEST_F(TestOlapScanNode, MultiColumnMultiVersionTest) { - _scan_ranges[0].scan_range.doris_scan_range.__set_version("9"); - // Useless but it is required in TPaloScanRange - _scan_ranges[0].scan_range.doris_scan_range.__set_version_hash("0"); - std::vector data; - read_data(9, &data); - - OlapScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status status = scan_node.prepare(&_runtime_stat); - EXPECT_TRUE(status.ok()); - status = scan_node.open(&_runtime_stat); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(scan_node.set_scan_ranges(_scan_ranges).ok()); - - RowBatch row_batch(scan_node._row_descriptor, _runtime_stat.batch_size()); - int num_rows = 0; - bool eos = false; - int data_index = 0; - - while (!eos) { - row_batch.reset(); - status = scan_node.get_next(&_runtime_stat, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - - for (int i = 0; i < row_batch.num_rows(); ++i) { - TupleRow* row = row_batch.get_row(i); - VLOG_NOTICE << "input row: " << print_row(row, scan_node._row_descriptor); - EXPECT_LT(data_index, data.size()); - EXPECT_EQ(data[data_index], print_row(row, 
scan_node._row_descriptor)); - ++data_index; - } - - num_rows += row_batch.num_rows(); - } - - EXPECT_EQ(num_rows, data.size()); - EXPECT_TRUE(scan_node.close(&_runtime_stat).ok()); -} - -} // namespace doris - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/be/test/exec/olap_common_test.cpp b/be/test/exec/olap_common_test.cpp deleted file mode 100644 index adf152bf0f..0000000000 --- a/be/test/exec/olap_common_test.cpp +++ /dev/null @@ -1,779 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include -#include -#define protected public -#define private public - -#include "exec/olap_common.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/descriptors.h" -#include "util/cpu_info.h" -#include "util/runtime_profile.h" - -namespace doris { - -void construct_scan_range(TPaloScanRange* doris_scan_range) { - TNetworkAddress host; - host.__set_hostname("jx-ps-dise174.jx"); - host.__set_port(8010); - doris_scan_range->hosts.push_back(host); - doris_scan_range->__set_schema_hash("216424022"); - doris_scan_range->__set_version("0"); - // Useless but it is required in TPaloScanRange - doris_scan_range->__set_version_hash("0"); - // doris_scan_range->engine_table_name.push_back("ShowQStats"); - doris_scan_range->__set_db_name("olap"); - TKeyRange key_range; - key_range.__set_column_type(to_thrift(TYPE_INT)); - key_range.__set_begin_key(-1000); - key_range.__set_end_key(1000); - key_range.__set_column_name("partition_column"); - doris_scan_range->partition_column_ranges.push_back(key_range); - doris_scan_range->__isset.partition_column_ranges = true; -} - -class ColumnValueRangeTest : public ::testing::Test { -public: - virtual void SetUp() {} - - virtual void TearDown() {} -}; - -TEST_F(ColumnValueRangeTest, ExceptionCase) { - ColumnValueRange range1; - EXPECT_FALSE(range1.add_fixed_value(10).ok()); - EXPECT_FALSE(range1.add_range(FILTER_LESS_OR_EQUAL, 10).ok()); -} - -TEST_F(ColumnValueRangeTest, NormalCase) { - ColumnValueRange range1("col"); - - EXPECT_TRUE(range1.add_fixed_value(10).ok()); - EXPECT_TRUE(range1.add_fixed_value(20).ok()); - EXPECT_TRUE(range1.add_fixed_value(30).ok()); - - EXPECT_TRUE(range1.is_fixed_value_range()); - - EXPECT_TRUE(range1.add_range(FILTER_LESS, 30).ok()); - EXPECT_FALSE(range1.is_empty_value_range()); - - ColumnValueRange range2("col"); - EXPECT_TRUE(range2.add_fixed_value(30).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - EXPECT_TRUE(range2.add_fixed_value(20).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - - EXPECT_TRUE(range2.is_fixed_value_range()); - EXPECT_TRUE(range2.add_range(FILTER_LARGER, 50).ok()); - EXPECT_FALSE(range2.is_fixed_value_range()); - - 
EXPECT_TRUE(range2.is_empty_value_range()); - EXPECT_FALSE(range1.has_intersection(range2)); -} - -TEST_F(ColumnValueRangeTest, FixedAddRangeTest) { - ColumnValueRange range1("col"); - - for (int i = 0; i < 100; i += 10) { - EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - EXPECT_TRUE(range1.add_range(FILTER_LARGER_OR_EQUAL, 10).ok()); - std::set res_set = range1.get_fixed_value_set(); - EXPECT_EQ(res_set.count(0), 0); - - for (int i = 10; i < 100; i += 10) { - EXPECT_EQ(res_set.count(i), 1); - } - - EXPECT_TRUE(range1.add_range(FILTER_LARGER, 20).ok()); - res_set = range1.get_fixed_value_set(); - EXPECT_EQ(res_set.count(10), 0); - EXPECT_EQ(res_set.count(20), 0); - - for (int i = 30; i < 100; i += 10) { - EXPECT_EQ(res_set.count(i), 1); - } - - EXPECT_TRUE(range1.add_range(FILTER_LESS, 90).ok()); - res_set = range1.get_fixed_value_set(); - EXPECT_EQ(res_set.count(90), 0); - - for (int i = 30; i < 90; i += 10) { - EXPECT_EQ(res_set.count(i), 1); - } - - EXPECT_TRUE(range1.add_range(FILTER_LESS_OR_EQUAL, 70).ok()); - res_set = range1.get_fixed_value_set(); - EXPECT_EQ(res_set.count(80), 0); - - for (int i = 30; i < 80; i += 10) { - EXPECT_EQ(res_set.count(i), 1); - } - - EXPECT_TRUE(range1.add_range(FILTER_LESS_OR_EQUAL, 30).ok()); - res_set = range1.get_fixed_value_set(); - EXPECT_EQ(res_set.count(30), 1); - - for (int i = 40; i < 80; i += 10) { - EXPECT_EQ(res_set.count(i), 0); - } - - EXPECT_TRUE(range1.add_range(FILTER_LARGER_OR_EQUAL, 30).ok()); - res_set = range1.get_fixed_value_set(); - EXPECT_EQ(res_set.count(30), 1); - - EXPECT_TRUE(range1.add_range(FILTER_LARGER, 30).ok()); - res_set = range1.get_fixed_value_set(); - EXPECT_EQ(res_set.count(30), 0); -} - -TEST_F(ColumnValueRangeTest, ContainsNullTest) { - ColumnValueRange range1("col"); - - // test fixed value range intersection with null and no null range - for (int i = 0; i < 100; i += 10) { - EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - auto null_range = ColumnValueRange::create_empty_column_value_range(); - null_range.set_contain_null(true); - EXPECT_TRUE(!null_range.is_empty_value_range()); - null_range.intersection(range1); - EXPECT_TRUE(null_range.is_empty_value_range()); - - auto no_null_range = ColumnValueRange::create_empty_column_value_range(); - no_null_range.set_contain_null(false); - no_null_range.intersection(range1); - EXPECT_EQ(no_null_range._fixed_values, range1._fixed_values); - EXPECT_EQ(no_null_range._contain_null, range1._contain_null); - - // test scoped value range intersection with null and no null range - range1.set_whole_value_range(); - range1.add_range(FILTER_LESS_OR_EQUAL, 80); - range1.add_range(FILTER_LARGER, 50); - - null_range = ColumnValueRange::create_empty_column_value_range(); - null_range.set_contain_null(true); - EXPECT_TRUE(!null_range.is_empty_value_range()); - null_range.intersection(range1); - EXPECT_TRUE(null_range.is_empty_value_range()); - - no_null_range = ColumnValueRange::create_empty_column_value_range(); - no_null_range.set_contain_null(false); - no_null_range.intersection(range1); - EXPECT_TRUE(no_null_range._fixed_values.empty()); - EXPECT_EQ(no_null_range._low_value, range1._low_value); - EXPECT_EQ(no_null_range._high_value, range1._high_value); - EXPECT_EQ(no_null_range._contain_null, range1._contain_null); -} - -TEST_F(ColumnValueRangeTest, RangeAddRangeTest) { - ColumnValueRange range1("col"); - - EXPECT_EQ(range1.get_range_min_value(), std::numeric_limits::min()); - EXPECT_EQ(range1.get_range_max_value(), std::numeric_limits::max()); - - 
EXPECT_TRUE(range1.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - EXPECT_EQ(range1.get_range_min_value(), 20); - - EXPECT_TRUE(range1.add_range(FILTER_LARGER, 30).ok()); - EXPECT_EQ(range1.get_range_min_value(), 30); - - EXPECT_TRUE(range1.add_range(FILTER_LESS, 100).ok()); - EXPECT_EQ(range1.get_range_max_value(), 100); - - EXPECT_TRUE(range1.add_range(FILTER_LESS_OR_EQUAL, 90).ok()); - EXPECT_EQ(range1.get_range_max_value(), 90); - - EXPECT_TRUE(range1.add_range(FILTER_LESS_OR_EQUAL, 31).ok()); - EXPECT_EQ(range1.get_range_max_value(), 31); - - EXPECT_TRUE(range1.add_range(FILTER_LESS, 31).ok()); - EXPECT_FALSE(range1.is_empty_value_range()); - - EXPECT_TRUE(range1.add_range(FILTER_LESS, 30).ok()); - EXPECT_TRUE(range1.is_empty_value_range()); -} - -TEST_F(ColumnValueRangeTest, RangeIntersectionTest) { - ColumnValueRange range1("col"); - EXPECT_TRUE(range1.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - - ColumnValueRange range2("col"); - EXPECT_TRUE(range2.add_range(FILTER_LESS, 100).ok()); - - EXPECT_TRUE(range1.has_intersection(range2)); - - // test intersection operation - auto intersection_range = range1; - intersection_range.intersection(range2); - EXPECT_EQ(intersection_range._low_value, 20); - EXPECT_EQ(intersection_range._low_op, FILTER_LARGER_OR_EQUAL); - EXPECT_EQ(intersection_range._high_value, 100); - EXPECT_EQ(intersection_range._high_op, FILTER_LESS); - - EXPECT_TRUE(range1.add_range(FILTER_LESS_OR_EQUAL, 80).ok()); - EXPECT_TRUE(range2.add_range(FILTER_LARGER, 40).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - - intersection_range = range1; - intersection_range.intersection(range2); - EXPECT_EQ(intersection_range._low_value, 40); - EXPECT_EQ(intersection_range._low_op, FILTER_LARGER); - EXPECT_EQ(intersection_range._high_value, 80); - EXPECT_EQ(intersection_range._high_op, FILTER_LESS_OR_EQUAL); - - EXPECT_TRUE(range1.add_range(FILTER_LESS_OR_EQUAL, 40).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - intersection_range = range1; - intersection_range.intersection(range2); - EXPECT_TRUE(intersection_range.is_empty_value_range()); -} - -TEST_F(ColumnValueRangeTest, FixedValueIntersectionTest) { - ColumnValueRange range1("col"); - - for (int i = 0; i < 100; i += 10) { - EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - ColumnValueRange range2("col"); - - for (int i = 50; i < 200; i += 10) { - EXPECT_TRUE(range2.add_fixed_value(i).ok()); - } - - EXPECT_TRUE(range1.has_intersection(range2)); - // test intersection operation - auto intersection_range = range1; - intersection_range.intersection(range2); - EXPECT_EQ(intersection_range._fixed_values.size(), 5); - EXPECT_TRUE(intersection_range._fixed_values.count(50) == 1); - EXPECT_TRUE(intersection_range._fixed_values.count(90) == 1); - - EXPECT_TRUE(range2.add_range(FILTER_LESS_OR_EQUAL, 70).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - intersection_range = range1; - intersection_range.intersection(range2); - EXPECT_EQ(intersection_range._fixed_values.size(), 3); - EXPECT_TRUE(intersection_range._fixed_values.count(50) == 1); - EXPECT_TRUE(intersection_range._fixed_values.count(70) == 1); - - EXPECT_TRUE(range1.add_range(FILTER_LARGER_OR_EQUAL, 50).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - intersection_range = range1; - intersection_range.intersection(range2); - EXPECT_EQ(intersection_range._fixed_values.size(), 3); - EXPECT_TRUE(intersection_range._fixed_values.count(50) == 1); - EXPECT_TRUE(intersection_range._fixed_values.count(70) == 1); - - 
EXPECT_TRUE(range2.add_range(FILTER_LESS, 60).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - intersection_range = range1; - intersection_range.intersection(range2); - EXPECT_EQ(intersection_range._fixed_values.size(), 1); - EXPECT_TRUE(intersection_range._fixed_values.count(50) == 1); - - EXPECT_TRUE(range1.add_range(FILTER_LARGER, 50).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - intersection_range = range1; - intersection_range.intersection(range2); - EXPECT_TRUE(intersection_range.is_empty_value_range()); -} - -TEST_F(ColumnValueRangeTest, FixedAndRangeIntersectionTest) { - for (int type = TYPE_TINYINT; type <= TYPE_BIGINT; type++) { - switch (type) { - case TYPE_TINYINT: { - ColumnValueRange range1("col"); - ColumnValueRange range2("col"); - - for (int i = 0; i < 100; i += 10) { - EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - EXPECT_TRUE(range2.has_intersection(range1)); - - EXPECT_TRUE(range2.add_range(FILTER_LESS, 50).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - EXPECT_TRUE(range2.has_intersection(range1)); - - EXPECT_TRUE(range2.add_range(FILTER_LARGER, 40).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 100).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LESS, 0).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - } - - case TYPE_SMALLINT: { - ColumnValueRange range1("col"); - ColumnValueRange range2("col"); - - for (int i = 0; i < 100; i += 10) { - EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - EXPECT_TRUE(range2.has_intersection(range1)); - - EXPECT_TRUE(range2.add_range(FILTER_LESS, 50).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - EXPECT_TRUE(range2.has_intersection(range1)); - - EXPECT_TRUE(range2.add_range(FILTER_LARGER, 40).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 100).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LESS, 0).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - } - - case TYPE_INT: { - ColumnValueRange range1("col"); - ColumnValueRange range2("col"); - - for (int i = 0; i < 100; i += 10) { - EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - EXPECT_TRUE(range2.has_intersection(range1)); - - EXPECT_TRUE(range2.add_range(FILTER_LESS, 50).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - EXPECT_TRUE(range2.has_intersection(range1)); - - EXPECT_TRUE(range2.add_range(FILTER_LARGER, 40).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 100).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LESS, 0).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - } - - case TYPE_BIGINT: { - ColumnValueRange range1("col"); - ColumnValueRange range2("col"); - - for (int i = 0; i < 100; i += 10) { - 
EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - EXPECT_TRUE(range2.has_intersection(range1)); - - EXPECT_TRUE(range2.add_range(FILTER_LESS, 50).ok()); - EXPECT_TRUE(range1.has_intersection(range2)); - EXPECT_TRUE(range2.has_intersection(range1)); - - EXPECT_TRUE(range2.add_range(FILTER_LARGER, 40).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 100).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LESS, 0).ok()); - EXPECT_FALSE(range1.has_intersection(range2)); - } - - default: - break; - } - } -} - -class OlapScanKeysTest : public ::testing::Test { -public: - virtual void SetUp() {} - - virtual void TearDown() {} -}; - -TEST_F(OlapScanKeysTest, ExtendFixedTest) { - OlapScanKeys scan_keys; - - ColumnValueRange range1("col"); - - for (int i = 0; i < 3; ++i) { - EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - bool exact_range = true; - int max_key_range = 1024; - bool eos = false; - scan_keys.extend_scan_key(range1, max_key_range, &exact_range, &eos); - EXPECT_EQ(exact_range, true); - - std::vector> key_range; - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), 3); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "0"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->end_scan_range), "0"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range), "1"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->end_scan_range), "1"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->begin_scan_range), "2"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->end_scan_range), "2"); - - ColumnValueRange range2("col"); - - for (int i = 0; i < 2; ++i) { - EXPECT_TRUE(range2.add_fixed_value(i).ok()); - } - - exact_range = true; - scan_keys.extend_scan_key(range2, max_key_range, &exact_range, &eos); - EXPECT_EQ(exact_range, true); - - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), 6); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "0,0"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->end_scan_range), "0,0"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range), "1,0"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->end_scan_range), "1,0"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->begin_scan_range), "2,0"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->end_scan_range), "2,0"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[3]->begin_scan_range), "0,1"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[3]->end_scan_range), "0,1"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[4]->begin_scan_range), "1,1"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[4]->end_scan_range), "1,1"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[5]->begin_scan_range), "2,1"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[5]->end_scan_range), "2,1"); - - range2.set_whole_value_range(); - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 100).ok()); - - exact_range = true; - scan_keys.extend_scan_key(range2, max_key_range, &exact_range, &eos); - EXPECT_EQ(exact_range, true); - - scan_keys.get_key_range(&key_range); - EXPECT_EQ(key_range.size(), 6); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "0,0,100"); - 
EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->end_scan_range), "0,0,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range), "1,0,100"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->end_scan_range), "1,0,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->begin_scan_range), "2,0,100"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->end_scan_range), "2,0,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[3]->begin_scan_range), "0,1,100"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[3]->end_scan_range), "0,1,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[4]->begin_scan_range), "1,1,100"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[4]->end_scan_range), "1,1,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[5]->begin_scan_range), "2,1,100"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[5]->end_scan_range), "2,1,2147483647"); -} - -TEST_F(OlapScanKeysTest, ExtendFixedAndRangeTest) { - OlapScanKeys scan_keys; - - ColumnValueRange range1("col"); - - for (int i = 0; i < 3; ++i) { - EXPECT_TRUE(range1.add_fixed_value(i).ok()); - } - - bool exact_range = true; - int max_scan_key_num = 1024; - bool eos = false; - scan_keys.extend_scan_key(range1, max_scan_key_num, &exact_range, &eos); - EXPECT_EQ(exact_range, true); - - ColumnValueRange range2("col"); - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - - exact_range = true; - scan_keys.extend_scan_key(range2, max_scan_key_num, &exact_range, &eos); - EXPECT_EQ(exact_range, true); - - std::vector> key_range; - - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), 3); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "0,20"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->end_scan_range), "0,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range), "1,20"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->end_scan_range), "1,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->begin_scan_range), "2,20"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->end_scan_range), "2,2147483647"); - - EXPECT_TRUE(range2.add_range(FILTER_LESS, 100).ok()); - - exact_range = true; - scan_keys.extend_scan_key(range2, max_scan_key_num, &exact_range, &eos); - EXPECT_EQ(exact_range, true); - - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), 3); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "0,20"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->end_scan_range), "0,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range), "1,20"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->end_scan_range), "1,2147483647"); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->begin_scan_range), "2,20"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[2]->end_scan_range), "2,2147483647"); -} - -TEST_F(OlapScanKeysTest, ExtendRangeTest) { - OlapScanKeys scan_keys; - config::doris_max_scan_key_num = 1; - - ColumnValueRange range2("col"); - EXPECT_TRUE(range2.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - EXPECT_TRUE(range2.add_range(FILTER_LESS_OR_EQUAL, 100).ok()); - - bool exact_range = true; - bool eos = false; - EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024, &exact_range, &eos).ok()); - EXPECT_EQ(exact_range, true); - - std::vector> key_range; - - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), 81); - - 
EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "20"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[80]->end_scan_range), "100"); - - EXPECT_TRUE(range2.add_range(FILTER_LESS, 50).ok()); - - exact_range = true; - EXPECT_TRUE(scan_keys.extend_scan_key(range2, 1024, &exact_range, &eos).ok()); - EXPECT_EQ(exact_range, true); - - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), 81); - - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "20,20"); - EXPECT_TRUE(key_range[0]->begin_include); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->end_scan_range), "20,50"); - EXPECT_FALSE(key_range[0]->end_include); -} - -TEST_F(OlapScanKeysTest, EachtypeTest) { - std::vector> key_range; - - { - OlapScanKeys scan_keys; - ColumnValueRange range("col"); - bool exact_range = true; - bool eos = false; - EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range, &eos).ok()); - EXPECT_EQ(exact_range, true); - scan_keys.get_key_range(&key_range); - // contain null, [-128, 127] - EXPECT_EQ(key_range.size(), 257); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range), "-128"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[256]->end_scan_range), "127"); - - EXPECT_TRUE(range.add_range(FILTER_LESS, 50).ok()); - scan_keys.clear(); - exact_range = true; - EXPECT_TRUE(scan_keys.extend_scan_key(range, 1024, &exact_range, &eos).ok()); - EXPECT_EQ(exact_range, true); - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), 178); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "-128"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[177]->end_scan_range), "49"); - } - - { - OlapScanKeys scan_keys; - ColumnValueRange range("col"); - bool exact_range = true; - int max_scan_key = 48; - bool eos = false; - EXPECT_TRUE(scan_keys.extend_scan_key(range, max_scan_key, &exact_range, &eos).ok()); - EXPECT_EQ(exact_range, true); - scan_keys.get_key_range(&key_range); - EXPECT_EQ(key_range.size(), 49); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range), "-32768"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[max_scan_key]->end_scan_range), "32767"); - - EXPECT_TRUE(range.add_range(FILTER_LARGER, 0).ok()); - scan_keys.clear(); - exact_range = true; - EXPECT_TRUE(scan_keys.extend_scan_key(range, max_scan_key, &exact_range, &eos).ok()); - EXPECT_EQ(exact_range, true); - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), max_scan_key); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "1"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[max_scan_key - 1]->end_scan_range), "32767"); - - EXPECT_TRUE(range.add_range(FILTER_LESS, 32766).ok()); - scan_keys.clear(); - exact_range = true; - EXPECT_TRUE(scan_keys.extend_scan_key(range, max_scan_key, &exact_range, &eos).ok()); - EXPECT_EQ(exact_range, true); - scan_keys.get_key_range(&key_range); - - EXPECT_EQ(key_range.size(), max_scan_key); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range), "2"); - EXPECT_EQ(OlapScanKeys::to_print_key(key_range[max_scan_key - 1]->end_scan_range), "32765"); - } -} - -TEST_F(OlapScanKeysTest, ToOlapFilterTest) { - ColumnValueRange range("col"); - - std::vector filters; - range.to_olap_filter(filters); - EXPECT_TRUE(filters.empty()); - - range.set_contain_null(true); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_op, "is"); - 
EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "null"); - - range.set_contain_null(false); - filters.clear(); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_op, "is"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "not null"); - - EXPECT_TRUE(range.add_range(FILTER_LARGER_OR_EQUAL, 20).ok()); - filters.clear(); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_op, ">="); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "20"); - - EXPECT_TRUE(range.add_range(FILTER_LESS, 100).ok()); - filters.clear(); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_op, ">="); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "20"); - - EXPECT_EQ(std::next(filters.begin(), 1)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 1)->condition_op, "<<"); - EXPECT_EQ(std::next(filters.begin(), 1)->condition_values[0], "100"); - - range.set_whole_value_range(); - EXPECT_TRUE(range.add_range(FILTER_LESS_OR_EQUAL, 100).ok()); - filters.clear(); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_op, "<="); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "100"); - - EXPECT_TRUE(range.add_range(FILTER_LARGER, 20).ok()); - filters.clear(); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_op, ">>"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "20"); - EXPECT_EQ(std::next(filters.begin(), 1)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 1)->condition_op, "<="); - EXPECT_EQ(std::next(filters.begin(), 1)->condition_values[0], "100"); - - range.set_whole_value_range(); - EXPECT_TRUE(range.add_fixed_value(30).ok()); - EXPECT_TRUE(range.add_fixed_value(40).ok()); - EXPECT_TRUE(range.add_fixed_value(50).ok()); - EXPECT_TRUE(range.add_fixed_value(20).ok()); - filters.clear(); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_op, "*="); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "20"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[1], "30"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[2], "40"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[3], "50"); - - filters.clear(); - range.to_in_condition(filters, false); - EXPECT_EQ(std::next(filters.begin(), 0)->column_name, "col"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_op, "!*="); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "20"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[1], "30"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[2], "40"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[3], "50"); - - EXPECT_TRUE(range.add_range(FILTER_LARGER, 20).ok()); - filters.clear(); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "30"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[1], "40"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[2], "50"); - - 
EXPECT_TRUE(range.add_range(FILTER_LESS_OR_EQUAL, 40).ok()); - filters.clear(); - range.to_olap_filter(filters); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[0], "30"); - EXPECT_EQ(std::next(filters.begin(), 0)->condition_values[1], "40"); -} - -} // namespace doris diff --git a/be/test/exec/olap_scan_node_test.cpp b/be/test/exec/olap_scan_node_test.cpp deleted file mode 100644 index 4a4a4ff3c3..0000000000 --- a/be/test/exec/olap_scan_node_test.cpp +++ /dev/null @@ -1,431 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/olap_scan_node.h" - -#include -#include -#include - -#include -#include - -#include "exec/text_converter.hpp" -#include "exprs/binary_predicate.h" -#include "exprs/in_predicate.h" -#include "exprs/int_literal.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/descriptors.h" -#include "runtime/exec_env.h" -#include "runtime/primitive_type.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/tuple_row.h" -#include "util/cpu_info.h" -#include "util/debug_util.h" -#include "util/runtime_profile.h" - -namespace doris { - -class OlapScanNodeTest : public ::testing::Test { -public: - OlapScanNodeTest() : _runtime_stat("test") {} - - virtual void SetUp() { - TUniqueId fragment_id; - TQueryOptions query_options; - query_options.disable_codegen = true; - ExecEnv* exec_env = new ExecEnv(); - _runtime_stat.init(fragment_id, query_options, "test", exec_env); - - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = ""; - t_table_desc.tableName = ""; - t_table_desc.dbName = ""; - t_table_desc.__isset.mysqlTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - // TSlotDescriptor - int offset = 1; - int i = 0; - // UserId - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("UserId"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // UrlId - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_BIGINT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - 
t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("UrlId"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int64_t); - } - ++i; - // Date - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("Date"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // PageViews - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(to_thrift(TYPE_BIGINT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("PageViews"); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(int64_t); - } - - t_desc_table.__isset.slotDescriptors = true; - // TTupleDescriptor - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - _runtime_stat.set_desc_tbl(_desc_tbl); - - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::OLAP_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = 100; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.tuple_ids.push_back(0); - _tnode.olap_scan_node.tuple_id = 0; - _tnode.olap_scan_node.key_column_name.push_back("UserId"); - _tnode.olap_scan_node.key_column_name.push_back("UrlId"); - _tnode.olap_scan_node.key_column_name.push_back("Date"); - _tnode.olap_scan_node.key_column_type.push_back(to_thrift(TYPE_INT)); - _tnode.olap_scan_node.key_column_type.push_back(to_thrift(TYPE_BIGINT)); - _tnode.olap_scan_node.key_column_type.push_back(to_thrift(TYPE_INT)); - _tnode.__isset.olap_scan_node = true; - - { - TScanRangeParams param; - TPaloScanRange doris_scan_range; - TNetworkAddress host; - host.__set_hostname("host"); - host.__set_port(9999); - doris_scan_range.hosts.push_back(host); - doris_scan_range.__set_schema_hash("462300563"); - doris_scan_range.__set_version("94"); - // Useless but it is required in TPaloScanRange - doris_scan_range.__set_version_hash("0"); - doris_scan_range.engine_table_name.push_back("DorisTestStats"); - doris_scan_range.__set_db_name("olap"); - //TKeyRange key_range; - //key_range.__set_column_type(to_thrift(TYPE_BIGINT)); - //key_range.__set_begin_key(-5000); - //key_range.__set_end_key(5000); - //key_range.__set_column_name("UrlId"); - //doris_scan_range.partition_column_ranges.push_back(key_range); - param.scan_range.__set_doris_scan_range(doris_scan_range); - _scan_ranges.push_back(param); - } - } - - virtual void TearDown() {} - -private: - TPlanNode _tnode; - ObjectPool _obj_pool; - DescriptorTbl* _desc_tbl; - RuntimeState _runtime_stat; - std::vector _scan_ranges; -}; - -TEST_F(OlapScanNodeTest, NormalUse) { - OlapScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - Status 
status = scan_node.prepare(&_runtime_stat); - EXPECT_TRUE(status.ok()); - status = scan_node.open(&_runtime_stat); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(scan_node.set_scan_ranges(_scan_ranges).ok()); - - RowBatch row_batch(scan_node._row_descriptor, _runtime_stat.batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(&_runtime_stat, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - int num = std::min(row_batch.num_rows(), 10); - EXPECT_TRUE(num > 0); - - for (int i = 0; i < num; ++i) { - TupleRow* row = row_batch.get_row(i); - LOG(WARNING) << "input row: " << print_row(row, scan_node._row_descriptor); - - if (0 == i) { - EXPECT_EQ("[(-5000 -5000 -5000 1)]", print_row(row, scan_node._row_descriptor)); - } - } - - eos = true; - } - - EXPECT_TRUE(scan_node.close(&_runtime_stat).ok()); -} - -TEST_F(OlapScanNodeTest, PushDownBinaryPredicate) { - OlapScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - - TExprNode binary_node; - binary_node.node_type = TExprNodeType::BINARY_PRED; - binary_node.type = to_tcolumn_type_thrift(TPrimitiveType::BOOLEAN); - binary_node.num_children = 2; - binary_node.opcode = TExprOpcode::LT_INT_INT; - binary_node.__isset.opcode = true; - - BinaryPredicate bin_pre(binary_node); - - TExprNode slot_node; - slot_node.node_type = TExprNodeType::SLOT_REF; - slot_node.type = to_tcolumn_type_thrift(TPrimitiveType::INT); - slot_node.num_children = 0; - slot_node.slot_ref.slot_id = 0; - slot_node.slot_ref.tuple_id = 0; - slot_node.__isset.slot_ref = true; - - std::vector<TTupleId> row_tuples; - row_tuples.push_back(0); - std::vector<bool> nullable_tuples; - nullable_tuples.push_back(false); - RowDescriptor row_desc(*_desc_tbl, row_tuples, nullable_tuples); - - bin_pre._children.push_back(_obj_pool.add(new SlotRef(slot_node))); - int v = -4999; - bin_pre._children.push_back(_obj_pool.add(new IntLiteral(TYPE_INT, &v))); - - Status status = bin_pre.prepare(&_runtime_stat, row_desc); - EXPECT_TRUE(status.ok()); - - std::list<Expr*> exprs; - exprs.push_back(&bin_pre); - - scan_node.push_down_predicate(&_runtime_stat, &exprs); - - status = scan_node.prepare(&_runtime_stat); - EXPECT_TRUE(status.ok()); - status = scan_node.open(&_runtime_stat); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(scan_node.set_scan_ranges(_scan_ranges).ok()); - - RowBatch row_batch(scan_node._row_descriptor, _runtime_stat.batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(&_runtime_stat, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - int num = std::min(row_batch.num_rows(), 10); - EXPECT_TRUE(num > 0); - - for (int i = 0; i < num; ++i) { - TupleRow* row = row_batch.get_row(i); - LOG(WARNING) << "input row: " << print_row(row, scan_node._row_descriptor); - EXPECT_EQ("[(-5000 -5000 -5000 1)]", print_row(row, scan_node._row_descriptor)); - } - - eos = true; - } - - EXPECT_TRUE(scan_node.close(&_runtime_stat).ok()); -} - -TEST_F(OlapScanNodeTest, PushDownBinaryEqualPredicate) { - OlapScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - - TExprNode binary_node; - binary_node.node_type = TExprNodeType::BINARY_PRED; - binary_node.type = to_tcolumn_type_thrift(TPrimitiveType::BOOLEAN); - binary_node.num_children = 2; - binary_node.opcode = TExprOpcode::EQ_INT_INT; - binary_node.__isset.opcode = true; - - BinaryPredicate bin_pre(binary_node); - - TExprNode slot_node; - slot_node.node_type = TExprNodeType::SLOT_REF; - slot_node.type = to_tcolumn_type_thrift(TPrimitiveType::INT); - slot_node.num_children = 0; - slot_node.slot_ref.slot_id = 0; - slot_node.slot_ref.tuple_id = 0; - 
slot_node.__isset.slot_ref = true; - - std::vector<TTupleId> row_tuples; - row_tuples.push_back(0); - std::vector<bool> nullable_tuples; - nullable_tuples.push_back(false); - RowDescriptor row_desc(*_desc_tbl, row_tuples, nullable_tuples); - - bin_pre._children.push_back(_obj_pool.add(new SlotRef(slot_node))); - int v = -5000; - bin_pre._children.push_back(_obj_pool.add(new IntLiteral(TYPE_INT, &v))); - - Status status = bin_pre.prepare(&_runtime_stat, row_desc); - EXPECT_TRUE(status.ok()); - - std::list<Expr*> exprs; - exprs.push_back(&bin_pre); - - scan_node.push_down_predicate(&_runtime_stat, &exprs); - - status = scan_node.prepare(&_runtime_stat); - EXPECT_TRUE(status.ok()); - status = scan_node.open(&_runtime_stat); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(scan_node.set_scan_ranges(_scan_ranges).ok()); - - RowBatch row_batch(scan_node._row_descriptor, _runtime_stat.batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(&_runtime_stat, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - int num = std::min(row_batch.num_rows(), 10); - EXPECT_TRUE(num > 0); - - for (int i = 0; i < num; ++i) { - TupleRow* row = row_batch.get_row(i); - LOG(WARNING) << "input row: " << print_row(row, scan_node._row_descriptor); - EXPECT_EQ("[(-5000 -5000 -5000 1)]", print_row(row, scan_node._row_descriptor)); - } - - eos = true; - } - - EXPECT_TRUE(scan_node.close(&_runtime_stat).ok()); -} - -TEST_F(OlapScanNodeTest, PushDownInPredicateCase) { - OlapScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - - TExprNode in_node; - in_node.node_type = TExprNodeType::IN_PRED; - in_node.type = to_tcolumn_type_thrift(TPrimitiveType::BOOLEAN); - in_node.num_children = 0; - in_node.in_predicate.is_not_in = false; - in_node.__isset.in_predicate = true; - InPredicate in_pre(in_node); - TExprNode slot_node; - slot_node.node_type = TExprNodeType::SLOT_REF; - slot_node.type = to_tcolumn_type_thrift(TPrimitiveType::INT); - slot_node.num_children = 0; - slot_node.slot_ref.slot_id = 0; - slot_node.slot_ref.tuple_id = 0; - slot_node.__isset.slot_ref = true; - - std::vector<TTupleId> row_tuples; - row_tuples.push_back(0); - std::vector<bool> nullable_tuples; - nullable_tuples.push_back(false); - RowDescriptor row_desc(*_desc_tbl, row_tuples, nullable_tuples); - - in_pre._children.push_back(_obj_pool.add(new SlotRef(slot_node))); - - Status status = in_pre.prepare(&_runtime_stat, row_desc); - EXPECT_TRUE(status.ok()); - - for (int i = -5000; i < -4999; ++i) { - in_pre.insert(&i); - } - - std::list<Expr*> exprs; - exprs.push_back(&in_pre); - - scan_node.push_down_predicate(&_runtime_stat, &exprs); - - status = scan_node.prepare(&_runtime_stat); - EXPECT_TRUE(status.ok()); - status = scan_node.open(&_runtime_stat); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(scan_node.set_scan_ranges(_scan_ranges).ok()); - - RowBatch row_batch(scan_node._row_descriptor, _runtime_stat.batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(&_runtime_stat, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - int num = std::min(row_batch.num_rows(), 10); - EXPECT_TRUE(num > 0); - - for (int i = 0; i < num; ++i) { - TupleRow* row = row_batch.get_row(i); - LOG(WARNING) << "input row: " << print_row(row, scan_node._row_descriptor); - EXPECT_EQ("[(-5000 -5000 -5000 1)]", print_row(row, scan_node._row_descriptor)); - } - - eos = true; - } - - EXPECT_TRUE(scan_node.close(&_runtime_stat).ok()); -} - -} // namespace doris diff --git a/be/test/exec/olap_scanner_test.cpp b/be/test/exec/olap_scanner_test.cpp deleted file mode 100644 index 
1daa7a92d7..0000000000 --- a/be/test/exec/olap_scanner_test.cpp +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/olap_scanner.h" - -#include -#include -#include - -#include -#include - -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/descriptors.h" -#include "runtime/runtime_state.h" -#include "util/cpu_info.h" -#include "util/runtime_profile.h" - -namespace doris { - -static const int RES_BUF_SIZE = 100 * 1024 * 1024; -static char res_buf[RES_BUF_SIZE]; - -std::shared_ptr<DorisScanRange> construct_scan_ranges() { - TPaloScanRange doris_scan_range; - TNetworkAddress host; - host.__set_hostname("host"); - host.__set_port(9999); - doris_scan_range.hosts.push_back(host); - doris_scan_range.__set_schema_hash("462300563"); - doris_scan_range.__set_version("94"); - // Useless but it is required in TPaloScanRange - doris_scan_range.__set_version_hash("0"); - doris_scan_range.engine_table_name.push_back("DorisTestStats"); - doris_scan_range.__set_db_name("olap"); - TKeyRange key_range; - key_range.__set_column_type(to_thrift(TYPE_INT)); - key_range.__set_begin_key(-65535); - key_range.__set_end_key(65535); - key_range.__set_column_name("UserId"); - doris_scan_range.partition_column_ranges.push_back(key_range); - std::shared_ptr<DorisScanRange> scan_range(new DorisScanRange(doris_scan_range)); - return scan_range; -} - -void construct_one_tuple(TupleDescriptor& tuple_desc) { - { - TSlotDescriptor t_slot; - t_slot.__set_id(1); - t_slot.__set_parent(2); - t_slot.__set_slotType(::doris::TPrimitiveType::INT); - t_slot.__set_columnPos(0); - t_slot.__set_byteOffset(0); - t_slot.__set_nullIndicatorByte(0); - t_slot.__set_nullIndicatorBit(0); - t_slot.__set_slotIdx(0); - t_slot.__set_isMaterialized(true); - t_slot.__set_colName("UserId"); - - SlotDescriptor* slot = new SlotDescriptor(t_slot); - tuple_desc.add_slot(slot); - } -} - -TEST(OlapIdlUtilTest, normalcase) {} - -} // namespace doris diff --git a/be/test/exec/orc_scanner_test.cpp b/be/test/exec/orc_scanner_test.cpp deleted file mode 100644 index 7356458f47..0000000000 --- a/be/test/exec/orc_scanner_test.cpp +++ /dev/null @@ -1,897 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/orc_scanner.h" - -#include -#include -#include - -#include -#include -#include - -#include "common/object_pool.h" -#include "exec/broker_scan_node.h" -#include "exprs/cast_functions.h" -#include "exprs/decimalv2_operators.h" -#include "gen_cpp/Descriptors_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "io/local_file_reader.h" -#include "runtime/descriptors.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "runtime/user_function_cache.h" - -namespace doris { - -class OrcScannerTest : public testing::Test { -public: - OrcScannerTest() : _runtime_state(TQueryGlobals()) { - _profile = _runtime_state.runtime_profile(); - _runtime_state.init_mem_trackers(); - } - - static void SetUpTestCase() { - UserFunctionCache::instance()->init( - "./be/test/runtime/test_data/user_function_cache/normal"); - CastFunctions::init(); - DecimalV2Operators::init(); - } - -protected: - virtual void SetUp() {} - - virtual void TearDown() {} - -private: - RuntimeState _runtime_state; - RuntimeProfile* _profile; - ObjectPool _obj_pool; - DescriptorTbl* _desc_tbl; - std::vector _addresses; - ScannerCounter _counter; - std::vector _pre_filter; - bool _fill_tuple; -}; - -TEST_F(OrcScannerTest, normal) { - TBrokerScanRangeParams params; - TTypeDesc varchar_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - varchar_type.types.push_back(node); - } - - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - - TTypeDesc big_int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::BIGINT); - node.__set_scalar_type(scalar_type); - big_int_type.types.push_back(node); - } - - TTypeDesc float_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::FLOAT); - node.__set_scalar_type(scalar_type); - float_type.types.push_back(node); - } - - TTypeDesc double_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::DOUBLE); - node.__set_scalar_type(scalar_type); - double_type.types.push_back(node); - } - - TTypeDesc date_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::DATE); - node.__set_scalar_type(scalar_type); - date_type.types.push_back(node); - } - - //col1 varchar -> bigint - { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = big_int_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - 
cast_expr.fn.name.function_name = "casttobigint"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = big_int_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_big_int_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 0; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(8, expr); - params.src_slot_ids.push_back(0); - } - //col2, col3 - for (int i = 1; i <= 2; i++) { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = i; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(8 + i, expr); - params.src_slot_ids.push_back(i); - } - - //col5 varchar -> double - { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = double_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttodouble"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = double_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_double_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 3; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(11, expr); - params.src_slot_ids.push_back(3); - } - - //col6 varchar -> float - { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = float_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttofloat"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = float_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_float_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 4; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(12, expr); - params.src_slot_ids.push_back(4); - } - //col7,col8 - for (int i = 5; i <= 6; i++) { - TExprNode cast_expr; - cast_expr.node_type = 
TExprNodeType::CAST_EXPR; - cast_expr.type = int_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttoint"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = int_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_int_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = i; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(8 + i, expr); - params.src_slot_ids.push_back(i); - } - - //col9 varchar -> var - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 7; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(15, expr); - params.src_slot_ids.push_back(7); - } - - params.__set_src_tuple_id(0); - params.__set_dest_tuple_id(1); - - //init_desc_table - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::BROKER_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - TDescriptorTableBuilder dtb; - TTupleDescriptorBuilder src_tuple_builder; - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col1") - .column_pos(1) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col2") - .column_pos(2) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col3") - .column_pos(3) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col5") - .column_pos(4) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col6") - .column_pos(5) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col7") - .column_pos(6) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col8") - .column_pos(7) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col9") - .column_pos(8) - .build()); - src_tuple_builder.build(&dtb); - - TTupleDescriptorBuilder dest_tuple_builder; - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_BIGINT).column_name("col1").column_pos(1).build()); - dest_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col2") - .column_pos(2) - .build()); - dest_tuple_builder.add_slot( - 
TSlotDescriptorBuilder().string_type(65535).column_name("col3").column_pos(3).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_DOUBLE).column_name("col5").column_pos(4).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_FLOAT).column_name("col6").column_pos(5).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_INT).column_name("col7").column_pos(6).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_INT).column_name("col8").column_pos(7).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().string_type(65535).column_name("col9").column_pos(8).build()); - dest_tuple_builder.build(&dtb); - t_desc_table = dtb.desc_tbl(); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - _runtime_state.set_desc_tbl(_desc_tbl); - - std::vector ranges; - TBrokerRangeDesc rangeDesc; - rangeDesc.start_offset = 0; - rangeDesc.size = -1; - rangeDesc.format_type = TFileFormatType::FORMAT_ORC; - rangeDesc.splittable = false; - - rangeDesc.path = "./be/test/exec/test_data/orc_scanner/my-file.orc"; - rangeDesc.file_type = TFileType::FILE_LOCAL; - ranges.push_back(rangeDesc); - - ORCScanner scanner(&_runtime_state, _profile, params, ranges, _addresses, _pre_filter, - &_counter); - EXPECT_TRUE(scanner.open().ok()); - - MemPool tuple_pool; - - Tuple* tuple = (Tuple*)tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size()); - bool eof = false; - - EXPECT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof, &_fill_tuple).ok()); - EXPECT_EQ(Tuple::to_string(tuple, *_desc_tbl->get_tuple_descriptor(1)), - "(0 null doris 1.567 1.567000031471252 12345 1 doris)"); - EXPECT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof, &_fill_tuple).ok()); - EXPECT_EQ(Tuple::to_string(tuple, *_desc_tbl->get_tuple_descriptor(1)), - "(1 true doris 1.567 1.567000031471252 12345 1 doris)"); - EXPECT_FALSE(eof); - for (int i = 2; i < 10; i++) { - EXPECT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof, &_fill_tuple).ok()); - } - EXPECT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof, &_fill_tuple).ok()); - EXPECT_TRUE(eof); - scanner.close(); -} - -TEST_F(OrcScannerTest, normal2) { - TBrokerScanRangeParams params; - TTypeDesc varchar_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - varchar_type.types.push_back(node); - } - - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::INT); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 1; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(3, expr); - params.src_slot_ids.push_back(1); - } - params.__set_src_tuple_id(0); - params.__set_dest_tuple_id(1); - - //init_desc_table - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::BROKER_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = 
true; - - TDescriptorTableBuilder dtb; - TTupleDescriptorBuilder src_tuple_builder; - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col1") - .column_pos(1) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col2") - .column_pos(2) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col3") - .column_pos(3) - .build()); - src_tuple_builder.build(&dtb); - TTupleDescriptorBuilder dest_tuple_builder; - dest_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .column_name("value_from_col2") - .column_pos(1) - .build()); - - dest_tuple_builder.build(&dtb); - t_desc_table = dtb.desc_tbl(); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - _runtime_state.set_desc_tbl(_desc_tbl); - - std::vector ranges; - TBrokerRangeDesc rangeDesc; - rangeDesc.start_offset = 0; - rangeDesc.size = -1; - rangeDesc.format_type = TFileFormatType::FORMAT_ORC; - rangeDesc.splittable = false; - - rangeDesc.path = "./be/test/exec/test_data/orc_scanner/my-file.orc"; - rangeDesc.file_type = TFileType::FILE_LOCAL; - ranges.push_back(rangeDesc); - - ORCScanner scanner(&_runtime_state, _profile, params, ranges, _addresses, _pre_filter, - &_counter); - EXPECT_TRUE(scanner.open().ok()); - - MemPool tuple_pool; - - Tuple* tuple = (Tuple*)tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size()); - bool eof = false; - EXPECT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof, &_fill_tuple).ok()); - EXPECT_EQ(Tuple::to_string(tuple, *_desc_tbl->get_tuple_descriptor(1)), "(null)"); - EXPECT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof, &_fill_tuple).ok()); - EXPECT_EQ(Tuple::to_string(tuple, *_desc_tbl->get_tuple_descriptor(1)), "(true)"); - scanner.close(); -} - -TEST_F(OrcScannerTest, normal3) { - TBrokerScanRangeParams params; - TTypeDesc varchar_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - varchar_type.types.push_back(node); - } - - TTypeDesc decimal_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::DECIMALV2); - scalar_type.__set_precision(64); - scalar_type.__set_scale(64); - node.__set_scalar_type(scalar_type); - decimal_type.types.push_back(node); - } - - TTypeDesc tinyint_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::TINYINT); - node.__set_scalar_type(scalar_type); - tinyint_type.types.push_back(node); - } - - TTypeDesc datetime_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::DATETIME); - node.__set_scalar_type(scalar_type); - datetime_type.types.push_back(node); - } - - TTypeDesc date_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::DATE); - node.__set_scalar_type(scalar_type); - date_type.types.push_back(node); - } - - { - for (int i = 0; i < 5; ++i) { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = decimal_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - 
cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttodecimalv2"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = decimal_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("cast_to_decimalv2_val(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::DecimalV2Operators::cast_to_decimalv2_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = i; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(9 + i, expr); - params.src_slot_ids.push_back(i); - } - - { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = tinyint_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttotinyint"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = tinyint_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("cast_to_tiny_int_val(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_tiny_int_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 5; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(14, expr); - params.src_slot_ids.push_back(5); - } - - { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = datetime_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttodatetime"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = datetime_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("cast_to_datetime_val(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_datetime_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 6; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(15, expr); - params.src_slot_ids.push_back(6); - } - { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = date_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttodate"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = 
date_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_date_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 7; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(16, expr); - params.src_slot_ids.push_back(7); - } - { - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = decimal_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttodecimalv2"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = decimal_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("cast_to_decimalv2_val(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::DecimalV2Operators::cast_to_decimalv2_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = 8; - slot_ref.slot_ref.tuple_id = 0; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - params.expr_of_dest_slot.emplace(17, expr); - params.src_slot_ids.push_back(8); - } - } - params.__set_src_tuple_id(0); - params.__set_dest_tuple_id(1); - - //init_desc_table - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::BROKER_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - TDescriptorTableBuilder dtb; - TTupleDescriptorBuilder src_tuple_builder; - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col1") - .column_pos(1) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col2") - .column_pos(2) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col3") - .column_pos(3) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col4") - .column_pos(4) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col5") - .column_pos(5) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col6") - .column_pos(6) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col7") - .column_pos(7) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col8") - .column_pos(8) - .build()); - src_tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(65535) - .nullable(true) - .column_name("col9") - .column_pos(9) - 
.build()); - src_tuple_builder.build(&dtb); - - TTupleDescriptorBuilder dest_tuple_builder; - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().decimal_type(10, 9).column_name("col1").column_pos(1).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().decimal_type(7, 5).column_name("col2").column_pos(2).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().decimal_type(10, 9).column_name("col3").column_pos(3).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().decimal_type(10, 5).column_name("col4").column_pos(4).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().decimal_type(10, 5).column_name("col5").column_pos(5).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_TINYINT).column_name("col6").column_pos(6).build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_DATETIME).column_name("col7").column_pos(7).build()); - dest_tuple_builder.add_slot(TSlotDescriptorBuilder() - .type(TYPE_DATE) - .nullable(true) - .column_name("col8") - .column_pos(8) - .build()); - dest_tuple_builder.add_slot( - TSlotDescriptorBuilder().decimal_type(27, 9).column_name("col9").column_pos(9).build()); - - dest_tuple_builder.build(&dtb); - t_desc_table = dtb.desc_tbl(); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - _runtime_state.set_desc_tbl(_desc_tbl); - - std::vector ranges; - TBrokerRangeDesc rangeDesc; - rangeDesc.start_offset = 0; - rangeDesc.size = -1; - rangeDesc.format_type = TFileFormatType::FORMAT_ORC; - rangeDesc.splittable = false; - - rangeDesc.path = "./be/test/exec/test_data/orc_scanner/decimal_and_timestamp.orc"; - rangeDesc.file_type = TFileType::FILE_LOCAL; - ranges.push_back(rangeDesc); - - ORCScanner scanner(&_runtime_state, _profile, params, ranges, _addresses, _pre_filter, - &_counter); - EXPECT_TRUE(scanner.open().ok()); - - MemPool tuple_pool; - - Tuple* tuple = (Tuple*)tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size()); - bool eof = false; - EXPECT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof, &_fill_tuple).ok()); - EXPECT_EQ(Tuple::to_string(tuple, *_desc_tbl->get_tuple_descriptor(1)), - "(0.123456789 1.12 -1.12345 0.12345 0 1 2020-01-14 14:12:19 2020-02-10 " - "-0.0014)"); - scanner.close(); -} - -} // end namespace doris diff --git a/be/test/exec/parquet_scanner_test.cpp b/be/test/exec/parquet_scanner_test.cpp deleted file mode 100644 index d330a44b3f..0000000000 --- a/be/test/exec/parquet_scanner_test.cpp +++ /dev/null @@ -1,557 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
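The three ORC scanner tests deleted above rebuild the same two Thrift values for every column: a one-node scalar TTypeDesc, and a CAST TExprNode wrapping a SLOT_REF in prefix order. A minimal sketch of that boilerplate factored into helpers; the helper names are illustrative, the split of the generated Thrift headers is assumed, and every field is set exactly as the deleted tests set it.

#include <string>

#include "gen_cpp/Exprs_types.h" // TExpr/TExprNode (assumed generated header)
#include "gen_cpp/Types_types.h" // TTypeDesc/TTypeNode/TScalarType

// One scalar type node, e.g. make_scalar_type(TPrimitiveType::VARCHAR, 65535).
static TTypeDesc make_scalar_type(TPrimitiveType::type t, int len = -1) {
    TScalarType scalar;
    scalar.__set_type(t);
    if (len > 0) scalar.__set_len(len); // only VARCHAR-like types carry a length
    TTypeNode node;
    node.__set_type(TTypeNodeType::SCALAR);
    node.__set_scalar_type(scalar);
    TTypeDesc desc;
    desc.types.push_back(node);
    return desc;
}

// CAST over SLOT_REF; a TExpr stores its nodes in prefix order, parent first.
static TExpr make_cast_expr(const TTypeDesc& src, const TTypeDesc& dst,
                            const std::string& fn_name, const std::string& symbol,
                            int slot_id) {
    TExprNode cast;
    cast.node_type = TExprNodeType::CAST_EXPR;
    cast.type = dst;
    cast.__set_opcode(TExprOpcode::CAST);
    cast.__set_num_children(1);
    cast.__isset.fn = true;
    cast.fn.name.function_name = fn_name;
    cast.fn.binary_type = TFunctionBinaryType::BUILTIN;
    cast.fn.arg_types.push_back(src);
    cast.fn.ret_type = dst;
    cast.fn.has_var_args = false;
    cast.fn.__isset.scalar_fn = true;
    cast.fn.scalar_fn.symbol = symbol;

    TExprNode slot;
    slot.node_type = TExprNodeType::SLOT_REF;
    slot.type = src;
    slot.num_children = 0;
    slot.__isset.slot_ref = true;
    slot.slot_ref.slot_id = slot_id;
    slot.slot_ref.tuple_id = 0;

    TExpr expr;
    expr.nodes.push_back(cast);
    expr.nodes.push_back(slot);
    return expr;
}

// The first ORC test's VARCHAR -> BIGINT column then becomes:
//   params.expr_of_dest_slot.emplace(8, make_cast_expr(
//           make_scalar_type(TPrimitiveType::VARCHAR, 65535),
//           make_scalar_type(TPrimitiveType::BIGINT),
//           "casttobigint", "doris::CastFunctions::cast_to_big_int_val", /*slot_id=*/0));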
-
-#include <gtest/gtest.h>
-#include
-
-#include
-#include
-#include
-
-#include "common/object_pool.h"
-#include "exec/broker_scan_node.h"
-#include "exprs/cast_functions.h"
-#include "gen_cpp/Descriptors_types.h"
-#include "gen_cpp/PlanNodes_types.h"
-#include "io/local_file_reader.h"
-#include "runtime/descriptors.h"
-#include "runtime/row_batch.h"
-#include "runtime/runtime_state.h"
-#include "runtime/tuple.h"
-#include "runtime/user_function_cache.h"
-
-namespace doris {
-
-class ParquetScannerTest : public testing::Test {
-public:
-    ParquetScannerTest() : _runtime_state(TQueryGlobals()) {
-        init();
-        _runtime_state.init_mem_trackers();
-    }
-    void init();
-    static void SetUpTestCase() {
-        UserFunctionCache::instance()->init(
-                "./be/test/runtime/test_data/user_function_cache/normal");
-        CastFunctions::init();
-    }
-
-protected:
-    virtual void SetUp() {}
-    virtual void TearDown() {}
-
-private:
-    int create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id);
-    int create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id);
-    void create_expr_info();
-    void init_desc_table();
-    void init_filter_expr();
-    RuntimeState _runtime_state;
-    ObjectPool _obj_pool;
-    std::map _slots_map;
-    TBrokerScanRangeParams _params;
-    DescriptorTbl* _desc_tbl;
-    TPlanNode _tnode;
-};
-
-#define TUPLE_ID_DST 0
-#define TUPLE_ID_SRC 1
-#define COLUMN_NUMBERS 20
-#define DST_TUPLE_SLOT_ID_START 1
-#define SRC_TUPLE_SLOT_ID_START 21
-int ParquetScannerTest::create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id) {
-    const char* columnNames[] = {
-            "log_version", "log_time", "log_time_stamp", "js_version",
-            "vst_cookie", "vst_ip", "vst_user_id", "vst_user_agent",
-            "device_resolution", "page_url", "page_refer_url", "page_yyid",
-            "page_type", "pos_type", "content_id", "media_id",
-            "spm_cnt", "spm_pre", "scm_cnt", "partition_column"};
-    for (int i = 0; i < COLUMN_NUMBERS; i++) {
-        TSlotDescriptor slot_desc;
-
-        slot_desc.id = next_slot_id++;
-        slot_desc.parent = 1;
-        TTypeDesc type;
-        {
-            TTypeNode node;
-            node.__set_type(TTypeNodeType::SCALAR);
-            TScalarType scalar_type;
-            scalar_type.__set_type(TPrimitiveType::VARCHAR);
-            scalar_type.__set_len(65535);
-            node.__set_scalar_type(scalar_type);
-            type.types.push_back(node);
-        }
-        slot_desc.slotType = type;
-        slot_desc.columnPos = i;
-        // Skip the first 8 bytes These 8 bytes are used to indicate whether the field is a null value
-        slot_desc.byteOffset = i * 16 + 8;
-        slot_desc.nullIndicatorByte = i / 8;
-        slot_desc.nullIndicatorBit = i % 8;
-        slot_desc.colName = columnNames[i];
-        slot_desc.slotIdx = i + 1;
-        slot_desc.isMaterialized = true;
-
-        t_desc_table.slotDescriptors.push_back(slot_desc);
-    }
-
-    {
-        // TTupleDescriptor source
-        TTupleDescriptor t_tuple_desc;
-        t_tuple_desc.id = TUPLE_ID_SRC;
-        //Here 8 bytes in order to handle null values
-        t_tuple_desc.byteSize = COLUMN_NUMBERS * 16 + 8;
-        t_tuple_desc.numNullBytes = 0;
-        t_tuple_desc.tableId = 0;
-        t_tuple_desc.__isset.tableId = true;
-        t_desc_table.tupleDescriptors.push_back(t_tuple_desc);
-    }
-    return next_slot_id;
-}
-
-int ParquetScannerTest::create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id) {
-    int32_t byteOffset =
-            8; // Skip the first 8 bytes These 8 bytes are used to indicate whether the field is a null value
-    { //log_version
-        TSlotDescriptor slot_desc;
-
-        slot_desc.id = next_slot_id++;
-        slot_desc.parent = 0;
-        TTypeDesc type;
-        {
-            TTypeNode node;
-            node.__set_type(TTypeNodeType::SCALAR);
-            TScalarType scalar_type;
scalar_type.__set_type(TPrimitiveType::VARCHAR); //parquet::Type::BYTE - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 0; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 0; - slot_desc.colName = "log_version"; - slot_desc.slotIdx = 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 16; - { // log_time - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::BIGINT); //parquet::Type::INT64 - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 1; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 1; - slot_desc.colName = "log_time"; - slot_desc.slotIdx = 2; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 8; - { // log_time_stamp - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::BIGINT); //parquet::Type::INT32 - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = 2; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = 0; - slot_desc.nullIndicatorBit = 2; - slot_desc.colName = "log_time_stamp"; - slot_desc.slotIdx = 3; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - byteOffset += 8; - const char* columnNames[] = { - "log_version", "log_time", "log_time_stamp", "js_version", - "vst_cookie", "vst_ip", "vst_user_id", "vst_user_agent", - "device_resolution", "page_url", "page_refer_url", "page_yyid", - "page_type", "pos_type", "content_id", "media_id", - "spm_cnt", "spm_pre", "scm_cnt", "partition_column"}; - for (int i = 3; i < COLUMN_NUMBERS; i++, byteOffset += 16) { - TSlotDescriptor slot_desc; - - slot_desc.id = next_slot_id++; - slot_desc.parent = 0; - TTypeDesc type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); //parquet::Type::BYTE - scalar_type.__set_len(65535); - node.__set_scalar_type(scalar_type); - type.types.push_back(node); - } - slot_desc.slotType = type; - slot_desc.columnPos = i; - slot_desc.byteOffset = byteOffset; - slot_desc.nullIndicatorByte = i / 8; - slot_desc.nullIndicatorBit = i % 8; - slot_desc.colName = columnNames[i]; - slot_desc.slotIdx = i + 1; - slot_desc.isMaterialized = true; - - t_desc_table.slotDescriptors.push_back(slot_desc); - } - - t_desc_table.__isset.slotDescriptors = true; - { - // TTupleDescriptor dest - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = TUPLE_ID_DST; - t_tuple_desc.byteSize = byteOffset + 8; //Here 8 bytes in order to handle null values - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - } - return next_slot_id; -} - -void ParquetScannerTest::init_desc_table() { - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor 
t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::BROKER_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - - int next_slot_id = 1; - - next_slot_id = create_dst_tuple(t_desc_table, next_slot_id); - - next_slot_id = create_src_tuple(t_desc_table, next_slot_id); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - _runtime_state.set_desc_tbl(_desc_tbl); -} - -void ParquetScannerTest::create_expr_info() { - TTypeDesc varchar_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::VARCHAR); - scalar_type.__set_len(5000); - node.__set_scalar_type(scalar_type); - varchar_type.types.push_back(node); - } - // log_version VARCHAR --> VARCHAR - { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START; // log_time id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START); - } - // log_time VARCHAR --> BIGINT - { - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::BIGINT); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = int_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttoint"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = int_type; - cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_big_int_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 1; // log_time id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 1, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 1); - } - // log_time_stamp VARCHAR --> BIGINT - { - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::BIGINT); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - TExprNode cast_expr; - cast_expr.node_type = TExprNodeType::CAST_EXPR; - cast_expr.type = int_type; - cast_expr.__set_opcode(TExprOpcode::CAST); - cast_expr.__set_num_children(1); - cast_expr.__set_output_scale(-1); - cast_expr.__isset.fn = true; - cast_expr.fn.name.function_name = "casttoint"; - cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN; - cast_expr.fn.arg_types.push_back(varchar_type); - cast_expr.fn.ret_type = int_type; - 
cast_expr.fn.has_var_args = false; - cast_expr.fn.__set_signature("casttoint(VARCHAR(*))"); - cast_expr.fn.__isset.scalar_fn = true; - cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_big_int_val"; - - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 2; - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(cast_expr); - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 2, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 2); - } - // couldn't convert type - for (int i = 3; i < COLUMN_NUMBERS; i++) { - TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = varchar_type; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + i; // log_time id in src tuple - slot_ref.slot_ref.tuple_id = 1; - - TExpr expr; - expr.nodes.push_back(slot_ref); - - _params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + i, expr); - _params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + i); - } - - // _params.__isset.expr_of_dest_slot = true; - _params.__set_dest_tuple_id(TUPLE_ID_DST); - _params.__set_src_tuple_id(TUPLE_ID_SRC); -} - -void ParquetScannerTest::init_filter_expr() { - TTypeDesc bool_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::BOOLEAN); - scalar_type.__set_len(5000); - node.__set_scalar_type(scalar_type); - bool_type.types.push_back(node); - } - TTypeDesc int_type; - { - TTypeNode node; - node.__set_type(TTypeNodeType::SCALAR); - TScalarType scalar_type; - scalar_type.__set_type(TPrimitiveType::BIGINT); - node.__set_scalar_type(scalar_type); - int_type.types.push_back(node); - } - - // create predicate - ::doris::TExpr expr; - - // create predicate elements: LeftExpr op RightExpr - // expr: log_time > 1 - ::doris::TExprNode op; - op.node_type = TExprNodeType::BINARY_PRED; - op.opcode = TExprOpcode::GT; - op.type = bool_type; - op.num_children = 2; - op.child_type = TPrimitiveType::BIGINT; - op.__isset.opcode = true; - expr.nodes.push_back(op); - - // log_time - ::doris::TExprNode slot_ref; - slot_ref.node_type = TExprNodeType::SLOT_REF; - slot_ref.type = int_type; - slot_ref.slot_ref.slot_id = 1; - slot_ref.slot_ref.tuple_id = 0; - slot_ref.num_children = 0; - slot_ref.__isset.slot_ref = true; - expr.nodes.push_back(slot_ref); - - ::doris::TExprNode int_expr; - int_expr.node_type = TExprNodeType::INT_LITERAL; - int_expr.type = int_type; - int_expr.int_literal.value = 1; - int_expr.num_children = 0; - int_expr.__isset.int_literal = true; - - expr.nodes.push_back(int_expr); - - std::vector<::doris::TExpr> conjuncts; - conjuncts.push_back(expr); - // push down conjuncts; - _tnode.__set_conjuncts(conjuncts); -} - -void ParquetScannerTest::init() { - create_expr_info(); - init_desc_table(); - init_filter_expr(); - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.broker_scan_node.tuple_id = 0; - _tnode.__isset.broker_scan_node = true; - _tnode.__isset.conjuncts = true; -} - -TEST_F(ParquetScannerTest, normal) { - BrokerScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - 
scan_node.init(_tnode);
-    auto status = scan_node.prepare(&_runtime_state);
-    EXPECT_TRUE(status.ok());
-
-    // set scan range
-    std::vector<TScanRangeParams> scan_ranges;
-    {
-        TScanRangeParams scan_range_params;
-
-        TBrokerScanRange broker_scan_range;
-        broker_scan_range.params = _params;
-        TBrokerRangeDesc range;
-        range.start_offset = 0;
-        range.size = -1;
-        range.format_type = TFileFormatType::FORMAT_PARQUET;
-        range.splittable = true;
-
-        std::vector<std::string> columns_from_path {"value"};
-        range.__set_columns_from_path(columns_from_path);
-        range.__set_num_of_columns_from_file(19);
-#if 1
-        range.path = "./be/test/exec/test_data/parquet_scanner/localfile.parquet";
-        range.file_type = TFileType::FILE_LOCAL;
-#else
-        range.path = "hdfs://ip:8020/user/xxxx.parq";
-        range.file_type = TFileType::FILE_BROKER;
-        TNetworkAddress addr;
-        addr.__set_hostname("127.0.0.1");
-        addr.__set_port(8000);
-        broker_scan_range.broker_addresses.push_back(addr);
-#endif
-        broker_scan_range.ranges.push_back(range);
-        scan_range_params.scan_range.__set_broker_scan_range(broker_scan_range);
-        scan_ranges.push_back(scan_range_params);
-    }
-
-    scan_node.set_scan_ranges(scan_ranges);
-    status = scan_node.open(&_runtime_state);
-    EXPECT_TRUE(status.ok());
-
-    // Get batch
-    RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size());
-    bool eof = false;
-    for (int i = 0; i < 14; i++) {
-        status = scan_node.get_next(&_runtime_state, &batch, &eof);
-        EXPECT_TRUE(status.ok());
-        EXPECT_EQ(2048, batch.num_rows());
-        EXPECT_FALSE(eof);
-        batch.reset();
-    }
-
-    status = scan_node.get_next(&_runtime_state, &batch, &eof);
-    EXPECT_TRUE(status.ok());
-    EXPECT_EQ(1328, batch.num_rows());
-    EXPECT_FALSE(eof);
-    batch.reset();
-    status = scan_node.get_next(&_runtime_state, &batch, &eof);
-    EXPECT_TRUE(status.ok());
-    EXPECT_EQ(0, batch.num_rows());
-    EXPECT_TRUE(eof);
-
-    scan_node.close(&_runtime_state);
-    {
-        std::stringstream ss;
-        scan_node.runtime_profile()->pretty_print(&ss);
-        LOG(INFO) << ss.str();
-    }
-}
-
-} // namespace doris
diff --git a/be/test/exec/plain_text_line_reader_bzip_test.cpp b/be/test/exec/plain_text_line_reader_bzip_test.cpp
deleted file mode 100644
index 900f261673..0000000000
--- a/be/test/exec/plain_text_line_reader_bzip_test.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
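The parquet scan-node test deleted above pins the node's exact batching: fourteen batches of 2048 rows, a final 1328-row batch, then an empty batch with eof set, i.e. 30000 rows in total. A sketch of the same check written against the total instead, assuming the deleted test's scan_node and _runtime_state fixtures; it no longer breaks if the runtime batch size changes.

// Drain the node and assert on the row total (14 * 2048 + 1328 = 30000)
// rather than on per-batch sizes.
size_t total_rows = 0;
bool eof = false;
while (!eof) {
    RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size());
    EXPECT_TRUE(scan_node.get_next(&_runtime_state, &batch, &eof).ok());
    total_rows += batch.num_rows();
}
EXPECT_EQ(30000U, total_rows);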
- -#include - -#include "exec/decompressor.h" -#include "exec/plain_text_line_reader.h" -#include "io/local_file_reader.h" -#include "util/runtime_profile.h" - -namespace doris { - -class PlainTextLineReaderTest : public testing::Test { -public: - PlainTextLineReaderTest() : _profile("TestProfile") {} - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - RuntimeProfile _profile; -}; - -TEST_F(PlainTextLineReaderTest, bzip2_normal_use) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/test_file.csv.bz2", - 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::BZIP2, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // 1,2 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(3, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(0, size); - EXPECT_FALSE(eof); - - // 1,2,3,4 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(7, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, bzip2_test_limit) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.bz2", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::BZIP2, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 8, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, bzip2_test_limit2) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.bz2", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::BZIP2, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 6, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - 
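Every test in this file, and in the gzip/lz4/lzo variants that follow, creates the decompressor through the same factory call and finishes with a manual delete. A sketch of the same setup behind std::unique_ptr; the helper name is hypothetical, and it assumes only the public destructor that the tests' own `delete decompressor;` already relies on.

#include <memory>

// Hypothetical helper: identical factory call, but ownership is automatic,
// so a TEST_F body no longer needs the trailing `delete decompressor;`.
static std::unique_ptr<Decompressor> make_decompressor(CompressType type) {
    Decompressor* raw = nullptr;
    EXPECT_TRUE(Decompressor::create_decompressor(type, &raw).ok());
    return std::unique_ptr<Decompressor>(raw);
}

// Usage:
//   auto decompressor = make_decompressor(CompressType::BZIP2);
//   PlainTextLineReader line_reader(&_profile, &file_reader, decompressor.get(),
//                                   -1, "\n", 1);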
-TEST_F(PlainTextLineReaderTest, bzip2_test_limit3) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.bz2", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::BZIP2, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, bzip2_test_limit4) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.bz2", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::BZIP2, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, bzip2_test_limit5) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.bz2", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::BZIP2, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 0, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -} // end namespace doris diff --git a/be/test/exec/plain_text_line_reader_gzip_test.cpp b/be/test/exec/plain_text_line_reader_gzip_test.cpp deleted file mode 100644 index fea15d00c4..0000000000 --- a/be/test/exec/plain_text_line_reader_gzip_test.cpp +++ /dev/null @@ -1,268 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
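A reading aid for the assertions in these line-reader tests: the PlainTextLineReader constructor takes, in order, the profile, the file reader, the decompressor, a byte limit (-1 means unlimited; the limit tests pass 0, 6, 7, or 8), the line delimiter, and the delimiter length. read_line hands back each line without its delimiter, which is why "1,2" arrives with size 3 and "1,2,3,4" with size 7; a blank line reports size 0 with eof still false, and eof only turns true on the read that finds no more data. The shared loop, using the fixtures of the deleted tests:

// Canonical read loop as the deleted tests use it.
PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1);
const uint8_t* ptr = nullptr;
size_t size = 0;
bool eof = false;
do {
    EXPECT_TRUE(line_reader.read_line(&ptr, &size, &eof).ok());
    if (!eof) {
        // `size` excludes the trailing "\n"; empty lines come back with size == 0.
        LOG(INFO) << std::string((const char*)ptr, size);
    }
} while (!eof);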
- -#include - -#include "exec/decompressor.h" -#include "exec/plain_text_line_reader.h" -#include "io/local_file_reader.h" -#include "util/runtime_profile.h" - -namespace doris { - -class PlainTextLineReaderGzipTest : public testing::Test { -public: - PlainTextLineReaderGzipTest() : _profile("TestProfile") {} - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - RuntimeProfile _profile; -}; - -TEST_F(PlainTextLineReaderGzipTest, gzip_normal_use) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/test_file.csv.gz", - 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::GZIP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // 1,2 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(3, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(0, size); - EXPECT_FALSE(eof); - - // 1,2,3,4 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(7, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); - delete decompressor; -} - -TEST_F(PlainTextLineReaderGzipTest, uncompressed_no_newline) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/no_newline.csv.gz", - 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::GZIP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // 1,2,3 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_STREQ("1,2,3", std::string((char*)ptr, size).c_str()); - EXPECT_FALSE(eof); - - // 4,5 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(3, size); - EXPECT_STREQ("4,5", std::string((char*)ptr, size).c_str()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); - delete decompressor; -} - -TEST_F(PlainTextLineReaderGzipTest, gzip_test_limit) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.gz", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::GZIP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 8, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - st = 
line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -TEST_F(PlainTextLineReaderGzipTest, gzip_test_limit2) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.gz", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::GZIP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 6, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - delete decompressor; -} - -TEST_F(PlainTextLineReaderGzipTest, gzip_test_limit3) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.gz", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::GZIP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -TEST_F(PlainTextLineReaderGzipTest, gzip_test_limit4) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.gz", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::GZIP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -TEST_F(PlainTextLineReaderGzipTest, gzip_test_limit5) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.gz", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::GZIP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 0, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -} // end namespace doris diff --git a/be/test/exec/plain_text_line_reader_lz4frame_test.cpp b/be/test/exec/plain_text_line_reader_lz4frame_test.cpp deleted file mode 100644 index 
f6d0844455..0000000000 --- a/be/test/exec/plain_text_line_reader_lz4frame_test.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include "exec/decompressor.h" -#include "exec/plain_text_line_reader.h" -#include "io/local_file_reader.h" -#include "util/runtime_profile.h" - -namespace doris { - -class PlainTextLineReaderTest : public testing::Test { -public: - PlainTextLineReaderTest() : _profile("TestProfile") {} - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - RuntimeProfile _profile; -}; - -TEST_F(PlainTextLineReaderTest, lz4_normal_use) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/test_file.csv.lz4", - 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZ4FRAME, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // 1,2 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(3, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(0, size); - EXPECT_FALSE(eof); - - // 1,2,3,4 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(7, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, lz4_test_limit) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lz4", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZ4FRAME, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 8, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - 
EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, lz4_test_limit2) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lz4", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZ4FRAME, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 6, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, lz4_test_limit3) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lz4", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZ4FRAME, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, lz4_test_limit4) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lz4", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZ4FRAME, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -TEST_F(PlainTextLineReaderTest, lz4_test_limit5) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lz4", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZ4FRAME, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 0, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - delete decompressor; -} - -} // end namespace doris diff --git a/be/test/exec/plain_text_line_reader_lzop_test.cpp b/be/test/exec/plain_text_line_reader_lzop_test.cpp deleted file mode 100644 index 99aa6336e0..0000000000 --- a/be/test/exec/plain_text_line_reader_lzop_test.cpp +++ /dev/null @@ -1,290 +0,0 
@@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include "exec/decompressor.h" -#include "exec/plain_text_line_reader.h" -#include "io/local_file_reader.h" -#include "util/runtime_profile.h" - -namespace doris { - -class PlainTextLineReaderTest : public testing::Test { -public: - PlainTextLineReaderTest() : _profile("TestProfile") {} - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - RuntimeProfile _profile; -}; - -TEST_F(PlainTextLineReaderTest, lzop_normal_use) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/test_file.csv.lzo", - 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZOP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // 1,2 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(3, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(0, size); - EXPECT_FALSE(eof); - - // 1,2,3,4 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(7, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(PlainTextLineReaderTest, lzop_test_limit) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lzo", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZOP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 8, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - 
EXPECT_FALSE(eof); -} - -TEST_F(PlainTextLineReaderTest, lzop_test_limit2) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lzo", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZOP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 6, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); -} - -TEST_F(PlainTextLineReaderTest, lzop_test_limit3) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lzo", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZOP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); -} - -TEST_F(PlainTextLineReaderTest, lzop_test_limit4) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lzo", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZOP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - EXPECT_EQ(0, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); -} - -TEST_F(PlainTextLineReaderTest, lzop_test_limit5) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv.lzo", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZOP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 0, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); -} - -TEST_F(PlainTextLineReaderTest, lzop_test_larger) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/larger.txt.lzo", - 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::LZOP, &decompressor); - EXPECT_TRUE(st.ok()); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // 1 - st = line_reader.read_line(&ptr, &size, &eof); - 
EXPECT_TRUE(st.ok()); - EXPECT_EQ(20, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // 2 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(30, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // 3 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(9, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // 4 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(0, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // 5 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(30, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // 6 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(0, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // 7 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(0, size); - EXPECT_TRUE(eof); -} - -} // end namespace doris diff --git a/be/test/exec/plain_text_line_reader_uncompressed_test.cpp b/be/test/exec/plain_text_line_reader_uncompressed_test.cpp deleted file mode 100644 index 815d119ba1..0000000000 --- a/be/test/exec/plain_text_line_reader_uncompressed_test.cpp +++ /dev/null @@ -1,293 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
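The gzip, lz4frame, and lzop suites above, like the uncompressed suite that follows, all assert the same read_line contract: the reported size never counts the trailing delimiter, an empty line comes back as a zero-size success with eof still false, and eof flips only once the input is exhausted. A self-contained sketch of that contract, assuming a stand-in LineSource class rather than the real PlainTextLineReader (the real reader also reports a few trailing empty reads before eof, which this sketch collapses):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>

class LineSource {
public:
    LineSource(const uint8_t* data, size_t len) : _data(data), _len(len) {}

    // Mirrors the asserted semantics: *size excludes the '\n' delimiter,
    // an empty line is a zero-size success, and *eof turns true only once
    // the buffer is exhausted.
    void read_line(const uint8_t** ptr, size_t* size, bool* eof) {
        if (_pos >= _len) {
            *ptr = nullptr;
            *size = 0;
            *eof = true;
            return;
        }
        const void* hit = std::memchr(_data + _pos, '\n', _len - _pos);
        size_t end = hit ? static_cast<const uint8_t*>(hit) - _data : _len;
        *ptr = _data + _pos;
        *size = end - _pos; // delimiter not counted, so "1,2" reports size 3
        *eof = false;
        _pos = hit ? end + 1 : _len;
    }

private:
    const uint8_t* _data;
    size_t _len;
    size_t _pos = 0;
};

int main() {
    const char* text = "1,2\n\n1,2,3,4\n"; // same shape as test_file.csv
    LineSource src(reinterpret_cast<const uint8_t*>(text), std::strlen(text));
    const uint8_t* ptr;
    size_t size;
    bool eof = false;
    while (true) {
        src.read_line(&ptr, &size, &eof);
        if (eof) break;
        std::cout << size << ": " << std::string(reinterpret_cast<const char*>(ptr), size) << '\n';
    }
    return 0; // prints "3: 1,2", "0: ", "7: 1,2,3,4"
}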
- -#include - -#include "exec/decompressor.h" -#include "exec/plain_text_line_reader.h" -#include "io/local_file_reader.h" -#include "util/runtime_profile.h" - -namespace doris { - -class PlainTextLineReaderUncompressedTest : public testing::Test { -public: - PlainTextLineReaderUncompressedTest() : _profile("TestProfile") {} - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - -private: - RuntimeProfile _profile; -}; - -TEST_F(PlainTextLineReaderUncompressedTest, uncompressed_normal_use) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/test_file.csv", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::UNCOMPRESSED, &decompressor); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(decompressor == nullptr); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // 1,2 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(3, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(0, size); - EXPECT_FALSE(eof); - - // 1,2,3,4 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(7, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(PlainTextLineReaderUncompressedTest, uncompressed_no_newline) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/no_newline.csv", - 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::UNCOMPRESSED, &decompressor); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(decompressor == nullptr); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, -1, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // 1,2,3 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_STREQ("1,2,3", std::string((char*)ptr, size).c_str()); - EXPECT_FALSE(eof); - - // 4,5 - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(3, size); - EXPECT_STREQ("4,5", std::string((char*)ptr, size).c_str()); - EXPECT_FALSE(eof); - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(PlainTextLineReaderUncompressedTest, uncompressed_test_limit) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::UNCOMPRESSED, &decompressor); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(decompressor == nullptr); - - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 8, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(5, size); - EXPECT_FALSE(eof); - LOG(INFO) << std::string((const char*)ptr, size); 
-
-    // Empty
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_FALSE(eof);
-    EXPECT_EQ(0, size);
-
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_EQ(5, size);
-    EXPECT_FALSE(eof);
-    LOG(INFO) << std::string((const char*)ptr, size);
-
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_TRUE(eof);
-}
-
-TEST_F(PlainTextLineReaderUncompressedTest, uncompressed_test_limit2) {
-    LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv", 0);
-    auto st = file_reader.open();
-    EXPECT_TRUE(st.ok());
-
-    Decompressor* decompressor;
-    st = Decompressor::create_decompressor(CompressType::UNCOMPRESSED, &decompressor);
-    EXPECT_TRUE(st.ok());
-    EXPECT_TRUE(decompressor == nullptr);
-
-    PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 6, "\n", 1);
-    const uint8_t* ptr;
-    size_t size;
-    bool eof;
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_EQ(5, size);
-    LOG(INFO) << std::string((const char*)ptr, size);
-
-    // Empty
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_TRUE(eof);
-}
-
-TEST_F(PlainTextLineReaderUncompressedTest, uncompressed_test_limit3) {
-    LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv", 0);
-    auto st = file_reader.open();
-    EXPECT_TRUE(st.ok());
-
-    Decompressor* decompressor;
-    st = Decompressor::create_decompressor(CompressType::UNCOMPRESSED, &decompressor);
-    EXPECT_TRUE(st.ok());
-    EXPECT_TRUE(decompressor == nullptr);
-
-    PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1);
-    const uint8_t* ptr;
-    size_t size;
-    bool eof;
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_EQ(5, size);
-    LOG(INFO) << std::string((const char*)ptr, size);
-
-    // Empty
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_FALSE(eof);
-    EXPECT_EQ(0, size);
-
-    // Empty
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_TRUE(eof);
-}
-
-TEST_F(PlainTextLineReaderUncompressedTest, uncompressed_test_limit4) {
-    LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv", 0);
-    auto st = file_reader.open();
-    EXPECT_TRUE(st.ok());
-
-    Decompressor* decompressor;
-    st = Decompressor::create_decompressor(CompressType::UNCOMPRESSED, &decompressor);
-    EXPECT_TRUE(st.ok());
-    EXPECT_TRUE(decompressor == nullptr);
-
-    PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 7, "\n", 1);
-    const uint8_t* ptr;
-    size_t size;
-    bool eof;
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_EQ(5, size);
-    LOG(INFO) << std::string((const char*)ptr, size);
-
-    // Empty
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_FALSE(eof);
-    EXPECT_EQ(0, size);
-
-    // Empty
-    st = line_reader.read_line(&ptr, &size, &eof);
-    EXPECT_TRUE(st.ok());
-    EXPECT_TRUE(eof);
-}
-
-TEST_F(PlainTextLineReaderUncompressedTest, uncompressed_test_limit5) {
-    LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/limit.csv", 0);
-    auto st = file_reader.open();
-    EXPECT_TRUE(st.ok());
-
-    Decompressor* decompressor;
-    st = Decompressor::create_decompressor(CompressType::UNCOMPRESSED, &decompressor);
-    EXPECT_TRUE(st.ok());
-    EXPECT_TRUE(decompressor == nullptr);
-
-    PlainTextLineReader line_reader(&_profile, &file_reader,
decompressor, 0, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - // Empty - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -TEST_F(PlainTextLineReaderUncompressedTest, uncompressed_test_empty) { - LocalFileReader file_reader("./be/test/exec/test_data/plain_text_line_reader/empty.txt", 0); - auto st = file_reader.open(); - EXPECT_TRUE(st.ok()); - - Decompressor* decompressor; - st = Decompressor::create_decompressor(CompressType::UNCOMPRESSED, &decompressor); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(decompressor == nullptr); - - // set min length larger than 0 to test - PlainTextLineReader line_reader(&_profile, &file_reader, decompressor, 10, "\n", 1); - const uint8_t* ptr; - size_t size; - bool eof; - - st = line_reader.read_line(&ptr, &size, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_TRUE(eof); -} - -} // end namespace doris diff --git a/be/test/exec/s3_reader_test.cpp b/be/test/exec/s3_reader_test.cpp deleted file mode 100644 index 7e00751f52..0000000000 --- a/be/test/exec/s3_reader_test.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
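Across all four codecs the tests lean on one factory behavior: create_decompressor succeeds for UNCOMPRESSED input but leaves the out-parameter null, so callers must read "ok plus nullptr" as passthrough, and the caller owns any decompressor that is produced. A minimal sketch of that shape, with illustrative names rather than the exact Doris signatures:

#include <memory>

// Illustrative stand-ins only, not the Doris Decompressor API.
enum class CompressType { UNCOMPRESSED, GZIP, LZ4FRAME, LZOP };

struct Decompressor {
    // codec-specific state elided
};

// Returns success in both branches; for passthrough input it simply leaves
// *out null, which is why the uncompressed tests assert decompressor == nullptr.
bool create_decompressor(CompressType type, Decompressor** out) {
    if (type == CompressType::UNCOMPRESSED) {
        *out = nullptr;
        return true;
    }
    *out = new Decompressor();
    return true;
}

int main() {
    Decompressor* raw = nullptr;
    if (create_decompressor(CompressType::GZIP, &raw)) {
        // Wrapping the raw pointer immediately makes cleanup uniform; the
        // gzip and lz4 tests above delete the decompressor by hand on every
        // path, while the lzop tests never do.
        std::unique_ptr<Decompressor> guard(raw);
    }
    return 0;
}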
- -#include "io/s3_reader.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "io/s3_writer.h" - -namespace doris { -using namespace ErrorCode; -static const std::string AK = ""; -static const std::string SK = ""; -static const std::string ENDPOINT = "http://s3.bj.bcebos.com"; -static const std::string REGION = "bj"; -static const std::string BUCKET = "s3://yang-repo/"; - -// remove DISABLED_ when need run this test -#define S3ReaderTest DISABLED_S3ReaderTest -class S3ReaderTest : public testing::Test { -public: - S3ReaderTest() - : _aws_properties({{"AWS_ACCESS_KEY", AK}, - {"AWS_SECRET_KEY", SK}, - {"AWS_ENDPOINT", ENDPOINT}, - {"AWS_REGION", REGION}}) { - _s3_base_path = BUCKET + "s3/" + gen_uuid(); - } - -protected: - virtual void SetUp() {} - virtual void TearDown() {} - std::string gen_uuid() { - auto id = boost::uuids::random_generator()(); - return boost::lexical_cast(id); - } - std::map _aws_properties; - std::string _s3_base_path; - std::string _content = - "O wild West Wind, thou breath of Autumn's being\n" - "Thou, from whose unseen presence the leaves dead\n" - "Are driven, like ghosts from an enchanter fleeing,\n" - "Yellow, and black, and pale, and hectic red,\n" - "Pestilence-stricken multitudes:O thou\n" - "Who chariotest to their dark wintry bed\n" - "The winged seeds, where they lie cold and low,\n" - "Each like a corpse within its grave, until\n" - "Thine azure sister of the Spring shall blow\n" - "Her clarion o'er the dreaming earth, and fill\n" - "(Driving sweet buds like flocks to feed in air)\n" - "With living hues and odors plain and hill:\n" - "Wild Spirit, which art moving everywhere;\n" - "Destroyer and preserver; hear, oh, hear!"; -}; - -TEST_F(S3ReaderTest, normal) { - std::string path = _s3_base_path + "/test_file"; - std::unique_ptr writer(new S3Writer(_aws_properties, path, 0)); - auto st = writer->open(); - EXPECT_TRUE(st.ok()); - size_t l = 0; - st = writer->write(reinterpret_cast(_content.c_str()), _content.length(), &l); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(_content.length(), l); - st = writer->close(); - EXPECT_TRUE(st.ok()); - std::unique_ptr writer1(new S3Writer(_aws_properties, path, 0)); - st = writer1->open(); - EXPECT_TRUE(st.is()); - std::unique_ptr reader(new S3Reader(_aws_properties, path, 0)); - st = reader->open(); - EXPECT_TRUE(st.ok()); - std::unique_ptr reader1(new S3Reader(_aws_properties, path + "xx", 0)); - st = reader1->open(); - EXPECT_TRUE(st.is()); - EXPECT_EQ(_content.length(), reader->size()); - std::string verification_contents; - verification_contents.resize(_content.length()); - int64_t total_read = 0; - bool eof = false; - st = reader->read((uint8_t*)&verification_contents[0], _content.length(), &total_read, &eof); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(_content, verification_contents); - EXPECT_EQ(_content.length(), total_read); - EXPECT_FALSE(eof); - st = reader->read((uint8_t*)&verification_contents[0], _content.length(), &total_read, &eof); - EXPECT_TRUE(eof); - int64_t t = 0; - st = reader->tell(&t); - EXPECT_TRUE(st.ok()); - EXPECT_EQ(_content.length(), t); - st = reader->readat(_content.length(), _content.length(), (int64_t*)(&total_read), - (uint8_t*)&verification_contents[0]); - LOG(INFO) << total_read; - EXPECT_TRUE(total_read == 0); -} -} // end namespace doris diff --git a/be/test/exec/schema_scan_node_test.cpp b/be/test/exec/schema_scan_node_test.cpp deleted file mode 100644 index 3027c7787c..0000000000 --- 
a/be/test/exec/schema_scan_node_test.cpp +++ /dev/null @@ -1,230 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/schema_scan_node.h" - -#include - -#include - -#include "common/object_pool.h" -#include "exec/text_converter.hpp" -#include "gen_cpp/PlanNodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/string_value.h" -#include "runtime/tuple_row.h" -#include "schema_scan_node.h" -#include "util/debug_util.h" -#include "util/runtime_profile.h" - -namespace doris { - -// mock -class SchemaScanNodeTest : public testing::Test { -public: - SchemaScanNodeTest() : runtime_state("test") { - TDescriptorTable t_desc_table; - - // table descriptors - TTableDescriptor t_table_desc; - - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::SCHEMA_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.schemaTable.tableType = TSchemaTableType::SCH_AUTHORS; - t_table_desc.tableName = "test_table"; - t_table_desc.dbName = "test_db"; - t_table_desc.__isset.schemaTable = true; - t_desc_table.tableDescriptors.push_back(t_table_desc); - t_desc_table.__isset.tableDescriptors = true; - // TSlotDescriptor - int offset = 0; - - for (int i = 0; i < 3; ++i) { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_slotType(to_thrift(TYPE_STRING)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_desc_table.slotDescriptors.push_back(t_slot_desc); - offset += sizeof(StringValue); - } - - t_desc_table.__isset.slotDescriptors = true; - // TTupleDescriptor - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 0; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl); - - runtime_state.set_desc_tbl(_desc_tbl); - - // Node Id - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.schema_scan_node.table_name = "test_table"; - _tnode.schema_scan_node.tuple_id = 0; - _tnode.__isset.schema_scan_node = true; - } - - virtual ~SchemaScanNodeTest() {} - - virtual void SetUp() {} - virtual void TearDown() {} - -private: - TPlanNode _tnode; - ObjectPool _obj_pool; - DescriptorTbl* _desc_tbl; - RuntimeState runtime_state; -}; - -TEST_F(SchemaScanNodeTest, normal_use) { - 
SchemaScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
-    Status status = scan_node.prepare(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    status = scan_node.prepare(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    std::vector<TScanRangeParams> scan_ranges;
-    status = scan_node.set_scan_ranges(scan_ranges);
-    EXPECT_TRUE(status.ok());
-    std::stringstream out;
-    scan_node.debug_string(1, &out);
-    LOG(WARNING) << out.str();
-
-    status = scan_node.open(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    RowBatch row_batch(scan_node._row_descriptor, 100);
-    bool eos = false;
-
-    while (!eos) {
-        status = scan_node.get_next(&runtime_state, &row_batch, &eos);
-        EXPECT_TRUE(status.ok());
-
-        if (!eos) {
-            for (int i = 0; i < row_batch.num_rows(); ++i) {
-                TupleRow* row = row_batch.get_row(i);
-                LOG(WARNING) << "input row: " << print_row(row, scan_node._row_descriptor);
-            }
-        }
-    }
-
-    status = scan_node.close(&runtime_state);
-    EXPECT_TRUE(status.ok());
-}
-TEST_F(SchemaScanNodeTest, Prepare_fail_1) {
-    SchemaScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
-    TableDescriptor* old = _desc_tbl->_tuple_desc_map[(TupleId)0]->_table_desc;
-    _desc_tbl->_tuple_desc_map[(TupleId)0]->_table_desc = nullptr;
-    Status status = scan_node.prepare(&runtime_state);
-    EXPECT_FALSE(status.ok());
-    _desc_tbl->_tuple_desc_map[(TupleId)0]->_table_desc = old;
-}
-TEST_F(SchemaScanNodeTest, Prepare_fail_2) {
-    SchemaScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
-    scan_node._tuple_id = 1;
-    Status status = scan_node.prepare(&runtime_state);
-    EXPECT_FALSE(status.ok());
-}
-TEST_F(SchemaScanNodeTest, dummy) {
-    SchemaTableDescriptor* t_desc =
-            (SchemaTableDescriptor*)_desc_tbl->_tuple_desc_map[(TupleId)0]->_table_desc;
-    t_desc->_schema_table_type = TSchemaTableType::SCH_EVENTS;
-    SchemaScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
-    Status status = scan_node.prepare(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    status = scan_node.prepare(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    std::vector<TScanRangeParams> scan_ranges;
-    status = scan_node.set_scan_ranges(scan_ranges);
-    EXPECT_TRUE(status.ok());
-    std::stringstream out;
-    scan_node.debug_string(1, &out);
-    LOG(WARNING) << out.str();
-
-    status = scan_node.open(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    RowBatch row_batch(scan_node._row_descriptor, 100);
-    bool eos = false;
-
-    while (!eos) {
-        status = scan_node.get_next(&runtime_state, &row_batch, &eos);
-        EXPECT_TRUE(status.ok());
-
-        if (!eos) {
-            for (int i = 0; i < row_batch.num_rows(); ++i) {
-                TupleRow* row = row_batch.get_row(i);
-                LOG(WARNING) << "input row: " << print_row(row, scan_node._row_descriptor);
-            }
-        }
-    }
-
-    status = scan_node.close(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    t_desc->_schema_table_type = TSchemaTableType::SCH_AUTHORS;
-}
-TEST_F(SchemaScanNodeTest, get_dest_desc_fail) {
-    SchemaScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
-    scan_node._tuple_id = 1;
-    Status status = scan_node.prepare(&runtime_state);
-    EXPECT_FALSE(status.ok());
-}
-TEST_F(SchemaScanNodeTest, invalid_param) {
-    SchemaScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
-    Status status = scan_node.prepare(nullptr);
-    EXPECT_FALSE(status.ok());
-    status = scan_node.prepare(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    status = scan_node.open(nullptr);
-    EXPECT_FALSE(status.ok());
-    status = scan_node.open(&runtime_state);
-    EXPECT_TRUE(status.ok());
-    RowBatch row_batch(scan_node._row_descriptor, 100);
-    bool eos;
-    status = scan_node.get_next(nullptr, &row_batch, &eos);
-    EXPECT_FALSE(status.ok());
-}
-
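The normal_use and dummy cases above drain the node with the standard loop: call get_next until eos flips, and only consume rows on iterations that did not hit the end. A compilable reduction of that loop, with FakeNode and Batch as illustrative stand-ins for the Doris node and RowBatch types:

#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-ins, not Doris types.
struct Batch {
    std::vector<std::string> rows;
};

struct FakeNode {
    int calls = 0;
    bool get_next(Batch* batch, bool* eos) {
        batch->rows.clear();
        if (calls++ == 0) {
            batch->rows = {"row-a", "row-b"};
            *eos = false;
        } else {
            *eos = true; // second call delivers no rows and ends the stream
        }
        return true; // stands in for Status::OK()
    }
};

int main() {
    FakeNode node;
    Batch batch;
    bool eos = false;
    while (!eos) { // the same drain loop as the tests above
        if (!node.get_next(&batch, &eos)) return 1;
        if (!eos) {
            for (const auto& row : batch.rows) {
                std::cout << "input row: " << row << '\n';
            }
        }
    }
    return 0;
}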
-TEST_F(SchemaScanNodeTest, no_init) { - SchemaScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - //Status status = scan_node.prepare(&runtime_state); - //EXPECT_TRUE(status.ok()); - Status status = scan_node.open(&runtime_state); - EXPECT_FALSE(status.ok()); - RowBatch row_batch(scan_node._row_descriptor, 100); - bool eos; - status = scan_node.get_next(&runtime_state, &row_batch, &eos); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_authors_scanner_test.cpp b/be/test/exec/schema_scanner/schema_authors_scanner_test.cpp deleted file mode 100644 index 298d4439f2..0000000000 --- a/be/test/exec/schema_scanner/schema_authors_scanner_test.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/schema_scanner/schema_authors_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -class SchemaAuthorScannerTest : public testing::Test { -public: - SchemaAuthorScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaAuthorScannerTest, normal_use) { - SchemaAuthorsScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - while (!eos) { - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - for (int i = 0; i < 3; ++i) { - LOG(INFO) - << ((StringValue*)tuple->get_slot(tuple_desc->slots()[i]->tuple_offset()))->ptr; - } - } -} - -TEST_F(SchemaAuthorScannerTest, use_with_no_init) { - SchemaAuthorsScanner scanner; - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr == tuple_desc); - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaAuthorScannerTest, invalid_param) { - SchemaAuthorsScanner scanner; - Status status = scanner.init(&_param, nullptr); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = 
scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, nullptr, &eos); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_charsets_scanner_test.cpp b/be/test/exec/schema_scanner/schema_charsets_scanner_test.cpp deleted file mode 100644 index 83b77f5007..0000000000 --- a/be/test/exec/schema_scanner/schema_charsets_scanner_test.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/schema_scanner/schema_charsets_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "util/debug_util.h" - -namespace doris { - -class SchemaCharsetsScannerTest : public testing::Test { -public: - SchemaCharsetsScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaCharsetsScannerTest, normal_use) { - SchemaCharsetsScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - while (!eos) { - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - if (!eos) { - LOG(INFO) << print_tuple(tuple, *tuple_desc); - } - } -} - -TEST_F(SchemaCharsetsScannerTest, use_with_no_init) { - SchemaCharsetsScanner scanner; - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr == tuple_desc); - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaCharsetsScannerTest, invalid_param) { - SchemaCharsetsScanner scanner; - Status status = scanner.init(&_param, nullptr); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, nullptr, &eos); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git 
a/be/test/exec/schema_scanner/schema_collations_scanner_test.cpp b/be/test/exec/schema_scanner/schema_collations_scanner_test.cpp deleted file mode 100644 index e215ed3c0c..0000000000 --- a/be/test/exec/schema_scanner/schema_collations_scanner_test.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/schema_scanner/schema_collations_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "util/debug_util.h" - -namespace doris { - -class SchemaCollationsScannerTest : public testing::Test { -public: - SchemaCollationsScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaCollationsScannerTest, normal_use) { - SchemaCollationsScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - while (!eos) { - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - if (!eos) { - LOG(INFO) << print_tuple(tuple, *tuple_desc); - } - } -} - -TEST_F(SchemaCollationsScannerTest, use_with_no_init) { - SchemaCollationsScanner scanner; - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr == tuple_desc); - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaCollationsScannerTest, invalid_param) { - SchemaCollationsScanner scanner; - Status status = scanner.init(&_param, nullptr); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, nullptr, &eos); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_columns_scanner_test.cpp b/be/test/exec/schema_scanner/schema_columns_scanner_test.cpp deleted file mode 100644 index d2d6c632b6..0000000000 --- 
a/be/test/exec/schema_scanner/schema_columns_scanner_test.cpp +++ /dev/null @@ -1,204 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/schema_scanner/schema_columns_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "exec/schema_scanner/schema_jni_helper.h" -#include "gen_cpp/Frontend_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -int db_num = 0; -Status s_db_result; -Status SchemaJniHelper::get_db_names(const TGetDbsParams& db_params, TGetDbsResult* db_result) { - for (int i = 0; i < db_num; ++i) { - db_result->dbs.push_back("abc"); - } - return s_db_result; -} - -int table_num = 0; -Status s_table_result; -Status SchemaJniHelper::get_table_names(const TGetTablesParams& table_params, - TGetTablesResult* table_result) { - for (int i = 0; i < table_num; ++i) { - table_result->tables.push_back("bac"); - } - return s_table_result; -} - -int desc_num = 0; -Status s_desc_result; -Status SchemaJniHelper::describe_table(const TDescribeTableParams& desc_params, - TDescribeTableResult* desc_result) { - for (int i = 0; i < desc_num; ++i) { - TColumnDesc column_desc; - column_desc.__set_columnName("abc"); - column_desc.__set_columnType(TPrimitiveType::BOOLEAN); - TColumnDef column_def; - column_def.columnDesc = column_desc; - column_def.comment = "bac"; - desc_result->columns.push_back(column_def); - } - return s_desc_result; -} - -void init_mock() { - db_num = 0; - table_num = 0; - desc_num = 0; - s_db_result = Status::OK(); - s_table_result = Status::OK(); - s_desc_result = Status::OK(); -} - -class SchemaColumnsScannerTest : public testing::Test { -public: - SchemaColumnsScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaColumnsScannerTest, normal_use) { - SchemaColumnsScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaColumnsScannerTest, one_column) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaColumnsScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - 
EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_FALSE(eos); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaColumnsScannerTest, op_before_init) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaColumnsScanner scanner; - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaColumnsScannerTest, input_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaColumnsScanner scanner; - Status status = scanner.init(nullptr, &_obj_pool); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - bool eos = false; - status = scanner.get_next_row(nullptr, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaColumnsScannerTest, table_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaColumnsScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_table_result = Status::InternalError("get table failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaColumnsScannerTest, desc_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaColumnsScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_desc_result = Status::InternalError("get desc failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaColumnsScannerTest, start_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaColumnsScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - s_db_result = Status::InternalError("get db failed."); - status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_create_table_scanner_test.cpp b/be/test/exec/schema_scanner/schema_create_table_scanner_test.cpp deleted file mode 100644 index 80ae9ec4f1..0000000000 --- a/be/test/exec/schema_scanner/schema_create_table_scanner_test.cpp +++ /dev/null @@ -1,204 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/schema_scanner/schema_create_table_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "exec/schema_scanner/schema_jni_helper.h" -#include "gen_cpp/Frontend_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -int db_num = 0; -Status s_db_result; -Status SchemaJniHelper::get_db_names(const TGetDbsParams& db_params, TGetDbsResult* db_result) { - for (int i = 0; i < db_num; ++i) { - db_result->dbs.push_back("abc"); - } - return s_db_result; -} - -int table_num = 0; -Status s_table_result; -Status SchemaJniHelper::get_table_names(const TGetTablesParams& table_params, - TGetTablesResult* table_result) { - for (int i = 0; i < table_num; ++i) { - table_result->tables.push_back("bac"); - } - return s_table_result; -} - -int desc_num = 0; -Status s_desc_result; -Status SchemaJniHelper::describe_table(const TDescribeTableParams& desc_params, - TDescribeTableResult* desc_result) { - for (int i = 0; i < desc_num; ++i) { - TColumnDesc column_desc; - column_desc.__set_columnName("abc"); - column_desc.__set_columnType(TPrimitiveType::BOOLEAN); - TColumnDef column_def; - column_def.columnDesc = column_desc; - column_def.comment = "bac"; - desc_result->columns.push_back(column_def); - } - return s_desc_result; -} - -void init_mock() { - db_num = 0; - table_num = 0; - desc_num = 0; - s_db_result = Status::OK(); - s_table_result = Status::OK(); - s_desc_result = Status::OK(); -} - -class SchemaCreateTableScannerTest : public testing::Test { -public: - SchemaCreateTableScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaCreateTableScannerTest, normal_use) { - SchemaCreateTableScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaCreateTableScannerTest, one_column) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaCreateTableScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_FALSE(eos); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaCreateTableScannerTest, op_before_init) { - table_num = 1; - 
db_num = 1; - desc_num = 1; - SchemaCreateTableScanner scanner; - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaCreateTableScannerTest, input_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaCreateTableScanner scanner; - Status status = scanner.init(nullptr, &_obj_pool); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - bool eos = false; - status = scanner.get_next_row(nullptr, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaCreateTableScannerTest, table_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaCreateTableScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_table_result = Status::InternalError("get table failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaCreateTableScannerTest, desc_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaCreateTableScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_desc_result = Status::InternalError("get desc failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaCreateTableScannerTest, start_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaCreateTableScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - s_db_result = Status::InternalError("get db failed."); - status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_engines_scanner_test.cpp b/be/test/exec/schema_scanner/schema_engines_scanner_test.cpp deleted file mode 100644 index 15fb923ece..0000000000 --- a/be/test/exec/schema_scanner/schema_engines_scanner_test.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
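The columns, create_table, and open_tables suites all mock SchemaJniHelper the same way: the test translation unit supplies the definitions of the helper's static methods and steers them through file-scope knobs (db_num, s_db_result, and friends), so each case arranges its results by poking globals before the call. A condensed sketch of that link-seam technique, with hypothetical JniHelper and Result names:

#include <string>
#include <vector>

// Hypothetical reduction of the pattern, not Doris names.
struct Result {
    std::vector<std::string> names;
};

// Production code only declares this; the test binary supplies the body.
struct JniHelper {
    static bool get_names(Result* out);
};

// Test-side definition, steered through file-scope knobs exactly like
// db_num / s_db_result above.
static int g_name_count = 0; // how many fake rows to emit
static bool g_ok = true;     // forced success or failure

bool JniHelper::get_names(Result* out) {
    for (int i = 0; i < g_name_count; ++i) {
        out->names.push_back("abc");
    }
    return g_ok;
}

int main() {
    g_name_count = 1; // arrange: one fake database
    Result result;
    bool ok = JniHelper::get_names(&result);
    return (ok && result.names.size() == 1) ? 0 : 1;
}

Because the substitute is bound at link time, every test binary that needs it must carry its own copy of the definitions, which is why the db_num / table_num / desc_num block is repeated verbatim across these deleted files.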
- -#include "exec/schema_scanner/schema_engines_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "util/debug_util.h" - -namespace doris { - -class SchemaEnginesScannerTest : public testing::Test { -public: - SchemaEnginesScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaEnginesScannerTest, normal_use) { - SchemaEnginesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - while (!eos) { - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - if (!eos) { - LOG(INFO) << print_tuple(tuple, *tuple_desc); - } - } -} - -TEST_F(SchemaEnginesScannerTest, use_with_no_init) { - SchemaEnginesScanner scanner; - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr == tuple_desc); - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaEnginesScannerTest, invalid_param) { - SchemaEnginesScanner scanner; - Status status = scanner.init(&_param, nullptr); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, nullptr, &eos); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_open_tables_scanner_test.cpp b/be/test/exec/schema_scanner/schema_open_tables_scanner_test.cpp deleted file mode 100644 index bee3fb8ad4..0000000000 --- a/be/test/exec/schema_scanner/schema_open_tables_scanner_test.cpp +++ /dev/null @@ -1,204 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
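// [editor's note] The schema scanner tests in this deleted suite share one mocking
// technique: each test binary supplies its own link-time definitions of
// SchemaJniHelper::get_db_names / get_table_names / describe_table, so no frontend or
// JNI is needed. File-scope counters (db_num, table_num, desc_num) control how many
// rows each stub fabricates, and Status globals (s_db_result, s_table_result,
// s_desc_result) inject failures. A minimal sketch of the pattern, assuming a single
// hook and the hypothetical globals s_rows/s_result:
//
//     static int s_rows = 0;        // rows the stub should fabricate
//     static Status s_result;       // set to Status::InternalError(...) in *_fail tests
//     Status SchemaJniHelper::get_db_names(const TGetDbsParams&, TGetDbsResult* out) {
//         for (int i = 0; i < s_rows; ++i) {
//             out->dbs.push_back("db");  // dummy payload, mirroring "abc"/"bac" in the tests
//         }
//         return s_result;
//     }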
- -#include "exec/schema_scanner/schema_open_tables_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "exec/schema_scanner/schema_jni_helper.h" -#include "gen_cpp/Frontend_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -int db_num = 0; -Status s_db_result; -Status SchemaJniHelper::get_db_names(const TGetDbsParams& db_params, TGetDbsResult* db_result) { - for (int i = 0; i < db_num; ++i) { - db_result->dbs.push_back("abc"); - } - return s_db_result; -} - -int table_num = 0; -Status s_table_result; -Status SchemaJniHelper::get_table_names(const TGetTablesParams& table_params, - TGetTablesResult* table_result) { - for (int i = 0; i < table_num; ++i) { - table_result->tables.push_back("bac"); - } - return s_table_result; -} - -int desc_num = 0; -Status s_desc_result; -Status SchemaJniHelper::describe_table(const TDescribeTableParams& desc_params, - TDescribeTableResult* desc_result) { - for (int i = 0; i < desc_num; ++i) { - TColumnDesc column_desc; - column_desc.__set_columnName("abc"); - column_desc.__set_columnType(TPrimitiveType::BOOLEAN); - TColumnDef column_def; - column_def.columnDesc = column_desc; - column_def.comment = "bac"; - desc_result->columns.push_back(column_def); - } - return s_desc_result; -} - -void init_mock() { - db_num = 0; - table_num = 0; - desc_num = 0; - s_db_result = Status::OK(); - s_table_result = Status::OK(); - s_desc_result = Status::OK(); -} - -class SchemaOpenTablesScannerTest : public testing::Test { -public: - SchemaOpenTablesScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaOpenTablesScannerTest, normal_use) { - SchemaOpenTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaOpenTablesScannerTest, one_column) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaOpenTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_FALSE(eos); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaOpenTablesScannerTest, op_before_init) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaOpenTablesScanner scanner; - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaOpenTablesScannerTest, input_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaOpenTablesScanner scanner; - Status 
status = scanner.init(nullptr, &_obj_pool); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - bool eos = false; - status = scanner.get_next_row(nullptr, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaOpenTablesScannerTest, table_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaOpenTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_table_result = Status::InternalError("get table failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaOpenTablesScannerTest, desc_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaOpenTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_desc_result = Status::InternalError("get desc failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaOpenTablesScannerTest, start_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaOpenTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - s_db_result = Status::InternalError("get db failed."); - status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_schemata_scanner_test.cpp b/be/test/exec/schema_scanner/schema_schemata_scanner_test.cpp deleted file mode 100644 index b322fc36a9..0000000000 --- a/be/test/exec/schema_scanner/schema_schemata_scanner_test.cpp +++ /dev/null @@ -1,170 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/schema_scanner/schema_schemata_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "exec/schema_scanner/schema_jni_helper.h" -#include "gen_cpp/Frontend_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -int db_num = 0; -Status s_db_result; -Status SchemaJniHelper::get_db_names(const TGetDbsParams& db_params, TGetDbsResult* db_result) { - for (int i = 0; i < db_num; ++i) { - db_result->dbs.push_back("abc"); - } - return s_db_result; -} - -int table_num = 0; -Status s_table_result; -Status SchemaJniHelper::get_table_names(const TGetTablesParams& table_params, - TGetTablesResult* table_result) { - for (int i = 0; i < table_num; ++i) { - table_result->tables.push_back("bac"); - } - return s_table_result; -} - -int desc_num = 0; -Status s_desc_result; -Status SchemaJniHelper::describe_table(const TDescribeTableParams& desc_params, - TDescribeTableResult* desc_result) { - for (int i = 0; i < desc_num; ++i) { - TColumnDesc column_desc; - column_desc.__set_columnName("abc"); - column_desc.__set_columnType(TPrimitiveType::BOOLEAN); - TColumnDef column_def; - column_def.columnDesc = column_desc; - column_def.comment = "bac"; - desc_result->columns.push_back(column_def); - } - return s_desc_result; -} - -void init_mock() { - db_num = 0; - table_num = 0; - desc_num = 0; - s_db_result = Status::OK(); - s_table_result = Status::OK(); - s_desc_result = Status::OK(); -} - -class SchemaSchemataScannerTest : public testing::Test { -public: - SchemaSchemataScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaSchemataScannerTest, normal_use) { - SchemaSchemataScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaSchemataScannerTest, one_column) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaSchemataScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_FALSE(eos); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaSchemataScannerTest, op_before_init) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaSchemataScanner scanner; - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaSchemataScannerTest, input_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaSchemataScanner scanner; - Status status = 
scanner.init(nullptr, &_obj_pool); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - bool eos = false; - status = scanner.get_next_row(nullptr, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaSchemataScannerTest, start_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaSchemataScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - s_db_result = Status::InternalError("get db failed."); - status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_table_names_scanner_test.cpp b/be/test/exec/schema_scanner/schema_table_names_scanner_test.cpp deleted file mode 100644 index cc38d22c80..0000000000 --- a/be/test/exec/schema_scanner/schema_table_names_scanner_test.cpp +++ /dev/null @@ -1,187 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/schema_scanner/schema_table_names_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "exec/schema_scanner/schema_jni_helper.h" -#include "gen_cpp/Frontend_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -int db_num = 0; -Status s_db_result; -Status SchemaJniHelper::get_db_names(const TGetDbsParams& db_params, TGetDbsResult* db_result) { - for (int i = 0; i < db_num; ++i) { - db_result->dbs.push_back("abc"); - } - return s_db_result; -} - -int table_num = 0; -Status s_table_result; -Status SchemaJniHelper::get_table_names(const TGetTablesParams& table_params, - TGetTablesResult* table_result) { - for (int i = 0; i < table_num; ++i) { - table_result->tables.push_back("bac"); - } - return s_table_result; -} - -int desc_num = 0; -Status s_desc_result; -Status SchemaJniHelper::describe_table(const TDescribeTableParams& desc_params, - TDescribeTableResult* desc_result) { - for (int i = 0; i < desc_num; ++i) { - TColumnDesc column_desc; - column_desc.__set_columnName("abc"); - column_desc.__set_columnType(TPrimitiveType::BOOLEAN); - TColumnDef column_def; - column_def.columnDesc = column_desc; - column_def.comment = "bac"; - desc_result->columns.push_back(column_def); - } - return s_desc_result; -} - -void init_mock() { - db_num = 0; - table_num = 0; - desc_num = 0; - s_db_result = Status::OK(); - s_table_result = Status::OK(); - s_desc_result = Status::OK(); -} - -class SchemaTableNamesScannerTest : public testing::Test { -public: - SchemaTableNamesScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaTableNamesScannerTest, normal_use) { - SchemaTableNamesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaTableNamesScannerTest, one_column) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTableNamesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_FALSE(eos); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaTableNamesScannerTest, op_before_init) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTableNamesScanner scanner; - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaTableNamesScannerTest, input_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTableNamesScanner scanner; - Status 
status = scanner.init(nullptr, &_obj_pool); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - bool eos = false; - status = scanner.get_next_row(nullptr, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaTableNamesScannerTest, table_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTableNamesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_table_result = Status::InternalError("get table failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaTableNamesScannerTest, start_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTableNamesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - s_db_result = Status::InternalError("get db failed."); - status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_tables_scanner_test.cpp b/be/test/exec/schema_scanner/schema_tables_scanner_test.cpp deleted file mode 100644 index 4fbc399fd9..0000000000 --- a/be/test/exec/schema_scanner/schema_tables_scanner_test.cpp +++ /dev/null @@ -1,204 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/schema_scanner/schema_tables_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "exec/schema_scanner/schema_jni_helper.h" -#include "gen_cpp/Frontend_types.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -int db_num = 0; -Status s_db_result; -Status SchemaJniHelper::get_db_names(const TGetDbsParams& db_params, TGetDbsResult* db_result) { - for (int i = 0; i < db_num; ++i) { - db_result->dbs.push_back("abc"); - } - return s_db_result; -} - -int table_num = 0; -Status s_table_result; -Status SchemaJniHelper::get_table_names(const TGetTablesParams& table_params, - TGetTablesResult* table_result) { - for (int i = 0; i < table_num; ++i) { - table_result->tables.push_back("bac"); - } - return s_table_result; -} - -int desc_num = 0; -Status s_desc_result; -Status SchemaJniHelper::describe_table(const TDescribeTableParams& desc_params, - TDescribeTableResult* desc_result) { - for (int i = 0; i < desc_num; ++i) { - TColumnDesc column_desc; - column_desc.__set_columnName("abc"); - column_desc.__set_columnType(TPrimitiveType::BOOLEAN); - TColumnDef column_def; - column_def.columnDesc = column_desc; - column_def.comment = "bac"; - desc_result->columns.push_back(column_def); - } - return s_desc_result; -} - -void init_mock() { - db_num = 0; - table_num = 0; - desc_num = 0; - s_db_result = Status::OK(); - s_table_result = Status::OK(); - s_desc_result = Status::OK(); -} - -class SchemaTablesScannerTest : public testing::Test { -public: - SchemaTablesScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaTablesScannerTest, normal_use) { - SchemaTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaTablesScannerTest, one_column) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_FALSE(eos); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(eos); -} -TEST_F(SchemaTablesScannerTest, op_before_init) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTablesScanner scanner; - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaTablesScannerTest, input_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTablesScanner scanner; - Status status = scanner.init(nullptr, &_obj_pool); - 
EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - bool eos = false; - status = scanner.get_next_row(nullptr, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaTablesScannerTest, table_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_table_result = Status::InternalError("get table failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaTablesScannerTest, desc_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - s_desc_result = Status::InternalError("get desc failed"); - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaTablesScannerTest, start_fail) { - table_num = 1; - db_num = 1; - desc_num = 1; - SchemaTablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - s_db_result = Status::InternalError("get db failed."); - status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner/schema_variables_scanner_test.cpp b/be/test/exec/schema_scanner/schema_variables_scanner_test.cpp deleted file mode 100644 index 714d2cee80..0000000000 --- a/be/test/exec/schema_scanner/schema_variables_scanner_test.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
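// [editor's note] Unlike the mocked tests above, the variables (and engines) scanners
// are exercised without stubs: their tests simply drain the scanner until eos and log
// each produced row. The consumption loop, distilled from those tests (print_tuple
// comes from util/debug_util.h):
//
//     bool eos = false;
//     while (!eos) {
//         Status status = scanner.get_next_row(tuple, &_mem_pool, &eos);
//         EXPECT_TRUE(status.ok());
//         if (!eos) {
//             LOG(INFO) << print_tuple(tuple, *tuple_desc);
//         }
//     }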
- -#include "exec/schema_scanner/schema_variables_scanner.h" - -#include - -#include - -#include "common/object_pool.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "service/doris_server.h" -#include "util/debug_util.h" - -namespace doris { - -class SchemaVariablesScannerTest : public testing::Test { -public: - SchemaVariablesScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaVariablesScannerTest, normal_use) { - SchemaVariablesScanner scanner; - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - while (!eos) { - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); - if (!eos) { - LOG(INFO) << print_tuple(tuple, *tuple_desc); - } - } -} - -TEST_F(SchemaVariablesScannerTest, use_with_no_init) { - SchemaVariablesScanner scanner; - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr == tuple_desc); - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -TEST_F(SchemaVariablesScannerTest, invalid_param) { - SchemaVariablesScanner scanner; - Status status = scanner.init(&_param, nullptr); - EXPECT_FALSE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos = false; - status = scanner.get_next_row(tuple, nullptr, &eos); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/schema_scanner_test.cpp b/be/test/exec/schema_scanner_test.cpp deleted file mode 100644 index 4bb9d6ff8d..0000000000 --- a/be/test/exec/schema_scanner_test.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
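// [editor's note] The EXPECT_EQ(65, tuple_desc->byte_size()) assertion in
// schema_scanner_test below follows from its s_test_columns table: four VARCHAR slots,
// each materialized as a StringValue (char* ptr + len, i.e. 16 bytes on LP64), plus one
// null-indicator byte because exactly one column ("is_null") is nullable. A sketch of
// the arithmetic, assuming that slot layout:
//
//     4 * sizeof(StringValue)  // 4 * 16 = 64 bytes of fixed-length slot data
//   + 1                        // one null byte covers the single nullable slot
//   = 65                       // == tuple_desc->byte_size()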
- -#include "exec/schema_scanner.h" - -#include <gtest/gtest.h> - -#include <string> - -#include "common/object_pool.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" - -namespace doris { - -class SchemaScannerTest : public testing::Test { -public: - SchemaScannerTest() {} - - virtual void SetUp() { - _param.db = &_db; - _param.table = &_table; - _param.wild = &_wild; - } - -private: - ObjectPool _obj_pool; - MemPool _mem_pool; - SchemaScannerParam _param; - std::string _db; - std::string _table; - std::string _wild; -}; - -SchemaScanner::ColumnDesc s_test_columns[] = { - // name, type, size, is_null - {"Name", TYPE_VARCHAR, sizeof(StringValue), false}, - {"Location", TYPE_VARCHAR, sizeof(StringValue), false}, - {"Comment", TYPE_VARCHAR, sizeof(StringValue), false}, - {"is_null", TYPE_VARCHAR, sizeof(StringValue), true}, -}; - -char g_tuple_buf[10000]; // enough for tuple -TEST_F(SchemaScannerTest, normal_use) { - SchemaScanner scanner(s_test_columns, - sizeof(s_test_columns) / sizeof(SchemaScanner::ColumnDesc)); - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - EXPECT_EQ(65, tuple_desc->byte_size()); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_TRUE(status.ok()); -} -TEST_F(SchemaScannerTest, input_fail) { - SchemaScanner scanner(s_test_columns, - sizeof(s_test_columns) / sizeof(SchemaScanner::ColumnDesc)); - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.init(&_param, &_obj_pool); - EXPECT_TRUE(status.ok()); - status = scanner.start((RuntimeState*)1); - EXPECT_TRUE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr != tuple_desc); - EXPECT_EQ(65, tuple_desc->byte_size()); - bool eos; - status = scanner.get_next_row(nullptr, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaScannerTest, invalid_param) { - SchemaScanner scanner(nullptr, sizeof(s_test_columns) / sizeof(SchemaScanner::ColumnDesc)); - Status status = scanner.init(&_param, &_obj_pool); - EXPECT_FALSE(status.ok()); -} -TEST_F(SchemaScannerTest, no_init_use) { - SchemaScanner scanner(s_test_columns, - sizeof(s_test_columns) / sizeof(SchemaScanner::ColumnDesc)); - Status status = scanner.start((RuntimeState*)1); - EXPECT_FALSE(status.ok()); - const TupleDescriptor* tuple_desc = scanner.tuple_desc(); - EXPECT_TRUE(nullptr == tuple_desc); - Tuple* tuple = (Tuple*)g_tuple_buf; - bool eos; - status = scanner.get_next_row(tuple, &_mem_pool, &eos); - EXPECT_FALSE(status.ok()); -} - -} // namespace doris diff --git a/be/test/exec/set_executor_test.cpp b/be/test/exec/set_executor_test.cpp deleted file mode 100644 index 5628c90f32..0000000000 --- a/be/test/exec/set_executor_test.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/set_executor.h" - -#include <gtest/gtest.h> - -#include "common/logging.h" -#include "runtime/exec_env.h" -#include "service/doris_server.h" - -namespace doris { - -class SetExecutorTest : public testing::Test { -public: - SetExecutorTest() : _runtime_state("tmp") {} - - virtual void SetUp() {} - -private: - RuntimeState _runtime_state; -}; - -TEST_F(SetExecutorTest, normal_case) { - ExecEnv exec_env; - DorisServer doris_server(&exec_env); - TSetParams params; - { - TSetVar set_var; - - set_var.type = TSetType::OPT_SESSION; - set_var.variable = "key1"; - TExprNode expr; - expr.node_type = TExprNodeType::STRING_LITERAL; - expr.type = TPrimitiveType::STRING; - expr.__isset.string_literal = true; - expr.string_literal.value = "value1"; - set_var.value.nodes.push_back(expr); - - params.set_vars.push_back(set_var); - } - { - TSetVar set_var; - - set_var.type = TSetType::OPT_GLOBAL; - set_var.variable = "key2"; - TExprNode expr; - expr.node_type = TExprNodeType::STRING_LITERAL; - expr.type = TPrimitiveType::STRING; - expr.__isset.string_literal = true; - expr.string_literal.value = "value2"; - set_var.value.nodes.push_back(expr); - - params.set_vars.push_back(set_var); - } - { - TSetVar set_var; - - set_var.type = TSetType::OPT_DEFAULT; - set_var.variable = "key3"; - TExprNode expr; - expr.node_type = TExprNodeType::STRING_LITERAL; - expr.type = TPrimitiveType::STRING; - expr.__isset.string_literal = true; - expr.string_literal.value = "value3"; - set_var.value.nodes.push_back(expr); - - params.set_vars.push_back(set_var); - } - SetExecutor executor(&doris_server, params); - RowDescriptor row_desc; - Status status = executor.prepare((RuntimeState*)&_runtime_state, row_desc); - EXPECT_TRUE(status.ok()); - LOG(INFO) << executor.debug_string(); -} -TEST_F(SetExecutorTest, failed_case) { - ExecEnv exec_env; - DorisServer doris_server(&exec_env); - TSetParams params; - { - TSetVar set_var; - - set_var.type = TSetType::OPT_SESSION; - set_var.variable = "key1"; - TExprNode expr; - expr.node_type = TExprNodeType::INT_LITERAL; - expr.type = TPrimitiveType::INT; - expr.__isset.int_literal = true; - set_var.value.nodes.push_back(expr); - - params.set_vars.push_back(set_var); - } - SetExecutor executor(&doris_server, params); - RowDescriptor row_desc; - Status status = executor.prepare((RuntimeState*)&_runtime_state, row_desc); - EXPECT_FALSE(status.ok()); - LOG(INFO) << executor.debug_string(); -} -} // namespace doris diff --git a/be/test/exec/tablet_info_test.cpp b/be/test/exec/tablet_info_test.cpp deleted file mode 100644 index 8c86759fab..0000000000 --- a/be/test/exec/tablet_info_test.cpp +++ /dev/null @@ -1,833 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/tablet_info.h" - -#include <gtest/gtest.h> - -#include "runtime/descriptor_helper.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/row_batch.h" -#include "runtime/tuple_row.h" - -namespace doris { - -class OlapTablePartitionParamTest : public testing::Test { -public: - OlapTablePartitionParamTest() {} - virtual ~OlapTablePartitionParamTest() {} - void SetUp() override {} - -private: -}; - -TOlapTableSchemaParam get_schema(TDescriptorTable* desc_tbl) { - TOlapTableSchemaParam t_schema_param; - t_schema_param.db_id = 1; - t_schema_param.table_id = 2; - t_schema_param.version = 0; - - // descriptor - { - TDescriptorTableBuilder dtb; - TTupleDescriptorBuilder tuple_builder; - - tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_INT).column_name("c1").column_pos(1).build()); - tuple_builder.add_slot( - TSlotDescriptorBuilder().type(TYPE_BIGINT).column_name("c2").column_pos(2).build()); - tuple_builder.add_slot( - TSlotDescriptorBuilder().string_type(20).column_name("c3").column_pos(3).build()); - - tuple_builder.build(&dtb); - - *desc_tbl = dtb.desc_tbl(); - t_schema_param.slot_descs = desc_tbl->slotDescriptors; - t_schema_param.tuple_desc = desc_tbl->tupleDescriptors[0]; - } - // index - t_schema_param.indexes.resize(2); - t_schema_param.indexes[0].id = 4; - t_schema_param.indexes[0].columns = {"c1", "c2", "c3"}; - t_schema_param.indexes[1].id = 5; - t_schema_param.indexes[1].columns = {"c1", "c3"}; - - return t_schema_param; -} - -TEST_F(OlapTablePartitionParamTest, normal) { - TDescriptorTable t_desc_tbl; - auto t_schema = get_schema(&t_desc_tbl); - std::shared_ptr<OlapTableSchemaParam> schema(new OlapTableSchemaParam()); - auto st = schema->init(t_schema); - EXPECT_TRUE(st.ok()); - LOG(INFO) << schema->debug_string(); - - // (-oo, 10] | [10.50) | [60, +oo) - TOlapTablePartitionParam t_partition_param; - t_partition_param.db_id = 1; - t_partition_param.table_id = 2; - t_partition_param.version = 0; - t_partition_param.__set_partition_column("c2"); - t_partition_param.__set_distributed_columns({"c1", "c3"}); - t_partition_param.partitions.resize(3); - t_partition_param.partitions[0].id = 10; - t_partition_param.partitions[0].__isset.end_key = true; - t_partition_param.partitions[0].end_key.node_type = TExprNodeType::INT_LITERAL; - t_partition_param.partitions[0].end_key.type = t_desc_tbl.slotDescriptors[1].slotType; - t_partition_param.partitions[0].end_key.num_children = 0; - t_partition_param.partitions[0].end_key.__isset.int_literal = true; - t_partition_param.partitions[0].end_key.int_literal.value = 10; - t_partition_param.partitions[0].num_buckets = 1; - t_partition_param.partitions[0].indexes.resize(2); - t_partition_param.partitions[0].indexes[0].index_id = 4; - t_partition_param.partitions[0].indexes[0].tablets = {21}; - t_partition_param.partitions[0].indexes[1].index_id = 5; - t_partition_param.partitions[0].indexes[1].tablets = {22}; - - t_partition_param.partitions[1].id = 11; - t_partition_param.partitions[1].__isset.start_key = true; - t_partition_param.partitions[1].start_key.node_type = TExprNodeType::INT_LITERAL; - 
t_partition_param.partitions[1].start_key.type = t_desc_tbl.slotDescriptors[1].slotType; - t_partition_param.partitions[1].start_key.num_children = 0; - t_partition_param.partitions[1].start_key.__isset.int_literal = true; - t_partition_param.partitions[1].start_key.int_literal.value = 10; - t_partition_param.partitions[1].__isset.end_key = true; - t_partition_param.partitions[1].end_key.node_type = TExprNodeType::INT_LITERAL; - t_partition_param.partitions[1].end_key.type = t_desc_tbl.slotDescriptors[1].slotType; - t_partition_param.partitions[1].end_key.num_children = 0; - t_partition_param.partitions[1].end_key.__isset.int_literal = true; - t_partition_param.partitions[1].end_key.int_literal.value = 50; - t_partition_param.partitions[1].num_buckets = 2; - t_partition_param.partitions[1].indexes.resize(2); - t_partition_param.partitions[1].indexes[0].index_id = 4; - t_partition_param.partitions[1].indexes[0].tablets = {31, 32}; - t_partition_param.partitions[1].indexes[1].index_id = 5; - t_partition_param.partitions[1].indexes[1].tablets = {33, 34}; - - t_partition_param.partitions[2].id = 12; - t_partition_param.partitions[2].__isset.start_key = true; - t_partition_param.partitions[2].start_key.node_type = TExprNodeType::INT_LITERAL; - t_partition_param.partitions[2].start_key.type = t_desc_tbl.slotDescriptors[1].slotType; - t_partition_param.partitions[2].start_key.num_children = 0; - t_partition_param.partitions[2].start_key.__isset.int_literal = true; - t_partition_param.partitions[2].start_key.int_literal.value = 60; - t_partition_param.partitions[2].num_buckets = 4; - t_partition_param.partitions[2].indexes.resize(2); - t_partition_param.partitions[2].indexes[0].index_id = 4; - t_partition_param.partitions[2].indexes[0].tablets = {41, 42, 43, 44}; - t_partition_param.partitions[2].indexes[1].index_id = 5; - t_partition_param.partitions[2].indexes[1].tablets = {45, 46, 47, 48}; - - OlapTablePartitionParam part(schema, t_partition_param); - st = part.init(); - EXPECT_TRUE(st.ok()); - LOG(INFO) << part.debug_string(); - - ObjectPool pool; - DescriptorTbl* desc_tbl = nullptr; - st = DescriptorTbl::create(&pool, t_desc_tbl, &desc_tbl); - EXPECT_TRUE(st.ok()); - RowDescriptor row_desc(*desc_tbl, {0}, {false}); - TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - RowBatch batch(row_desc, 1024); - // 12, 9, "abc" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 12; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 9; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10); - str_val->len = 3; - memcpy(str_val->ptr, "abc", str_val->len); - - // 9: - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - EXPECT_EQ(10, partition->id); - } - // 13, 25, "abcd" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 13; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 25; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10); - str_val->len = 4; - memcpy(str_val->ptr, "abcd", str_val->len); - - // 25: - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - 
EXPECT_EQ(11, partition->id); - } - // 14, 50, "abcde" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 14; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 50; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = reinterpret_cast<char*>(batch.tuple_data_pool()->allocate(10)); - str_val->len = 5; - memcpy(str_val->ptr, "abcde", str_val->len); - - // 50: - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_FALSE(found); - } - - // 15, 60, "abcdef" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 15; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 60; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = reinterpret_cast<char*>(batch.tuple_data_pool()->allocate(10)); - str_val->len = 6; - memcpy(str_val->ptr, "abcdef", str_val->len); - - // 60: - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - EXPECT_EQ(12, partition->id); - } -} - -/* - *PARTITION BY LIST(`k1`) - * ( - * PARTITION p1 VALUES IN ("1", "2"), - * PARTITION p2 VALUES IN ("3"), - * PARTITION p3 VALUES IN ("4", "5", "6") - * ) - * -*/ -TEST_F(OlapTablePartitionParamTest, single_list_partition) { - TDescriptorTable t_desc_tbl; - auto t_schema = get_schema(&t_desc_tbl); - std::shared_ptr<OlapTableSchemaParam> schema(new OlapTableSchemaParam()); - auto st = schema->init(t_schema); - EXPECT_TRUE(st.ok()); - LOG(INFO) << schema->debug_string(); - - // 1 | 2 | 3 | 4 | 5 | 6 - std::vector<TExprNode> t_node_1; - t_node_1.resize(1); - t_node_1[0].node_type = TExprNodeType::INT_LITERAL; - t_node_1[0].type = t_desc_tbl.slotDescriptors[1].slotType; - t_node_1[0].num_children = 0; - t_node_1[0].__isset.int_literal = true; - t_node_1[0].int_literal.value = 1; - - std::vector<TExprNode> t_node_2; - t_node_2.resize(1); - t_node_2[0].node_type = TExprNodeType::INT_LITERAL; - t_node_2[0].type = t_desc_tbl.slotDescriptors[1].slotType; - t_node_2[0].num_children = 0; - t_node_2[0].__isset.int_literal = true; - t_node_2[0].int_literal.value = 2; - - std::vector<TExprNode> t_node_3; - t_node_3.resize(1); - t_node_3[0].node_type = TExprNodeType::INT_LITERAL; - t_node_3[0].type = t_desc_tbl.slotDescriptors[1].slotType; - t_node_3[0].num_children = 0; - t_node_3[0].__isset.int_literal = true; - t_node_3[0].int_literal.value = 3; - - std::vector<TExprNode> t_node_4; - t_node_4.resize(1); - t_node_4[0].node_type = TExprNodeType::INT_LITERAL; - t_node_4[0].type = t_desc_tbl.slotDescriptors[1].slotType; - t_node_4[0].num_children = 0; - t_node_4[0].__isset.int_literal = true; - t_node_4[0].int_literal.value = 4; - - std::vector<TExprNode> t_node_5; - t_node_5.resize(1); - t_node_5[0].node_type = TExprNodeType::INT_LITERAL; - t_node_5[0].type = t_desc_tbl.slotDescriptors[1].slotType; - t_node_5[0].num_children = 0; - t_node_5[0].__isset.int_literal = true; - t_node_5[0].int_literal.value = 5; - - std::vector<TExprNode> t_node_6; - t_node_6.resize(1); - t_node_6[0].node_type = TExprNodeType::INT_LITERAL; - t_node_6[0].type = t_desc_tbl.slotDescriptors[1].slotType; - t_node_6[0].num_children = 0; - t_node_6[0].__isset.int_literal = true; - t_node_6[0].int_literal.value = 6; - - TOlapTablePartitionParam t_partition_param; - t_partition_param.db_id = 1; - t_partition_param.table_id = 2; - t_partition_param.version = 0; - 
t_partition_param.__set_partition_column("c2"); - t_partition_param.__set_distributed_columns({"c1", "c3"}); - t_partition_param.partitions.resize(3); - t_partition_param.partitions[0].id = 10; - t_partition_param.partitions[0].__isset.in_keys = true; - t_partition_param.partitions[0].in_keys.emplace_back(t_node_1); - t_partition_param.partitions[0].in_keys.emplace_back(t_node_2); - t_partition_param.partitions[0].num_buckets = 1; - t_partition_param.partitions[0].indexes.resize(2); - t_partition_param.partitions[0].indexes[0].index_id = 4; - t_partition_param.partitions[0].indexes[0].tablets = {21}; - t_partition_param.partitions[0].indexes[1].index_id = 5; - t_partition_param.partitions[0].indexes[1].tablets = {22}; - - t_partition_param.partitions[1].id = 11; - t_partition_param.partitions[1].__isset.in_keys = true; - t_partition_param.partitions[1].in_keys.emplace_back(t_node_3); - t_partition_param.partitions[1].num_buckets = 2; - t_partition_param.partitions[1].indexes.resize(2); - t_partition_param.partitions[1].indexes[0].index_id = 4; - t_partition_param.partitions[1].indexes[0].tablets = {31, 32}; - t_partition_param.partitions[1].indexes[1].index_id = 5; - t_partition_param.partitions[1].indexes[1].tablets = {33, 34}; - - t_partition_param.partitions[2].id = 12; - t_partition_param.partitions[2].__isset.in_keys = true; - t_partition_param.partitions[2].in_keys.emplace_back(t_node_4); - t_partition_param.partitions[2].in_keys.emplace_back(t_node_5); - t_partition_param.partitions[2].in_keys.emplace_back(t_node_6); - t_partition_param.partitions[2].num_buckets = 4; - t_partition_param.partitions[2].indexes.resize(2); - t_partition_param.partitions[2].indexes[0].index_id = 4; - t_partition_param.partitions[2].indexes[0].tablets = {41, 42, 43, 44}; - t_partition_param.partitions[2].indexes[1].index_id = 5; - t_partition_param.partitions[2].indexes[1].tablets = {45, 46, 47, 48}; - - OlapTablePartitionParam part(schema, t_partition_param); - st = part.init(); - EXPECT_TRUE(st.ok()); - LOG(INFO) << part.debug_string(); - - ObjectPool pool; - DescriptorTbl* desc_tbl = nullptr; - st = DescriptorTbl::create(&pool, t_desc_tbl, &desc_tbl); - EXPECT_TRUE(st.ok()); - RowDescriptor row_desc(*desc_tbl, {0}, {false}); - TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - RowBatch batch(row_desc, 1024); - // 12, 1, "abc" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 12; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 1; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10); - str_val->len = 3; - memcpy(str_val->ptr, "abc", str_val->len); - - // 1: - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - EXPECT_EQ(10, partition->id); - } - // 13, 3, "abcd" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 13; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 3; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10); - str_val->len = 4; - memcpy(str_val->ptr, "abcd", str_val->len); - - // 3: - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - EXPECT_EQ(11, 
partition->id); - } - // 14, 50, "abcde" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 14; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 50; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = reinterpret_cast<char*>(batch.tuple_data_pool()->allocate(10)); - str_val->len = 5; - memcpy(str_val->ptr, "abcde", str_val->len); - - // 50: - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_FALSE(found); - } - - // 15, 6, "abcdef" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 15; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 6; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = reinterpret_cast<char*>(batch.tuple_data_pool()->allocate(10)); - str_val->len = 6; - memcpy(str_val->ptr, "abcdef", str_val->len); - - // 6: - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - EXPECT_EQ(12, partition->id); - } -} - -/* - * multi list partition key test - * - * PARTITION BY LIST(c2, c3) - * ( - * PARTITION p1 VALUES IN (("1", "beijing"),("1", "shanghai")), - * PARTITION p2 VALUES IN (("1", "tianjin"),("2", "beijing")), - * PARTITION p3 VALUES IN (("2", "shanghai")) - * ) - * - */ -TEST_F(OlapTablePartitionParamTest, multi_list_partition) { - TDescriptorTable t_desc_tbl; - auto t_schema = get_schema(&t_desc_tbl); - std::shared_ptr<OlapTableSchemaParam> schema(new OlapTableSchemaParam()); - auto st = schema->init(t_schema); - EXPECT_TRUE(st.ok()); - LOG(INFO) << schema->debug_string(); - - // 1 - TExprNode node_1; - node_1.node_type = TExprNodeType::INT_LITERAL; - node_1.type = t_desc_tbl.slotDescriptors[1].slotType; - node_1.num_children = 0; - node_1.__isset.int_literal = true; - node_1.int_literal.value = 1; - // 2 - TExprNode node_2; - node_2.node_type = TExprNodeType::INT_LITERAL; - node_2.type = t_desc_tbl.slotDescriptors[1].slotType; - node_2.num_children = 0; - node_2.__isset.int_literal = true; - node_2.int_literal.value = 2; - // beijing - TExprNode node_b; - node_b.node_type = TExprNodeType::STRING_LITERAL; - node_b.type = t_desc_tbl.slotDescriptors[2].slotType; - node_b.num_children = 0; - node_b.__isset.string_literal = true; - node_b.string_literal.value = "beijing"; - // shanghai - TExprNode node_s; - node_s.node_type = TExprNodeType::STRING_LITERAL; - node_s.type = t_desc_tbl.slotDescriptors[2].slotType; - node_s.num_children = 0; - node_s.__isset.string_literal = true; - node_s.string_literal.value = "shanghai"; - // tianjin - TExprNode node_t; - node_t.node_type = TExprNodeType::STRING_LITERAL; - node_t.type = t_desc_tbl.slotDescriptors[2].slotType; - node_t.num_children = 0; - node_t.__isset.string_literal = true; - node_t.string_literal.value = "tianjin"; - - // (1, beijing) - std::vector<TExprNode> t_nodes_1; - t_nodes_1.emplace_back(node_1); - t_nodes_1.emplace_back(node_b); - - // (1, shanghai) - std::vector<TExprNode> t_nodes_2; - t_nodes_2.emplace_back(node_1); - t_nodes_2.emplace_back(node_s); - - // (1, tianjin) - std::vector<TExprNode> t_nodes_3; - t_nodes_3.emplace_back(node_1); - t_nodes_3.emplace_back(node_t); - - // (2, beijing) - std::vector<TExprNode> t_nodes_4; - t_nodes_4.emplace_back(node_2); - t_nodes_4.emplace_back(node_b); - - // (2, shanghai) - std::vector<TExprNode> t_nodes_5; - 
t_nodes_5.emplace_back(node_s); - - TOlapTablePartitionParam t_partition_param; - t_partition_param.db_id = 1; - t_partition_param.table_id = 2; - t_partition_param.version = 0; - t_partition_param.__set_partition_columns({"c2", "c3"}); - t_partition_param.__set_distributed_columns({"c1"}); - t_partition_param.partitions.resize(3); - - // (("1", "beijing"),("1", "shanghai")) - t_partition_param.partitions[0].id = 10; - t_partition_param.partitions[0].__isset.in_keys = true; - t_partition_param.partitions[0].in_keys.resize(2); - t_partition_param.partitions[0].in_keys[0] = t_nodes_1; - t_partition_param.partitions[0].in_keys[1] = t_nodes_2; - t_partition_param.partitions[0].num_buckets = 1; - t_partition_param.partitions[0].indexes.resize(2); - t_partition_param.partitions[0].indexes[0].index_id = 4; - t_partition_param.partitions[0].indexes[0].tablets = {21}; - t_partition_param.partitions[0].indexes[1].index_id = 5; - t_partition_param.partitions[0].indexes[1].tablets = {22}; - - // (("1", "tianjin"),("2", "beijing")) - t_partition_param.partitions[1].id = 11; - t_partition_param.partitions[1].__isset.in_keys = true; - t_partition_param.partitions[1].in_keys.emplace_back(t_nodes_3); - t_partition_param.partitions[1].in_keys.emplace_back(t_nodes_4); - t_partition_param.partitions[1].num_buckets = 2; - t_partition_param.partitions[1].indexes.resize(2); - t_partition_param.partitions[1].indexes[0].index_id = 4; - t_partition_param.partitions[1].indexes[0].tablets = {31, 32}; - t_partition_param.partitions[1].indexes[1].index_id = 5; - t_partition_param.partitions[1].indexes[1].tablets = {33, 34}; - - // (("2", "shanghai"))) - t_partition_param.partitions[2].id = 12; - t_partition_param.partitions[2].__isset.in_keys = true; - t_partition_param.partitions[2].in_keys.emplace_back(t_nodes_5); - t_partition_param.partitions[2].num_buckets = 4; - t_partition_param.partitions[2].indexes.resize(2); - t_partition_param.partitions[2].indexes[0].index_id = 4; - t_partition_param.partitions[2].indexes[0].tablets = {41, 42, 43, 44}; - t_partition_param.partitions[2].indexes[1].index_id = 5; - t_partition_param.partitions[2].indexes[1].tablets = {45, 46, 47, 48}; - - OlapTablePartitionParam part(schema, t_partition_param); - st = part.init(); - LOG(INFO) << st; - EXPECT_TRUE(st.ok()); - LOG(INFO) << part.debug_string(); - - ObjectPool pool; - DescriptorTbl* desc_tbl = nullptr; - st = DescriptorTbl::create(&pool, t_desc_tbl, &desc_tbl); - EXPECT_TRUE(st.ok()); - RowDescriptor row_desc(*desc_tbl, {0}, {false}); - TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - RowBatch batch(row_desc, 1024); - // 12, 1, "beijing" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 12; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 1; - StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16)); - str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10); - str_val->len = 7; - memcpy(str_val->ptr, "beijing", str_val->len); - - // 1, beijing - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - EXPECT_EQ(10, partition->id); - } - // 13, 2, "shanghai" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast<int32_t*>(tuple->get_slot(4)) = 13; - *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 2; - StringValue* str_val = 
reinterpret_cast(tuple->get_slot(16)); - str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10); - str_val->len = 8; - memcpy(str_val->ptr, "shanghai", str_val->len); - - // 2, shanghai - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - EXPECT_EQ(12, partition->id); - } - // 14, 50, "beijing" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast(tuple->get_slot(4)) = 14; - *reinterpret_cast(tuple->get_slot(8)) = 50; - StringValue* str_val = reinterpret_cast(tuple->get_slot(16)); - str_val->ptr = reinterpret_cast(batch.tuple_data_pool()->allocate(10)); - str_val->len = 7; - memcpy(str_val->ptr, "beijing", str_val->len); - - // 50, beijing - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_FALSE(found); - } - - // 15, 1, "tianjin" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast(tuple->get_slot(4)) = 15; - *reinterpret_cast(tuple->get_slot(8)) = 1; - StringValue* str_val = reinterpret_cast(tuple->get_slot(16)); - str_val->ptr = reinterpret_cast(batch.tuple_data_pool()->allocate(10)); - str_val->len = 7; - memcpy(str_val->ptr, "tianjin", str_val->len); - - // 1, tianjin - const OlapTablePartition* partition = nullptr; - auto found = part.find_partition(tuple, &partition); - EXPECT_TRUE(found); - EXPECT_EQ(11, partition->id); - } -} - -TEST_F(OlapTablePartitionParamTest, to_protobuf) { - TDescriptorTable t_desc_tbl; - auto t_schema = get_schema(&t_desc_tbl); - std::shared_ptr schema(new OlapTableSchemaParam()); - auto st = schema->init(t_schema); - EXPECT_TRUE(st.ok()); - POlapTableSchemaParam pschema; - schema->to_protobuf(&pschema); - { - std::shared_ptr schema2(new OlapTableSchemaParam()); - auto st = schema2->init(pschema); - EXPECT_TRUE(st.ok()); - - EXPECT_STREQ(schema->debug_string().c_str(), schema2->debug_string().c_str()); - } -} - -TEST_F(OlapTablePartitionParamTest, unknown_index_column) { - TDescriptorTable t_desc_tbl; - auto tschema = get_schema(&t_desc_tbl); - std::shared_ptr schema(new OlapTableSchemaParam()); - tschema.indexes[0].columns.push_back("unknown_col"); - auto st = schema->init(tschema); - EXPECT_FALSE(st.ok()); -} - -TEST_F(OlapTablePartitionParamTest, unpartitioned) { - TDescriptorTable t_desc_tbl; - auto t_schema = get_schema(&t_desc_tbl); - std::shared_ptr schema(new OlapTableSchemaParam()); - auto st = schema->init(t_schema); - EXPECT_TRUE(st.ok()); - - // (-oo, 10] | [10.50) | [60, +oo) - TOlapTablePartitionParam t_partition_param; - t_partition_param.db_id = 1; - t_partition_param.table_id = 2; - t_partition_param.version = 0; - t_partition_param.__set_distributed_columns({"c1", "c3"}); - t_partition_param.partitions.resize(1); - t_partition_param.partitions[0].id = 10; - t_partition_param.partitions[0].num_buckets = 1; - t_partition_param.partitions[0].indexes.resize(2); - t_partition_param.partitions[0].indexes[0].index_id = 4; - t_partition_param.partitions[0].indexes[0].tablets = {21}; - t_partition_param.partitions[0].indexes[1].index_id = 5; - - OlapTablePartitionParam part(schema, t_partition_param); - st = part.init(); - EXPECT_TRUE(st.ok()); - - ObjectPool pool; - DescriptorTbl* desc_tbl = nullptr; - st = DescriptorTbl::create(&pool, t_desc_tbl, &desc_tbl); - EXPECT_TRUE(st.ok()); - RowDescriptor 
row_desc(*desc_tbl, {0}, {false});
- TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0);
- RowBatch batch(row_desc, 1024);
- // 12, 9, "abc"
- {
- Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
- memset(tuple, 0, tuple_desc->byte_size());
-
- *reinterpret_cast<int*>(tuple->get_slot(4)) = 12;
- *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 9;
- StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16));
- str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10);
- str_val->len = 3;
- memcpy(str_val->ptr, "abc", str_val->len);
-
- // 9:
- const OlapTablePartition* partition = nullptr;
- auto found = part.find_partition(tuple, &partition);
- EXPECT_TRUE(found);
- EXPECT_EQ(10, partition->id);
- }
-}
-
-TEST_F(OlapTablePartitionParamTest, unknown_partition_column) {
- TDescriptorTable t_desc_tbl;
- auto t_schema = get_schema(&t_desc_tbl);
- std::shared_ptr<OlapTableSchemaParam> schema(new OlapTableSchemaParam());
- auto st = schema->init(t_schema);
- EXPECT_TRUE(st.ok());
-
- // (-oo, 10] | [10, 50) | [60, +oo)
- TOlapTablePartitionParam t_partition_param;
- t_partition_param.db_id = 1;
- t_partition_param.table_id = 2;
- t_partition_param.version = 0;
- t_partition_param.__set_partition_column("c4");
- t_partition_param.__set_distributed_columns({"c1", "c3"});
- t_partition_param.partitions.resize(1);
- t_partition_param.partitions[0].id = 10;
- t_partition_param.partitions[0].num_buckets = 1;
- t_partition_param.partitions[0].indexes.resize(2);
- t_partition_param.partitions[0].indexes[0].index_id = 4;
- t_partition_param.partitions[0].indexes[0].tablets = {21};
- t_partition_param.partitions[0].indexes[1].index_id = 5;
-
- OlapTablePartitionParam part(schema, t_partition_param);
- st = part.init();
- EXPECT_FALSE(st.ok());
-}
-
-TEST_F(OlapTablePartitionParamTest, unknown_distributed_col) {
- TDescriptorTable t_desc_tbl;
- auto t_schema = get_schema(&t_desc_tbl);
- std::shared_ptr<OlapTableSchemaParam> schema(new OlapTableSchemaParam());
- auto st = schema->init(t_schema);
- EXPECT_TRUE(st.ok());
-
- // (-oo, 10] | [10, 50) | [60, +oo)
- TOlapTablePartitionParam t_partition_param;
- t_partition_param.db_id = 1;
- t_partition_param.table_id = 2;
- t_partition_param.version = 0;
- t_partition_param.__set_distributed_columns({"c4"});
- t_partition_param.partitions.resize(1);
- t_partition_param.partitions[0].id = 10;
- t_partition_param.partitions[0].num_buckets = 1;
- t_partition_param.partitions[0].indexes.resize(2);
- t_partition_param.partitions[0].indexes[0].index_id = 4;
- t_partition_param.partitions[0].indexes[0].tablets = {21};
- t_partition_param.partitions[0].indexes[1].index_id = 5;
-
- OlapTablePartitionParam part(schema, t_partition_param);
- st = part.init();
- EXPECT_FALSE(st.ok());
-}
-
-TEST_F(OlapTablePartitionParamTest, bad_index) {
- TDescriptorTable t_desc_tbl;
- auto t_schema = get_schema(&t_desc_tbl);
- std::shared_ptr<OlapTableSchemaParam> schema(new OlapTableSchemaParam());
- auto st = schema->init(t_schema);
- EXPECT_TRUE(st.ok());
-
- {
- // (-oo, 10] | [10, 50) | [60, +oo)
- TOlapTablePartitionParam t_partition_param;
- t_partition_param.db_id = 1;
- t_partition_param.table_id = 2;
- t_partition_param.version = 0;
- t_partition_param.__set_distributed_columns({"c1", "c3"});
- t_partition_param.partitions.resize(1);
- t_partition_param.partitions[0].id = 10;
- t_partition_param.partitions[0].num_buckets = 1;
- t_partition_param.partitions[0].indexes.resize(1);
- t_partition_param.partitions[0].indexes[0].index_id = 4;
- t_partition_param.partitions[0].indexes[0].tablets = {21};
-
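- // The schema defines two indexes (ids 4 and 5), but only one is provided here, so init() should fail.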
- OlapTablePartitionParam part(schema, t_partition_param);
- st = part.init();
- EXPECT_FALSE(st.ok());
- }
- {
- // (-oo, 10] | [10, 50) | [60, +oo)
- TOlapTablePartitionParam t_partition_param;
- t_partition_param.db_id = 1;
- t_partition_param.table_id = 2;
- t_partition_param.version = 0;
- t_partition_param.__set_partition_column("c4");
- t_partition_param.__set_distributed_columns({"c1", "c3"});
- t_partition_param.partitions.resize(1);
- t_partition_param.partitions[0].id = 10;
- t_partition_param.partitions[0].num_buckets = 1;
- t_partition_param.partitions[0].indexes.resize(2);
- t_partition_param.partitions[0].indexes[0].index_id = 4;
- t_partition_param.partitions[0].indexes[0].tablets = {21};
- t_partition_param.partitions[0].indexes[1].index_id = 6;
-
- OlapTablePartitionParam part(schema, t_partition_param);
- st = part.init();
- EXPECT_FALSE(st.ok());
- }
-}
-
-TEST_F(OlapTablePartitionParamTest, tableLocation) {
- TOlapTableLocationParam tparam;
- tparam.tablets.resize(1);
- tparam.tablets[0].tablet_id = 1;
- OlapTableLocationParam location(tparam);
- {
- auto loc = location.find_tablet(1);
- EXPECT_TRUE(loc != nullptr);
- }
- {
- auto loc = location.find_tablet(2);
- EXPECT_TRUE(loc == nullptr);
- }
-}
-
-TEST_F(OlapTablePartitionParamTest, NodesInfo) {
- TPaloNodesInfo tinfo;
- tinfo.nodes.resize(1);
- tinfo.nodes[0].id = 1;
- DorisNodesInfo nodes(tinfo);
- {
- auto node = nodes.find_node(1);
- EXPECT_TRUE(node != nullptr);
- }
- {
- auto node = nodes.find_node(2);
- EXPECT_TRUE(node == nullptr);
- }
-}
-
-} // namespace doris
diff --git a/be/test/exec/tablet_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp
deleted file mode 100644
index 566d9675be..0000000000
--- a/be/test/exec/tablet_sink_test.cpp
+++ /dev/null
@@ -1,982 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
- -#include "exec/tablet_sink.h" - -#include - -#include "common/config.h" -#include "gen_cpp/HeartbeatService_types.h" -#include "gen_cpp/internal_service.pb.h" -#include "runtime/decimalv2_value.h" -#include "runtime/descriptor_helper.h" -#include "runtime/exec_env.h" -#include "runtime/result_queue_mgr.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/stream_load/load_stream_mgr.h" -#include "runtime/thread_resource_mgr.h" -#include "runtime/tuple_row.h" -#include "runtime/types.h" -#include "service/brpc.h" -#include "util/brpc_client_cache.h" -#include "util/cpu_info.h" -#include "util/debug/leakcheck_disabler.h" -#include "util/proto_util.h" - -namespace doris { -namespace stream_load { - -Status k_add_batch_status; - -class OlapTableSinkTest : public testing::Test { -public: - OlapTableSinkTest() {} - virtual ~OlapTableSinkTest() {} - void SetUp() override { - k_add_batch_status = Status::OK(); - _env = ExecEnv::GetInstance(); - _env->_thread_mgr = new ThreadResourceMgr(); - _env->_master_info = new TMasterInfo(); - _env->_load_stream_mgr = new LoadStreamMgr(); - _env->_internal_client_cache = new BrpcClientCache(); - _env->_function_client_cache = new BrpcClientCache(); - ThreadPoolBuilder("SendBatchThreadPool") - .set_min_threads(1) - .set_max_threads(5) - .set_max_queue_size(100) - .build(&_env->_send_batch_thread_pool); - config::tablet_writer_open_rpc_timeout_sec = 60; - config::max_send_batch_parallelism_per_job = 1; - } - - void TearDown() override { - SAFE_DELETE(_env->_internal_client_cache); - SAFE_DELETE(_env->_function_client_cache); - SAFE_DELETE(_env->_load_stream_mgr); - SAFE_DELETE(_env->_master_info); - SAFE_DELETE(_env->_thread_mgr); - if (_server) { - _server->Stop(100); - _server->Join(); - SAFE_DELETE(_server); - } - } - -private: - ExecEnv* _env = nullptr; - brpc::Server* _server = nullptr; -}; - -TDataSink get_data_sink(TDescriptorTable* desc_tbl) { - int64_t db_id = 1; - int64_t table_id = 2; - int64_t partition_id = 3; - int64_t index1_id = 4; - int64_t tablet1_id = 6; - int64_t tablet2_id = 7; - - TDataSink data_sink; - data_sink.type = TDataSinkType::OLAP_TABLE_SINK; - data_sink.__isset.olap_table_sink = true; - - TOlapTableSink& tsink = data_sink.olap_table_sink; - tsink.load_id.hi = 123; - tsink.load_id.lo = 456; - tsink.txn_id = 789; - tsink.db_id = 1; - tsink.table_id = 2; - tsink.tuple_id = 0; - tsink.num_replicas = 3; - tsink.db_name = "testDb"; - tsink.table_name = "testTable"; - - // construct schema - TOlapTableSchemaParam& tschema = tsink.schema; - tschema.db_id = 1; - tschema.table_id = 2; - tschema.version = 0; - - // descriptor - { - TDescriptorTableBuilder dtb; - { - TTupleDescriptorBuilder tuple_builder; - - tuple_builder.add_slot(TSlotDescriptorBuilder() - .type(TYPE_INT) - .column_name("c1") - .column_pos(1) - .build()); - tuple_builder.add_slot(TSlotDescriptorBuilder() - .type(TYPE_BIGINT) - .column_name("c2") - .column_pos(2) - .build()); - tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(10) - .column_name("c3") - .column_pos(3) - .build()); - - tuple_builder.build(&dtb); - } - { - TTupleDescriptorBuilder tuple_builder; - - tuple_builder.add_slot(TSlotDescriptorBuilder() - .type(TYPE_INT) - .column_name("c1") - .column_pos(1) - .build()); - tuple_builder.add_slot(TSlotDescriptorBuilder() - .type(TYPE_BIGINT) - .column_name("c2") - .column_pos(2) - .build()); - tuple_builder.add_slot(TSlotDescriptorBuilder() - .string_type(20) - .column_name("c3") - .column_pos(3) - .build()); - - 
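- // The second tuple (id 1) mirrors the first but widens c3 to varchar(20); the convert test uses it as the sink's output tuple.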
tuple_builder.build(&dtb); - } - - *desc_tbl = dtb.desc_tbl(); - tschema.slot_descs = desc_tbl->slotDescriptors; - tschema.tuple_desc = desc_tbl->tupleDescriptors[0]; - } - // index - tschema.indexes.resize(1); - tschema.indexes[0].id = index1_id; - tschema.indexes[0].columns = {"c1", "c2", "c3"}; - // tschema.indexes[1].id = 5; - // tschema.indexes[1].columns = {"c1", "c3"}; - // partition - TOlapTablePartitionParam& tpartition = tsink.partition; - tpartition.db_id = db_id; - tpartition.table_id = table_id; - tpartition.version = table_id; - tpartition.__set_partition_column("c2"); - tpartition.__set_distributed_columns({"c1", "c3"}); - tpartition.partitions.resize(1); - tpartition.partitions[0].id = partition_id; - tpartition.partitions[0].num_buckets = 2; - tpartition.partitions[0].indexes.resize(1); - tpartition.partitions[0].indexes[0].index_id = index1_id; - tpartition.partitions[0].indexes[0].tablets = {tablet1_id, tablet2_id}; - // location - TOlapTableLocationParam& location = tsink.location; - location.db_id = db_id; - location.table_id = table_id; - location.version = 0; - location.tablets.resize(2); - location.tablets[0].tablet_id = tablet1_id; - location.tablets[0].node_ids = {0, 1, 2}; - location.tablets[1].tablet_id = tablet2_id; - location.tablets[1].node_ids = {0, 1, 2}; - // location - TPaloNodesInfo& nodes_info = tsink.nodes_info; - nodes_info.nodes.resize(3); - nodes_info.nodes[0].id = 0; - nodes_info.nodes[0].host = "127.0.0.1"; - nodes_info.nodes[0].async_internal_port = 4356; - nodes_info.nodes[1].id = 1; - nodes_info.nodes[1].host = "127.0.0.1"; - nodes_info.nodes[1].async_internal_port = 4356; - nodes_info.nodes[2].id = 2; - nodes_info.nodes[2].host = "127.0.0.1"; - nodes_info.nodes[2].async_internal_port = 4357; - - return data_sink; -} - -TDataSink get_decimal_sink(TDescriptorTable* desc_tbl) { - int64_t db_id = 1; - int64_t table_id = 2; - int64_t partition_id = 3; - int64_t index1_id = 4; - int64_t tablet1_id = 6; - int64_t tablet2_id = 7; - - TDataSink data_sink; - data_sink.type = TDataSinkType::OLAP_TABLE_SINK; - data_sink.__isset.olap_table_sink = true; - - TOlapTableSink& tsink = data_sink.olap_table_sink; - tsink.load_id.hi = 123; - tsink.load_id.lo = 456; - tsink.txn_id = 789; - tsink.db_id = 1; - tsink.table_id = 2; - tsink.tuple_id = 0; - tsink.num_replicas = 3; - tsink.db_name = "testDb"; - tsink.table_name = "testTable"; - - // construct schema - TOlapTableSchemaParam& tschema = tsink.schema; - tschema.db_id = 1; - tschema.table_id = 2; - tschema.version = 0; - - // descriptor - { - TDescriptorTableBuilder dtb; - { - TTupleDescriptorBuilder tuple_builder; - - tuple_builder.add_slot(TSlotDescriptorBuilder() - .type(TYPE_INT) - .column_name("c1") - .column_pos(1) - .build()); - tuple_builder.add_slot(TSlotDescriptorBuilder() - .decimal_type(5, 2) - .column_name("c2") - .column_pos(2) - .build()); - - tuple_builder.build(&dtb); - } - - *desc_tbl = dtb.desc_tbl(); - tschema.slot_descs = desc_tbl->slotDescriptors; - tschema.tuple_desc = desc_tbl->tupleDescriptors[0]; - } - // index - tschema.indexes.resize(1); - tschema.indexes[0].id = index1_id; - tschema.indexes[0].columns = {"c1", "c2"}; - // tschema.indexes[1].id = 5; - // tschema.indexes[1].columns = {"c1", "c3"}; - // partition - TOlapTablePartitionParam& tpartition = tsink.partition; - tpartition.db_id = db_id; - tpartition.table_id = table_id; - tpartition.version = table_id; - tpartition.__set_partition_column("c1"); - tpartition.__set_distributed_columns({"c2"}); - 
tpartition.partitions.resize(1); - tpartition.partitions[0].id = partition_id; - tpartition.partitions[0].num_buckets = 2; - tpartition.partitions[0].indexes.resize(1); - tpartition.partitions[0].indexes[0].index_id = index1_id; - tpartition.partitions[0].indexes[0].tablets = {tablet1_id, tablet2_id}; - // location - TOlapTableLocationParam& location = tsink.location; - location.db_id = db_id; - location.table_id = table_id; - location.version = 0; - location.tablets.resize(2); - location.tablets[0].tablet_id = tablet1_id; - location.tablets[0].node_ids = {0, 1, 2}; - location.tablets[1].tablet_id = tablet2_id; - location.tablets[1].node_ids = {0, 1, 2}; - // location - TPaloNodesInfo& nodes_info = tsink.nodes_info; - nodes_info.nodes.resize(3); - nodes_info.nodes[0].id = 0; - nodes_info.nodes[0].host = "127.0.0.1"; - nodes_info.nodes[0].async_internal_port = 4356; - nodes_info.nodes[1].id = 1; - nodes_info.nodes[1].host = "127.0.0.1"; - nodes_info.nodes[1].async_internal_port = 4356; - nodes_info.nodes[2].id = 2; - nodes_info.nodes[2].host = "127.0.0.1"; - nodes_info.nodes[2].async_internal_port = 4357; - - return data_sink; -} - -class TestInternalService : public PBackendService { -public: - TestInternalService() {} - virtual ~TestInternalService() {} - - void transmit_data(::google::protobuf::RpcController* controller, - const ::doris::PTransmitDataParams* request, - ::doris::PTransmitDataResult* response, - ::google::protobuf::Closure* done) override { - brpc::ClosureGuard done_guard(done); - } - - void tablet_writer_open(google::protobuf::RpcController* controller, - const PTabletWriterOpenRequest* request, - PTabletWriterOpenResult* response, - google::protobuf::Closure* done) override { - brpc::ClosureGuard done_guard(done); - Status status; - status.to_protobuf(response->mutable_status()); - } - - void tablet_writer_add_batch(google::protobuf::RpcController* controller, - const PTabletWriterAddBatchRequest* request, - PTabletWriterAddBatchResult* response, - google::protobuf::Closure* done) override { - brpc::ClosureGuard done_guard(done); - { - std::lock_guard l(_lock); - _row_counters += request->tablet_ids_size(); - if (request->eos()) { - _eof_counters++; - } - k_add_batch_status.to_protobuf(response->mutable_status()); - - if (request->has_row_batch() && _row_desc != nullptr) { - brpc::Controller* cntl = static_cast(controller); - attachment_transfer_request_row_batch(request, cntl); - RowBatch batch(*_row_desc, request->row_batch()); - for (int i = 0; i < batch.num_rows(); ++i) { - LOG(INFO) << batch.get_row(i)->to_string(*_row_desc); - _output_set->emplace(batch.get_row(i)->to_string(*_row_desc)); - } - } - } - } - void tablet_writer_cancel(google::protobuf::RpcController* controller, - const PTabletWriterCancelRequest* request, - PTabletWriterCancelResult* response, - google::protobuf::Closure* done) override { - brpc::ClosureGuard done_guard(done); - } - - std::mutex _lock; - int64_t _eof_counters = 0; - int64_t _row_counters = 0; - RowDescriptor* _row_desc = nullptr; - std::set* _output_set = nullptr; -}; - -TEST_F(OlapTableSinkTest, normal) { - // start brpc service first - _server = new brpc::Server(); - auto service = new TestInternalService(); - EXPECT_EQ(_server->AddService(service, brpc::SERVER_OWNS_SERVICE), 0); - brpc::ServerOptions options; - { - debug::ScopedLeakCheckDisabler disable_lsan; - _server->Start(4356, &options); - } - - TUniqueId fragment_id; - TQueryOptions query_options; - query_options.batch_size = 1; - RuntimeState state(fragment_id, 
query_options, TQueryGlobals(), _env);
- state.init_mem_trackers(TUniqueId());
-
- ObjectPool obj_pool;
- TDescriptorTable tdesc_tbl;
- auto t_data_sink = get_data_sink(&tdesc_tbl);
-
- // create desc_tbl
- DescriptorTbl* desc_tbl = nullptr;
- auto st = DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
- EXPECT_TRUE(st.ok());
- state._desc_tbl = desc_tbl;
-
- TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0);
- LOG(INFO) << "tuple_desc=" << tuple_desc->debug_string();
-
- RowDescriptor row_desc(*desc_tbl, {0}, {false});
-
- OlapTableSink sink(&obj_pool, row_desc, {}, &st);
- EXPECT_TRUE(st.ok());
-
- // init
- st = sink.init(t_data_sink);
- EXPECT_TRUE(st.ok());
- // prepare
- st = sink.prepare(&state);
- EXPECT_TRUE(st.ok());
- // open
- st = sink.open(&state);
- EXPECT_TRUE(st.ok());
- // send
- RowBatch batch(row_desc, 1024);
- // 12, 9, "abc"
- {
- Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
- batch.get_row(batch.add_row())->set_tuple(0, tuple);
- memset(tuple, 0, tuple_desc->byte_size());
-
- *reinterpret_cast<int*>(tuple->get_slot(4)) = 12;
- *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 9;
- StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16));
- str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10);
- str_val->len = 3;
- memcpy(str_val->ptr, "abc", str_val->len);
- batch.commit_last_row();
- }
- // 13, 25, "abcd"
- {
- Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
- batch.get_row(batch.add_row())->set_tuple(0, tuple);
- memset(tuple, 0, tuple_desc->byte_size());
-
- *reinterpret_cast<int*>(tuple->get_slot(4)) = 13;
- *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 25;
- StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16));
- str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10);
- str_val->len = 4;
- memcpy(str_val->ptr, "abcd", str_val->len);
-
- batch.commit_last_row();
- }
- // 14, 50, "abcde"
- {
- Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
- batch.get_row(batch.add_row())->set_tuple(0, tuple);
- memset(tuple, 0, tuple_desc->byte_size());
-
- *reinterpret_cast<int*>(tuple->get_slot(4)) = 14;
- *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 50;
- StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16));
- str_val->ptr = reinterpret_cast<char*>(batch.tuple_data_pool()->allocate(16));
- str_val->len = 15;
- memcpy(str_val->ptr, "abcde1234567890", str_val->len);
-
- batch.commit_last_row();
- }
- st = sink.send(&state, &batch);
- EXPECT_TRUE(st.ok());
- // close
- st = sink.close(&state, Status::OK());
- EXPECT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. 
") - << st.to_string(); - - // each node has a eof - EXPECT_EQ(2, service->_eof_counters); - EXPECT_EQ(2 * 2, service->_row_counters); - - // 2node * 2 - EXPECT_EQ(1, state.num_rows_load_filtered()); -} - -TEST_F(OlapTableSinkTest, convert) { - // start brpc service first - _server = new brpc::Server(); - auto service = new TestInternalService(); - EXPECT_EQ(_server->AddService(service, brpc::SERVER_OWNS_SERVICE), 0); - brpc::ServerOptions options; - { - debug::ScopedLeakCheckDisabler disable_lsan; - _server->Start(4356, &options); - } - - TUniqueId fragment_id; - TQueryOptions query_options; - query_options.batch_size = 1024; - RuntimeState state(fragment_id, query_options, TQueryGlobals(), _env); - state.init_mem_trackers(TUniqueId()); - - ObjectPool obj_pool; - TDescriptorTable tdesc_tbl; - auto t_data_sink = get_data_sink(&tdesc_tbl); - - // crate desc_tabl - DescriptorTbl* desc_tbl = nullptr; - auto st = DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); - EXPECT_TRUE(st.ok()); - state._desc_tbl = desc_tbl; - - TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0); - - RowDescriptor row_desc(*desc_tbl, {0}, {false}); - - // expr - std::vector exprs; - exprs.resize(3); - exprs[0].nodes.resize(1); - exprs[0].nodes[0].node_type = TExprNodeType::SLOT_REF; - exprs[0].nodes[0].type = tdesc_tbl.slotDescriptors[3].slotType; - exprs[0].nodes[0].num_children = 0; - exprs[0].nodes[0].__isset.slot_ref = true; - exprs[0].nodes[0].slot_ref.slot_id = 0; - exprs[0].nodes[0].slot_ref.tuple_id = 1; - - exprs[1].nodes.resize(1); - exprs[1].nodes[0].node_type = TExprNodeType::SLOT_REF; - exprs[1].nodes[0].type = tdesc_tbl.slotDescriptors[4].slotType; - exprs[1].nodes[0].num_children = 0; - exprs[1].nodes[0].__isset.slot_ref = true; - exprs[1].nodes[0].slot_ref.slot_id = 1; - exprs[1].nodes[0].slot_ref.tuple_id = 1; - - exprs[2].nodes.resize(1); - exprs[2].nodes[0].node_type = TExprNodeType::SLOT_REF; - exprs[2].nodes[0].type = tdesc_tbl.slotDescriptors[5].slotType; - exprs[2].nodes[0].num_children = 0; - exprs[2].nodes[0].__isset.slot_ref = true; - exprs[2].nodes[0].slot_ref.slot_id = 2; - exprs[2].nodes[0].slot_ref.tuple_id = 1; - - OlapTableSink sink(&obj_pool, row_desc, exprs, &st); - EXPECT_TRUE(st.ok()); - - // set output tuple_id - t_data_sink.olap_table_sink.tuple_id = 1; - // init - st = sink.init(t_data_sink); - EXPECT_TRUE(st.ok()); - // prepare - st = sink.prepare(&state); - EXPECT_TRUE(st.ok()); - // open - st = sink.open(&state); - EXPECT_TRUE(st.ok()); - // send - RowBatch batch(row_desc, 1024); - // 12, 9, "abc" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - batch.get_row(batch.add_row())->set_tuple(0, tuple); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast(tuple->get_slot(4)) = 12; - *reinterpret_cast(tuple->get_slot(8)) = 9; - StringValue* str_val = reinterpret_cast(tuple->get_slot(16)); - str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10); - str_val->len = 3; - memcpy(str_val->ptr, "abc", str_val->len); - batch.commit_last_row(); - } - // 13, 25, "abcd" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - batch.get_row(batch.add_row())->set_tuple(0, tuple); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast(tuple->get_slot(4)) = 13; - *reinterpret_cast(tuple->get_slot(8)) = 25; - StringValue* str_val = reinterpret_cast(tuple->get_slot(16)); - str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10); - str_val->len = 4; - memcpy(str_val->ptr, 
"abcd", str_val->len); - - batch.commit_last_row(); - } - // 14, 50, "abcde" - { - Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size()); - batch.get_row(batch.add_row())->set_tuple(0, tuple); - memset(tuple, 0, tuple_desc->byte_size()); - - *reinterpret_cast(tuple->get_slot(4)) = 14; - *reinterpret_cast(tuple->get_slot(8)) = 50; - StringValue* str_val = reinterpret_cast(tuple->get_slot(16)); - str_val->ptr = reinterpret_cast(batch.tuple_data_pool()->allocate(10)); - str_val->len = 5; - memcpy(str_val->ptr, "abcde", str_val->len); - - batch.commit_last_row(); - } - st = sink.send(&state, &batch); - EXPECT_TRUE(st.ok()); - // close - st = sink.close(&state, Status::OK()); - EXPECT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. ") - << st.to_string(); - - // each node has a eof - EXPECT_EQ(2, service->_eof_counters); - EXPECT_EQ(2 * 3, service->_row_counters); - - // 2node * 2 - EXPECT_EQ(0, state.num_rows_load_filtered()); -} - -TEST_F(OlapTableSinkTest, init_fail1) { - TUniqueId fragment_id; - TQueryOptions query_options; - query_options.batch_size = 1; - RuntimeState state(fragment_id, query_options, TQueryGlobals(), _env); - state.init_mem_trackers(TUniqueId()); - - ObjectPool obj_pool; - TDescriptorTable tdesc_tbl; - auto t_data_sink = get_data_sink(&tdesc_tbl); - - // crate desc_tabl - DescriptorTbl* desc_tbl = nullptr; - auto st = DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); - EXPECT_TRUE(st.ok()); - state._desc_tbl = desc_tbl; - - RowDescriptor row_desc(*desc_tbl, {0}, {false}); - - // expr - std::vector exprs; - exprs.resize(1); - exprs[0].nodes.resize(1); - exprs[0].nodes[0].node_type = TExprNodeType::SLOT_REF; - exprs[0].nodes[0].type = tdesc_tbl.slotDescriptors[3].slotType; - exprs[0].nodes[0].num_children = 0; - exprs[0].nodes[0].__isset.slot_ref = true; - exprs[0].nodes[0].slot_ref.slot_id = 0; - exprs[0].nodes[0].slot_ref.tuple_id = 1; - - { - OlapTableSink sink(&obj_pool, row_desc, exprs, &st); - EXPECT_TRUE(st.ok()); - - // set output tuple_id - t_data_sink.olap_table_sink.tuple_id = 5; - // init - st = sink.init(t_data_sink); - EXPECT_TRUE(st.ok()); - st = sink.prepare(&state); - EXPECT_FALSE(st.ok()); - sink.close(&state, st); - } - { - OlapTableSink sink(&obj_pool, row_desc, exprs, &st); - EXPECT_TRUE(st.ok()); - - // set output tuple_id - t_data_sink.olap_table_sink.tuple_id = 1; - // init - st = sink.init(t_data_sink); - EXPECT_TRUE(st.ok()); - st = sink.prepare(&state); - EXPECT_FALSE(st.ok()); - sink.close(&state, st); - } -} - -TEST_F(OlapTableSinkTest, init_fail3) { - TUniqueId fragment_id; - TQueryOptions query_options; - query_options.batch_size = 1; - RuntimeState state(fragment_id, query_options, TQueryGlobals(), _env); - state.init_mem_trackers(TUniqueId()); - - ObjectPool obj_pool; - TDescriptorTable tdesc_tbl; - auto t_data_sink = get_data_sink(&tdesc_tbl); - - // crate desc_tabl - DescriptorTbl* desc_tbl = nullptr; - auto st = DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl); - EXPECT_TRUE(st.ok()); - state._desc_tbl = desc_tbl; - - RowDescriptor row_desc(*desc_tbl, {0}, {false}); - - // expr - std::vector exprs; - exprs.resize(3); - exprs[0].nodes.resize(1); - exprs[0].nodes[0].node_type = TExprNodeType::SLOT_REF; - exprs[0].nodes[0].type = tdesc_tbl.slotDescriptors[3].slotType; - exprs[0].nodes[0].num_children = 0; - exprs[0].nodes[0].__isset.slot_ref = true; - exprs[0].nodes[0].slot_ref.slot_id = 0; - exprs[0].nodes[0].slot_ref.tuple_id = 1; - - exprs[1].nodes.resize(1); - 
- exprs[1].nodes[0].node_type = TExprNodeType::SLOT_REF;
- exprs[1].nodes[0].type = tdesc_tbl.slotDescriptors[3].slotType;
- exprs[1].nodes[0].num_children = 0;
- exprs[1].nodes[0].__isset.slot_ref = true;
- exprs[1].nodes[0].slot_ref.slot_id = 1;
- exprs[1].nodes[0].slot_ref.tuple_id = 1;
-
- exprs[2].nodes.resize(1);
- exprs[2].nodes[0].node_type = TExprNodeType::SLOT_REF;
- exprs[2].nodes[0].type = tdesc_tbl.slotDescriptors[5].slotType;
- exprs[2].nodes[0].num_children = 0;
- exprs[2].nodes[0].__isset.slot_ref = true;
- exprs[2].nodes[0].slot_ref.slot_id = 2;
- exprs[2].nodes[0].slot_ref.tuple_id = 1;
-
- OlapTableSink sink(&obj_pool, row_desc, exprs, &st);
- EXPECT_TRUE(st.ok());
-
- // set output tuple_id
- t_data_sink.olap_table_sink.tuple_id = 1;
- // init
- st = sink.init(t_data_sink);
- EXPECT_TRUE(st.ok());
- st = sink.prepare(&state);
- EXPECT_FALSE(st.ok());
- sink.close(&state, st);
-}
-
-TEST_F(OlapTableSinkTest, init_fail4) {
- TUniqueId fragment_id;
- TQueryOptions query_options;
- query_options.batch_size = 1;
- RuntimeState state(fragment_id, query_options, TQueryGlobals(), _env);
- state.init_mem_trackers(TUniqueId());
-
- ObjectPool obj_pool;
- TDescriptorTable tdesc_tbl;
- auto t_data_sink = get_data_sink(&tdesc_tbl);
-
- // create desc_tbl
- DescriptorTbl* desc_tbl = nullptr;
- auto st = DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
- EXPECT_TRUE(st.ok());
- state._desc_tbl = desc_tbl;
-
- RowDescriptor row_desc(*desc_tbl, {0}, {false});
-
- // expr
- std::vector<TExpr> exprs;
- exprs.resize(3);
- exprs[0].nodes.resize(1);
- exprs[0].nodes[0].node_type = TExprNodeType::SLOT_REF;
- exprs[0].nodes[0].type = tdesc_tbl.slotDescriptors[3].slotType;
- exprs[0].nodes[0].num_children = 0;
- exprs[0].nodes[0].__isset.slot_ref = true;
- exprs[0].nodes[0].slot_ref.slot_id = 0;
- exprs[0].nodes[0].slot_ref.tuple_id = 1;
-
- exprs[1].nodes.resize(1);
- exprs[1].nodes[0].node_type = TExprNodeType::SLOT_REF;
- exprs[1].nodes[0].type = tdesc_tbl.slotDescriptors[4].slotType;
- exprs[1].nodes[0].num_children = 0;
- exprs[1].nodes[0].__isset.slot_ref = true;
- exprs[1].nodes[0].slot_ref.slot_id = 1;
- exprs[1].nodes[0].slot_ref.tuple_id = 1;
-
- exprs[2].nodes.resize(1);
- exprs[2].nodes[0].node_type = TExprNodeType::SLOT_REF;
- exprs[2].nodes[0].type = tdesc_tbl.slotDescriptors[5].slotType;
- exprs[2].nodes[0].num_children = 0;
- exprs[2].nodes[0].__isset.slot_ref = true;
- exprs[2].nodes[0].slot_ref.slot_id = 2;
- exprs[2].nodes[0].slot_ref.tuple_id = 1;
-
- OlapTableSink sink(&obj_pool, row_desc, exprs, &st);
- EXPECT_TRUE(st.ok());
-
- // set output tuple_id
- t_data_sink.olap_table_sink.tuple_id = 1;
- // init
- t_data_sink.olap_table_sink.partition.partitions[0].indexes[0].tablets = {101, 102};
- st = sink.init(t_data_sink);
- EXPECT_TRUE(st.ok());
- st = sink.prepare(&state);
- EXPECT_FALSE(st.ok());
- sink.close(&state, st);
-}
-
-TEST_F(OlapTableSinkTest, add_batch_failed) {
- // start brpc service first
- _server = new brpc::Server();
- auto service = new TestInternalService();
- EXPECT_EQ(_server->AddService(service, brpc::SERVER_OWNS_SERVICE), 0);
- brpc::ServerOptions options;
- {
- debug::ScopedLeakCheckDisabler disable_lsan;
- _server->Start(4356, &options);
- }
-
- // Create the ObjectPool before the RuntimeState to better simulate the actual situation.
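- // (Locals are destroyed in reverse order of declaration, so the pool outlives the RuntimeState during teardown.)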
- ObjectPool obj_pool;
-
- TUniqueId fragment_id;
- TQueryOptions query_options;
- query_options.batch_size = 1;
- RuntimeState state(fragment_id, query_options, TQueryGlobals(), _env);
- state.init_mem_trackers(TUniqueId());
-
- TDescriptorTable tdesc_tbl;
- auto t_data_sink = get_data_sink(&tdesc_tbl);
-
- // create desc_tbl
- DescriptorTbl* desc_tbl = nullptr;
- auto st = DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
- EXPECT_TRUE(st.ok());
- state._desc_tbl = desc_tbl;
-
- RowDescriptor row_desc(*desc_tbl, {0}, {false});
-
- // expr
- std::vector<TExpr> exprs;
- exprs.resize(3);
- exprs[0].nodes.resize(1);
- exprs[0].nodes[0].node_type = TExprNodeType::SLOT_REF;
- exprs[0].nodes[0].type = tdesc_tbl.slotDescriptors[3].slotType;
- exprs[0].nodes[0].num_children = 0;
- exprs[0].nodes[0].__isset.slot_ref = true;
- exprs[0].nodes[0].slot_ref.slot_id = 0;
- exprs[0].nodes[0].slot_ref.tuple_id = 1;
-
- exprs[1].nodes.resize(1);
- exprs[1].nodes[0].node_type = TExprNodeType::SLOT_REF;
- exprs[1].nodes[0].type = tdesc_tbl.slotDescriptors[4].slotType;
- exprs[1].nodes[0].num_children = 0;
- exprs[1].nodes[0].__isset.slot_ref = true;
- exprs[1].nodes[0].slot_ref.slot_id = 1;
- exprs[1].nodes[0].slot_ref.tuple_id = 1;
-
- exprs[2].nodes.resize(1);
- exprs[2].nodes[0].node_type = TExprNodeType::SLOT_REF;
- exprs[2].nodes[0].type = tdesc_tbl.slotDescriptors[5].slotType;
- exprs[2].nodes[0].num_children = 0;
- exprs[2].nodes[0].__isset.slot_ref = true;
- exprs[2].nodes[0].slot_ref.slot_id = 2;
- exprs[2].nodes[0].slot_ref.tuple_id = 1;
-
- OlapTableSink sink(&obj_pool, row_desc, exprs, &st);
- EXPECT_TRUE(st.ok());
-
- // set output tuple_id
- t_data_sink.olap_table_sink.tuple_id = 1;
- // init
- st = sink.init(t_data_sink);
- EXPECT_TRUE(st.ok());
- st = sink.prepare(&state);
- EXPECT_TRUE(st.ok());
- st = sink.open(&state);
- EXPECT_TRUE(st.ok());
- // send
- RowBatch batch(row_desc, 1024);
- TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0);
- // 12, 9, "abc"
- {
- Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
- batch.get_row(batch.add_row())->set_tuple(0, tuple);
- memset(tuple, 0, tuple_desc->byte_size());
-
- *reinterpret_cast<int*>(tuple->get_slot(4)) = 12;
- *reinterpret_cast<int64_t*>(tuple->get_slot(8)) = 9;
- StringValue* str_val = reinterpret_cast<StringValue*>(tuple->get_slot(16));
- str_val->ptr = (char*)batch.tuple_data_pool()->allocate(10);
- str_val->len = 3;
- memcpy(str_val->ptr, "abc", str_val->len);
- batch.commit_last_row();
- }
-
- // Channels will be cancelled internally, because brpc returns k_add_batch_status.
- k_add_batch_status = Status::InternalError("dummy failed");
- st = sink.send(&state, &batch);
- EXPECT_TRUE(st.ok());
-
- // Send the batch a few more times, which can leave _cur_batch or _pending_batches (in the channels) non-empty,
- // to make sure resources are still released in the right order.
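- // The return values of the extra sends are deliberately ignored; the channels have already been cancelled.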
- sink.send(&state, &batch);
- sink.send(&state, &batch);
-
- // close
- st = sink.close(&state, Status::OK());
- EXPECT_FALSE(st.ok());
-}
-
-TEST_F(OlapTableSinkTest, decimal) {
- // start brpc service first
- _server = new brpc::Server();
- auto service = new TestInternalService();
- EXPECT_EQ(_server->AddService(service, brpc::SERVER_OWNS_SERVICE), 0);
- brpc::ServerOptions options;
- {
- debug::ScopedLeakCheckDisabler disable_lsan;
- _server->Start(4356, &options);
- }
-
- TUniqueId fragment_id;
- TQueryOptions query_options;
- query_options.batch_size = 1;
- RuntimeState state(fragment_id, query_options, TQueryGlobals(), _env);
- state.init_mem_trackers(TUniqueId());
-
- ObjectPool obj_pool;
- TDescriptorTable tdesc_tbl;
- auto t_data_sink = get_decimal_sink(&tdesc_tbl);
-
- // create desc_tbl
- DescriptorTbl* desc_tbl = nullptr;
- auto st = DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
- EXPECT_TRUE(st.ok());
- state._desc_tbl = desc_tbl;
-
- TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0);
- LOG(INFO) << "tuple_desc=" << tuple_desc->debug_string();
-
- RowDescriptor row_desc(*desc_tbl, {0}, {false});
- service->_row_desc = &row_desc;
- std::set<std::string> output_set;
- service->_output_set = &output_set;
-
- OlapTableSink sink(&obj_pool, row_desc, {}, &st);
- EXPECT_TRUE(st.ok());
-
- // init
- st = sink.init(t_data_sink);
- EXPECT_TRUE(st.ok());
- // prepare
- st = sink.prepare(&state);
- EXPECT_TRUE(st.ok());
- // open
- st = sink.open(&state);
- EXPECT_TRUE(st.ok());
- // send
- RowBatch batch(row_desc, 1024);
- // 12, 12.3
- {
- Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
- batch.get_row(batch.add_row())->set_tuple(0, tuple);
- memset(tuple, 0, tuple_desc->byte_size());
-
- *reinterpret_cast<int*>(tuple->get_slot(4)) = 12;
- DecimalV2Value* dec_val = reinterpret_cast<DecimalV2Value*>(tuple->get_slot(16));
- *dec_val = DecimalV2Value(std::string("12.3"));
- batch.commit_last_row();
- }
- // 13, 123.123456789
- {
- Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
- batch.get_row(batch.add_row())->set_tuple(0, tuple);
- memset(tuple, 0, tuple_desc->byte_size());
-
- *reinterpret_cast<int*>(tuple->get_slot(4)) = 13;
- DecimalV2Value* dec_val = reinterpret_cast<DecimalV2Value*>(tuple->get_slot(16));
- *dec_val = DecimalV2Value(std::string("123.123456789"));
-
- batch.commit_last_row();
- }
- // 14, 123456789123.1234
- {
- Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
- batch.get_row(batch.add_row())->set_tuple(0, tuple);
- memset(tuple, 0, tuple_desc->byte_size());
-
- *reinterpret_cast<int*>(tuple->get_slot(4)) = 14;
- DecimalV2Value* dec_val = reinterpret_cast<DecimalV2Value*>(tuple->get_slot(16));
- *dec_val = DecimalV2Value(std::string("123456789123.1234"));
-
- batch.commit_last_row();
- }
- st = sink.send(&state, &batch);
- EXPECT_TRUE(st.ok());
- // close
- st = sink.close(&state, Status::OK());
- EXPECT_TRUE(st.ok() || st.to_string() == "Internal error: wait close failed. 
") - << st.to_string(); - - EXPECT_EQ(2, output_set.size()); - EXPECT_TRUE(output_set.count("[(12 12.3)]") > 0); - EXPECT_TRUE(output_set.count("[(13 123.12)]") > 0); - // EXPECT_TRUE(output_set.count("[(14 999.99)]") > 0); -} - -} // namespace stream_load -} // namespace doris diff --git a/be/test/exec/unix_odbc_test.cpp b/be/test/exec/unix_odbc_test.cpp deleted file mode 100644 index 051393f11e..0000000000 --- a/be/test/exec/unix_odbc_test.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include diff --git a/be/test/runtime/buffered_tuple_stream_test.cpp b/be/test/runtime/buffered_tuple_stream_test.cpp deleted file mode 100644 index 085200b163..0000000000 --- a/be/test/runtime/buffered_tuple_stream_test.cpp +++ /dev/null @@ -1,265 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "runtime/buffered_tuple_stream.h" - -#include -#include -#include - -#include - -#include "common/object_pool.h" -#include "exec/sort_exec_exprs.h" -#include "exprs/expr.h" -#include "gen_cpp/Descriptors_types.h" -#include "gen_cpp/Exprs_types.h" -#include "gen_cpp/ImpalaInternalService_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/buffered_block_mgr.h" -#include "runtime/descriptors.h" -#include "runtime/primitive_type.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/sorter.h" -#include "runtime/tuple_row.h" -#include "util/debug_util.h" - -namespace doris { - -class BufferedTupleStreamTest : public testing::Test { -public: - RowBatch* create_row_batch(int num_rows); - BufferedTupleStreamTest() { - _object_pool = new ObjectPool(); - _profile = new RuntimeProfile("bufferedStream"); - _runtime_state = new RuntimeState("BufferedTupleStreamTest"); - _runtime_state->exec_env_ = &_exec_env; - _runtime_state->create_block_mgr(); - { - TExpr expr; - { - TExprNode node; - - node.node_type = TExprNodeType::SLOT_REF; - node.type = ToTColumnTypeThrift(TPrimitiveType::BIGINT); - node.num_children = 0; - TSlotRef data; - data.slot_id = 0; - data.tuple_id = 0; - node.__set_slot_ref(data); - expr.nodes.push_back(node); - } - _sort_tuple_slot_expr.push_back(expr); - } - { - TExpr expr; - { - TExprNode node; - - node.node_type = TExprNodeType::SLOT_REF; - node.type = ToTColumnTypeThrift(TPrimitiveType::BIGINT); - node.num_children = 0; - TSlotRef data; - data.slot_id = 1; - data.tuple_id = 1; - node.__set_slot_ref(data); - expr.nodes.push_back(node); - } - _ordering_exprs.push_back(expr); - } - _is_asc_order.push_back(true); - _nulls_first.push_back(true); - - { - TTupleDescriptor tuple_desc; - TDescriptorTable thrift_desc_tbl; - { - tuple_desc.__set_id(0); - tuple_desc.__set_byteSize(8); - tuple_desc.__set_numNullBytes(1); - thrift_desc_tbl.tupleDescriptors.push_back(tuple_desc); - } - { - tuple_desc.__set_id(1); - tuple_desc.__set_byteSize(8); - tuple_desc.__set_numNullBytes(1); - thrift_desc_tbl.tupleDescriptors.push_back(tuple_desc); - } - - TSlotDescriptor slot_desc; - { - slot_desc.__set_id(0); - slot_desc.__set_parent(0); - slot_desc.__set_slotType(TPrimitiveType::BIGINT); - slot_desc.__set_columnPos(0); - slot_desc.__set_byteOffset(0); - slot_desc.__set_nullIndicatorByte(0); - slot_desc.__set_nullIndicatorBit(-1); - slot_desc.__set_slotIdx(0); - slot_desc.__set_isMaterialized(true); - thrift_desc_tbl.slotDescriptors.push_back(slot_desc); - } - { - slot_desc.__set_id(1); - slot_desc.__set_parent(1); - slot_desc.__set_slotType(TPrimitiveType::BIGINT); - slot_desc.__set_columnPos(0); - slot_desc.__set_byteOffset(0); - slot_desc.__set_nullIndicatorByte(0); - slot_desc.__set_nullIndicatorBit(-1); - slot_desc.__set_slotIdx(0); - slot_desc.__set_isMaterialized(true); - thrift_desc_tbl.slotDescriptors.push_back(slot_desc); - } - Status status = DescriptorTbl::Create(_object_pool, thrift_desc_tbl, &_desc_tbl); - DCHECK(status.ok()); - _runtime_state->set_desc_tbl(_desc_tbl); - } - { - std::vector row_tuples; - std::vector nullable_tuples; - nullable_tuples.push_back(0); - row_tuples.push_back(0); - _child_row_desc = new RowDescriptor(*_desc_tbl, row_tuples, nullable_tuples); - } - /* - { - std::vector row_tuples; - std::vector nullable_tuples; - nullable_tuples.push_back(1); - row_tuples.push_back(1); - _output_row_desc = new RowDescriptor(*_desc_tbl, row_tuples, nullable_tuples); - } - */ - } - 
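- // Note: _output_row_desc is never allocated here (its setup block above is commented out), so it is not deleted below either.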
virtual ~BufferedTupleStreamTest() { - delete _child_row_desc; - delete _runtime_state; - delete _profile; - delete _object_pool; - // delete _output_row_desc; - } - -protected: - virtual void SetUp() {} - -private: - ExecEnv _exec_env; - RuntimeState* _runtime_state; - RowDescriptor* _child_row_desc; - RowDescriptor* _output_row_desc; - DescriptorTbl* _desc_tbl; - ObjectPool* _object_pool; - std::vector _sort_tuple_slot_expr; - std::vector _ordering_exprs; - std::vector _is_asc_order; - std::vector _nulls_first; - - RuntimeProfile* _profile; -}; - -RowBatch* BufferedTupleStreamTest::create_row_batch(int num_rows) { - RowBatch* batch = _object_pool->Add(new RowBatch(*_child_row_desc, num_rows)); - int64_t* tuple_mem = reinterpret_cast( - batch->tuple_data_pool()->Allocate(sizeof(int64_t) * num_rows)); - - for (int i = 0; i < num_rows; ++i) { - int idx = batch->AddRow(); - TupleRow* row = batch->GetRow(idx); - *tuple_mem = i; - row->SetTuple(0, reinterpret_cast(tuple_mem)); - - batch->CommitLastRow(); - tuple_mem++; - } - return batch; -} - -TEST_F(BufferedTupleStreamTest, init_bufferStream) { - BufferedTupleStream* input_stream = - new BufferedTupleStream(_runtime_state, *_child_row_desc, _runtime_state->block_mgr()); - Status status = input_stream->init(_profile); - EXPECT_TRUE(status.ok()); - input_stream->close(); - delete input_stream; -} - -TEST_F(BufferedTupleStreamTest, addRow_bufferStream) { - BufferedTupleStream* input_stream = - new BufferedTupleStream(_runtime_state, *_child_row_desc, _runtime_state->block_mgr()); - Status status = input_stream->init(_profile); - EXPECT_TRUE(status.ok()); - int num_rows = 5; - RowBatch* batch = create_row_batch(num_rows); - for (int i = 0; i < num_rows; i++) { - TupleRow* row = batch->GetRow(i); - input_stream->add_row(row); - EXPECT_TRUE(status.ok()); - } - EXPECT_EQ(input_stream->num_rows(), num_rows); - input_stream->close(); - delete input_stream; -} - -TEST_F(BufferedTupleStreamTest, getNext_bufferStream) { - BufferedTupleStream* input_stream = - new BufferedTupleStream(_runtime_state, *_child_row_desc, _runtime_state->block_mgr()); - Status status = input_stream->init(_profile); - EXPECT_TRUE(status.ok()); - int num_rows = 5; - RowBatch* batch = create_row_batch(num_rows * 2); - for (int i = 0; i < num_rows * 2; i++) { - TupleRow* row = batch->GetRow(i); - input_stream->add_row(row); - EXPECT_TRUE(status.ok()); - } - - EXPECT_EQ(input_stream->num_rows(), num_rows * 2); - - RowBatch output_batch(*_child_row_desc, num_rows); - bool eos; - - status = input_stream->get_next(&output_batch, &eos); - EXPECT_TRUE(status.ok()); - - EXPECT_EQ("[(0)]", PrintRow(output_batch.GetRow(0), *_child_row_desc)); - EXPECT_EQ("[(1)]", PrintRow(output_batch.GetRow(1), *_child_row_desc)); - EXPECT_EQ("[(2)]", PrintRow(output_batch.GetRow(2), *_child_row_desc)); - EXPECT_EQ("[(3)]", PrintRow(output_batch.GetRow(3), *_child_row_desc)); - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(4), *_child_row_desc)); - - output_batch.Reset(); - - status = input_stream->get_next(&output_batch, &eos); - - EXPECT_TRUE(status.ok()); - EXPECT_EQ("[(5)]", PrintRow(output_batch.GetRow(0), *_child_row_desc)); - EXPECT_EQ("[(6)]", PrintRow(output_batch.GetRow(1), *_child_row_desc)); - EXPECT_EQ("[(7)]", PrintRow(output_batch.GetRow(2), *_child_row_desc)); - EXPECT_EQ("[(8)]", PrintRow(output_batch.GetRow(3), *_child_row_desc)); - EXPECT_EQ("[(9)]", PrintRow(output_batch.GetRow(4), *_child_row_desc)); - - EXPECT_EQ(input_stream->rows_returned(), num_rows * 2); - 
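- // Both five-row get_next() calls together returned all ten buffered rows.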
input_stream->close(); - delete input_stream; -} - -} // namespace doris - -\n \ No newline at end of file diff --git a/be/test/runtime/memory_scratch_sink_test.cpp b/be/test/runtime/memory_scratch_sink_test.cpp deleted file mode 100644 index 6ebf608231..0000000000 --- a/be/test/runtime/memory_scratch_sink_test.cpp +++ /dev/null @@ -1,240 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "runtime/memory_scratch_sink.h" - -#include -#include -#include - -#include - -#include "common/config.h" -#include "common/logging.h" -#include "exec/csv_scan_node.h" -#include "exprs/expr.h" -#include "gen_cpp/DorisExternalService_types.h" -#include "gen_cpp/Exprs_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "olap/options.h" -#include "olap/row.h" -#include "runtime/primitive_type.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/test_env.h" -#include "runtime/tuple_row.h" -#include "testutil/desc_tbl_builder.h" -#include "util/blocking_queue.hpp" - -namespace doris { - -class MemoryScratchSinkTest : public testing::Test { -public: - MemoryScratchSinkTest() { - _env = std::make_shared(); - { - TExpr expr; - { - TExprNode node; - node.node_type = TExprNodeType::INT_LITERAL; - node.type = gen_type_desc(TPrimitiveType::INT, "int_column"); - node.num_children = 0; - TIntLiteral data; - data.value = 1; - node.__set_int_literal(data); - expr.nodes.push_back(node); - } - _exprs.push_back(expr); - } - } - - ~MemoryScratchSinkTest() { delete _state; } - - virtual void SetUp() { - config::periodic_counter_update_period_ms = 500; - config::storage_root_path = "./data"; - - EXPECT_EQ(system("mkdir -p ./test_run/output/"), 0); - EXPECT_EQ(system("pwd"), 0); - EXPECT_EQ(system("cp -r ./be/test/runtime/test_data ./test_run/."), 0); - - init(); - } - - virtual void TearDown() { - _obj_pool.clear(); - EXPECT_EQ(system("rm -rf ./test_run"), 0); - } - - void init(); - void init_desc_tbl(); - void init_runtime_state(); - -private: - ObjectPool _obj_pool; - std::shared_ptr _env; - // std::vector _exprs; - TDescriptorTable _t_desc_table; - RuntimeState* _state = nullptr; - TPlanNode _tnode; - RowDescriptor* _row_desc = nullptr; - TMemoryScratchSink _tsink; - DescriptorTbl* _desc_tbl = nullptr; - std::vector _exprs; -}; - -void MemoryScratchSinkTest::init() { - init_desc_tbl(); - init_runtime_state(); -} - -void MemoryScratchSinkTest::init_runtime_state() { - TQueryOptions query_options; - query_options.batch_size = 1024; - TUniqueId query_id; - query_id.lo = 10; - query_id.hi = 100; - _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _env->exec_env()); - _state->set_desc_tbl(_desc_tbl); - _state->_load_dir = "./test_run/output/"; - 
_state->init_mem_trackers(TUniqueId()); -} - -void MemoryScratchSinkTest::init_desc_tbl() { - // TTableDescriptor - TTableDescriptor t_table_desc; - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = "test"; - t_table_desc.tableName = "test_table_name"; - t_table_desc.dbName = "test_db_name"; - t_table_desc.__isset.olapTable = true; - - _t_desc_table.tableDescriptors.push_back(t_table_desc); - _t_desc_table.__isset.tableDescriptors = true; - - // TSlotDescriptor - std::vector slot_descs; - int offset = 1; - int i = 0; - // int_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(gen_type_desc(TPrimitiveType::INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("int_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - _t_desc_table.__set_slotDescriptors(slot_descs); - - // TTupleDescriptor - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - _t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, _t_desc_table, &_desc_tbl); - - std::vector row_tids; - row_tids.push_back(0); - - std::vector nullable_tuples; - nullable_tuples.push_back(false); - _row_desc = _obj_pool.add(new RowDescriptor(*_desc_tbl, row_tids, nullable_tuples)); - - // node - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::CSV_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.csv_scan_node.tuple_id = 0; - - _tnode.csv_scan_node.__set_column_separator(","); - _tnode.csv_scan_node.__set_line_delimiter("\n"); - - // column_type_mapping - std::map column_type_map; - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::INT); - column_type_map["int_column"] = column_type; - } - - _tnode.csv_scan_node.__set_column_type_mapping(column_type_map); - - std::vector columns; - columns.push_back("int_column"); - _tnode.csv_scan_node.__set_columns(columns); - - _tnode.csv_scan_node.__isset.unspecified_columns = true; - _tnode.csv_scan_node.__isset.default_values = true; - _tnode.csv_scan_node.max_filter_ratio = 0.5; - _tnode.__isset.csv_scan_node = true; -} - -TEST_F(MemoryScratchSinkTest, work_flow_normal) { - MemoryScratchSink sink(*_row_desc, _exprs, _tsink); - TDataSink data_sink; - data_sink.memory_scratch_sink = _tsink; - EXPECT_TRUE(sink.init(data_sink).ok()); - EXPECT_TRUE(sink.prepare(_state).ok()); - std::vector file_paths; - file_paths.push_back("./test_run/test_data/csv_data"); - _tnode.csv_scan_node.__set_file_paths(file_paths); - - CsvScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - scan_node.init(_tnode); - Status status = scan_node.prepare(_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(_state); - EXPECT_TRUE(status.ok()); - - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - // int num = std::min(row_batch.num_rows(), 10); - int num = row_batch.num_rows(); - - 
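- // The csv_data fixture is expected to yield exactly six rows in a single batch.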
EXPECT_EQ(6, num); - EXPECT_TRUE(sink.send(_state, &row_batch).ok()); - EXPECT_TRUE(sink.close(_state, Status::OK()).ok()); - } - - EXPECT_TRUE(scan_node.close(_state).ok()); -} - -} // namespace doris diff --git a/be/test/runtime/sorter_test.cpp b/be/test/runtime/sorter_test.cpp deleted file mode 100644 index fe709ca935..0000000000 --- a/be/test/runtime/sorter_test.cpp +++ /dev/null @@ -1,345 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "runtime/sorter.h" - -#include -#include -#include - -#include - -#include "common/object_pool.h" -#include "exec/sort_exec_exprs.h" -#include "exprs/expr.h" -#include "gen_cpp/Descriptors_types.h" -#include "gen_cpp/Exprs_types.h" -#include "gen_cpp/PaloInternalService_types.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/buffered_block_mgr.h" -#include "runtime/descriptors.h" -#include "runtime/primitive_type.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" -#include "util/debug_util.h" - -namespace doris { - -class SorterTest : public testing::Test { -public: - RowBatch* CreateRowBatch(int num_rows); - ObjectPool* get_object_pool() { return _object_pool; } - RuntimeState* get_runtime_state() { return _runtime_state; } - SorterTest() { - _object_pool = new ObjectPool(); - _runtime_state = new RuntimeState("SorterTest"); - _runtime_state->exec_env_ = &_exec_env; - _runtime_state->create_block_mgr(); - { - TExpr expr; - { - TExprNode node; - - node.node_type = TExprNodeType::SLOT_REF; - node.type = ToTColumnTypeThrift(TPrimitiveType::BIGINT); - node.num_children = 0; - TSlotRef data; - data.slot_id = 0; - data.tuple_id = 0; - node.__set_slot_ref(data); - expr.nodes.push_back(node); - } - _sort_tuple_slot_expr.push_back(expr); - } - { - TExpr expr; - { - TExprNode node; - - node.node_type = TExprNodeType::SLOT_REF; - node.type = ToTColumnTypeThrift(TPrimitiveType::BIGINT); - node.num_children = 0; - TSlotRef data; - data.slot_id = 1; - data.tuple_id = 1; - node.__set_slot_ref(data); - expr.nodes.push_back(node); - } - _ordering_exprs.push_back(expr); - } - _is_asc_order.push_back(true); - _nulls_first.push_back(true); - - { - TTupleDescriptor tuple_desc; - TDescriptorTable thrift_desc_tbl; - { - tuple_desc.__set_id(0); - tuple_desc.__set_byteSize(8); - tuple_desc.__set_numNullBytes(1); - thrift_desc_tbl.tupleDescriptors.push_back(tuple_desc); - } - { - tuple_desc.__set_id(1); - tuple_desc.__set_byteSize(8); - tuple_desc.__set_numNullBytes(1); - thrift_desc_tbl.tupleDescriptors.push_back(tuple_desc); - } - - TSlotDescriptor slot_desc; - { - slot_desc.__set_id(0); - slot_desc.__set_parent(0); - slot_desc.__set_slotType(TPrimitiveType::BIGINT); - slot_desc.__set_columnPos(0); - 
slot_desc.__set_byteOffset(0); - slot_desc.__set_nullIndicatorByte(0); - slot_desc.__set_nullIndicatorBit(-1); - slot_desc.__set_slotIdx(0); - slot_desc.__set_isMaterialized(true); - thrift_desc_tbl.slotDescriptors.push_back(slot_desc); - } - { - slot_desc.__set_id(1); - slot_desc.__set_parent(1); - slot_desc.__set_slotType(TPrimitiveType::BIGINT); - slot_desc.__set_columnPos(0); - slot_desc.__set_byteOffset(0); - slot_desc.__set_nullIndicatorByte(0); - slot_desc.__set_nullIndicatorBit(-1); - slot_desc.__set_slotIdx(0); - slot_desc.__set_isMaterialized(true); - thrift_desc_tbl.slotDescriptors.push_back(slot_desc); - } - Status status = DescriptorTbl::Create(_object_pool, thrift_desc_tbl, &_desc_tbl); - DCHECK(status.ok()); - _runtime_state->set_desc_tbl(_desc_tbl); - } - { - std::vector<TTupleId> row_tuples; - std::vector<bool> nullable_tuples; - nullable_tuples.push_back(0); - row_tuples.push_back(0); - _child_row_desc = new RowDescriptor(*_desc_tbl, row_tuples, nullable_tuples); - } - { - std::vector<TTupleId> row_tuples; - std::vector<bool> nullable_tuples; - nullable_tuples.push_back(1); - row_tuples.push_back(1); - _output_row_desc = new RowDescriptor(*_desc_tbl, row_tuples, nullable_tuples); - } - _runtime_profile = new RuntimeProfile("sorter"); - } - virtual ~SorterTest() { - delete _child_row_desc; - delete _output_row_desc; - delete _object_pool; - delete _runtime_state; - delete _runtime_profile; - } - -protected: - virtual void SetUp() {} - -private: - ExecEnv _exec_env; - RuntimeState* _runtime_state; - RowDescriptor* _child_row_desc; - RowDescriptor* _output_row_desc; - RuntimeProfile* _runtime_profile; - std::vector<TExpr> _sort_tuple_slot_expr; - std::vector<TExpr> _ordering_exprs; - std::vector<bool> _is_asc_order; - std::vector<bool> _nulls_first; - DescriptorTbl* _desc_tbl; - ObjectPool* _object_pool; -}; - -TEST_F(SorterTest, init_sort_exec_exprs) { - // empty sort_tuple_slot_expr - { - SortExecExprs exec_exprs; - Status status = exec_exprs.init(_ordering_exprs, nullptr, get_object_pool()); - EXPECT_TRUE(status.ok()); - } - // full sort_tuple_slot_expr - { - SortExecExprs exec_exprs; - Status status = exec_exprs.init(_ordering_exprs, &_sort_tuple_slot_expr, get_object_pool()); - EXPECT_TRUE(status.ok()); - } -} - -TEST_F(SorterTest, prepare_sort_exec_exprs) { - { - SortExecExprs exec_exprs; - Status status = exec_exprs.init(_ordering_exprs, nullptr, get_object_pool()); - EXPECT_TRUE(status.ok()); - status = exec_exprs.prepare(_runtime_state, *_child_row_desc, *_output_row_desc); - EXPECT_TRUE(status.ok()); - } - - { - SortExecExprs exec_exprs; - Status status = exec_exprs.init(_ordering_exprs, &_sort_tuple_slot_expr, get_object_pool()); - EXPECT_TRUE(status.ok()); - status = exec_exprs.prepare(_runtime_state, *_child_row_desc, *_output_row_desc); - EXPECT_TRUE(status.ok()); - } -} - -RowBatch* SorterTest::CreateRowBatch(int num_rows) { - RowBatch* batch = _object_pool->Add(new RowBatch(*_child_row_desc, num_rows)); - int64_t* tuple_mem = reinterpret_cast<int64_t*>( - batch->tuple_data_pool()->Allocate(sizeof(int64_t) * num_rows)); - - for (int i = 0; i < num_rows; ++i) { - int idx = batch->AddRow(); - TupleRow* row = batch->GetRow(idx); - *tuple_mem = i; - row->SetTuple(0, reinterpret_cast<Tuple*>(tuple_mem)); - - batch->CommitLastRow(); - tuple_mem++; - } - return batch; -} - -TEST_F(SorterTest, sorter_run_asc) { - SortExecExprs exec_exprs; - Status status = exec_exprs.init(_ordering_exprs, &_sort_tuple_slot_expr, _object_pool); - EXPECT_TRUE(status.ok()); - status = exec_exprs.prepare(_runtime_state, *_child_row_desc, *_output_row_desc); -
EXPECT_TRUE(status.ok()); - - TupleRowComparator less_than(exec_exprs.lhs_ordering_expr_ctxs(), - exec_exprs.rhs_ordering_expr_ctxs(), _is_asc_order, _nulls_first); - Sorter* sorter = new Sorter(less_than, exec_exprs.sort_tuple_slot_expr_ctxs(), _child_row_desc, - _runtime_profile, _runtime_state); - - int num_rows = 5; - RowBatch* batch = CreateRowBatch(num_rows); - status = sorter->add_batch(batch); - EXPECT_TRUE(status.ok()); - status = sorter->add_batch(batch); - EXPECT_TRUE(status.ok()); - sorter->input_done(); - - RowBatch output_batch(*_child_row_desc, 2 * num_rows); - bool eos; - status = sorter->get_next(&output_batch, &eos); - EXPECT_TRUE(status.ok()); - - EXPECT_EQ("[(0)]", PrintRow(output_batch.GetRow(0), *_child_row_desc)); - EXPECT_EQ("[(0)]", PrintRow(output_batch.GetRow(1), *_child_row_desc)); - EXPECT_EQ("[(1)]", PrintRow(output_batch.GetRow(2), *_child_row_desc)); - EXPECT_EQ("[(1)]", PrintRow(output_batch.GetRow(3), *_child_row_desc)); - EXPECT_EQ("[(2)]", PrintRow(output_batch.GetRow(4), *_child_row_desc)); - EXPECT_EQ("[(2)]", PrintRow(output_batch.GetRow(5), *_child_row_desc)); - EXPECT_EQ("[(3)]", PrintRow(output_batch.GetRow(6), *_child_row_desc)); - EXPECT_EQ("[(3)]", PrintRow(output_batch.GetRow(7), *_child_row_desc)); - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(8), *_child_row_desc)); - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(9), *_child_row_desc)); - - delete sorter; -} - -/* reverse order: with more than 16 elements, the sorter falls back to quick sort */ -TEST_F(SorterTest, sorter_run_desc_with_quick_sort) { - SortExecExprs exec_exprs; - Status status = exec_exprs.init(_ordering_exprs, &_sort_tuple_slot_expr, _object_pool); - EXPECT_TRUE(status.ok()); - status = exec_exprs.prepare(_runtime_state, *_child_row_desc, *_output_row_desc); - EXPECT_TRUE(status.ok()); - - _is_asc_order.clear(); - _is_asc_order.push_back(false); - TupleRowComparator less_than(exec_exprs.lhs_ordering_expr_ctxs(), - exec_exprs.rhs_ordering_expr_ctxs(), _is_asc_order, _nulls_first); - Sorter* sorter = new Sorter(less_than, exec_exprs.sort_tuple_slot_expr_ctxs(), _child_row_desc, - _runtime_profile, _runtime_state); - - int num_rows = 5; - RowBatch* batch = CreateRowBatch(num_rows); - for (int i = 0; i < 5; i++) { - status = sorter->add_batch(batch); - EXPECT_TRUE(status.ok()); - } - - sorter->input_done(); - - RowBatch output_batch(*_child_row_desc, 2 * num_rows); - bool eos; - status = sorter->get_next(&output_batch, &eos); - EXPECT_TRUE(status.ok()); - - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(0), *_child_row_desc)); - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(1), *_child_row_desc)); - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(2), *_child_row_desc)); - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(3), *_child_row_desc)); - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(4), *_child_row_desc)); - EXPECT_EQ("[(3)]", PrintRow(output_batch.GetRow(5), *_child_row_desc)); - - delete sorter; -} - -TEST_F(SorterTest, sorter_run_desc) { - SortExecExprs exec_exprs; - Status status = exec_exprs.init(_ordering_exprs, &_sort_tuple_slot_expr, _object_pool); - EXPECT_TRUE(status.ok()); - status = exec_exprs.prepare(_runtime_state, *_child_row_desc, *_output_row_desc); - EXPECT_TRUE(status.ok()); - - _is_asc_order.clear(); - _is_asc_order.push_back(false); - TupleRowComparator less_than(exec_exprs.lhs_ordering_expr_ctxs(), - exec_exprs.rhs_ordering_expr_ctxs(), _is_asc_order, _nulls_first); - Sorter* sorter = new Sorter(less_than, exec_exprs.sort_tuple_slot_expr_ctxs(), _child_row_desc, -
_runtime_profile, _runtime_state); - - int num_rows = 5; - RowBatch* batch = CreateRowBatch(num_rows); - status = sorter->add_batch(batch); - EXPECT_TRUE(status.ok()); - status = sorter->add_batch(batch); - EXPECT_TRUE(status.ok()); - sorter->input_done(); - - RowBatch output_batch(*_child_row_desc, 2 * num_rows); - bool eos; - status = sorter->get_next(&output_batch, &eos); - EXPECT_TRUE(status.ok()); - - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(0), *_child_row_desc)); - EXPECT_EQ("[(4)]", PrintRow(output_batch.GetRow(1), *_child_row_desc)); - EXPECT_EQ("[(3)]", PrintRow(output_batch.GetRow(2), *_child_row_desc)); - EXPECT_EQ("[(3)]", PrintRow(output_batch.GetRow(3), *_child_row_desc)); - EXPECT_EQ("[(2)]", PrintRow(output_batch.GetRow(4), *_child_row_desc)); - EXPECT_EQ("[(2)]", PrintRow(output_batch.GetRow(5), *_child_row_desc)); - EXPECT_EQ("[(1)]", PrintRow(output_batch.GetRow(6), *_child_row_desc)); - EXPECT_EQ("[(1)]", PrintRow(output_batch.GetRow(7), *_child_row_desc)); - EXPECT_EQ("[(0)]", PrintRow(output_batch.GetRow(8), *_child_row_desc)); - EXPECT_EQ("[(0)]", PrintRow(output_batch.GetRow(9), *_child_row_desc)); - - delete sorter; -} -} // namespace doris - \ No newline at end of file diff --git a/be/test/util/arrow/arrow_row_batch_test.cpp b/be/test/util/arrow/arrow_row_batch_test.cpp deleted file mode 100644 index 4899758e00..0000000000 --- a/be/test/util/arrow/arrow_row_batch_test.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include - -#include -#include - -#include "common/logging.h" -#include "util/arrow/row_batch.h" - -#define ARROW_UTIL_LOGGING_H -#include -#include -#include -#include -#include -#include - -#include "common/object_pool.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/row_batch.h" -#include "util/debug_util.h" - -namespace doris { - -class ArrowRowBatchTest : public testing::Test { -public: - std::string test_str() { - return R"( - { "c1": 1, "c2": 1.1 } - { "c1": 2, "c2": 2.2 } - { "c1": 3, "c2": 3.3 } - )"; - } - - void MakeBuffer(const std::string& data, std::shared_ptr<arrow::Buffer>* out) { - auto res = arrow::AllocateBuffer(data.size(), arrow::default_memory_pool()); - *out = std::move(res.ValueOrDie()); - std::copy(std::begin(data), std::end(data), (*out)->mutable_data()); - } -}; - -TEST_F(ArrowRowBatchTest, PrettyPrint) { - auto json = test_str(); - std::shared_ptr<arrow::Buffer> buffer; - MakeBuffer(test_str(), &buffer); - arrow::json::ParseOptions parse_opts = arrow::json::ParseOptions::Defaults(); - parse_opts.explicit_schema = arrow::schema({ - arrow::field("c1", arrow::int64()), - }); - - auto arrow_st = arrow::json::ParseOne(parse_opts, buffer); - EXPECT_TRUE(arrow_st.ok()); - std::shared_ptr<arrow::RecordBatch> record_batch = arrow_st.ValueOrDie(); - - ObjectPool obj_pool; - RowDescriptor* row_desc; - auto doris_st = convert_to_row_desc(&obj_pool, *record_batch->schema(), &row_desc); - EXPECT_TRUE(doris_st.ok()); - std::shared_ptr<RowBatch> row_batch; - doris_st = convert_to_row_batch(*record_batch, *row_desc, &row_batch); - EXPECT_TRUE(doris_st.ok()); - - { - std::shared_ptr<arrow::Schema> check_schema; - doris_st = convert_to_arrow_schema(*row_desc, &check_schema); - EXPECT_TRUE(doris_st.ok()); - - arrow::MemoryPool* pool = arrow::default_memory_pool(); - std::shared_ptr<arrow::RecordBatch> check_batch; - doris_st = convert_to_arrow_batch(*row_batch, check_schema, pool, &check_batch); - EXPECT_TRUE(doris_st.ok()); - EXPECT_EQ(3, check_batch->num_rows()); - EXPECT_TRUE(record_batch->Equals(*check_batch)); - } -} - -} // namespace doris diff --git a/be/test/util/arrow/arrow_row_block_test.cpp b/be/test/util/arrow/arrow_row_block_test.cpp deleted file mode 100644 index be86a51d25..0000000000 --- a/be/test/util/arrow/arrow_row_block_test.cpp +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include - -#include - -#include "common/logging.h" -#include "util/arrow/row_block.h" - -#define ARROW_UTIL_LOGGING_H -#include -#include -#include -#include -#include -#include -#include - -#include "olap/row_block2.h" -#include "olap/schema.h" -#include "olap/tablet_schema_helper.h" - -namespace doris { - -class ArrowRowBlockTest : public testing::Test { -public: - ArrowRowBlockTest() {} - virtual ~ArrowRowBlockTest() {} - std::string test_str() { - return R"( - { "c1": 1, "c2": 1.1 } - { "c1": 2, "c2": 2.2 } - { "c1": 3, "c2": 3.3 } - )"; - } - void MakeBuffer(const std::string& data, std::shared_ptr<arrow::Buffer>* out) { - auto buffer_res = arrow::AllocateBuffer(data.size(), arrow::default_memory_pool()); - *out = std::move(buffer_res.ValueOrDie()); - std::copy(std::begin(data), std::end(data), (*out)->mutable_data()); - } -}; - -TEST_F(ArrowRowBlockTest, Normal) { - auto json = test_str(); - std::shared_ptr<arrow::Buffer> buffer; - MakeBuffer(test_str(), &buffer); - arrow::json::ParseOptions parse_opts = arrow::json::ParseOptions::Defaults(); - parse_opts.explicit_schema = arrow::schema({ - arrow::field("c1", arrow::int64()), - }); - - auto arrow_st = arrow::json::ParseOne(parse_opts, buffer); - EXPECT_TRUE(arrow_st.ok()); - std::shared_ptr<arrow::RecordBatch> record_batch = arrow_st.ValueOrDie(); - - std::shared_ptr<Schema> schema; - auto doris_st = convert_to_doris_schema(*record_batch->schema(), &schema); - EXPECT_TRUE(doris_st.ok()); - - std::shared_ptr<RowBlockV2> row_block; - doris_st = convert_to_row_block(*record_batch, *schema, &row_block); - EXPECT_TRUE(doris_st.ok()); - - { - std::shared_ptr<arrow::Schema> check_schema; - doris_st = convert_to_arrow_schema(*schema, &check_schema); - EXPECT_TRUE(doris_st.ok()); - arrow::MemoryPool* pool = arrow::default_memory_pool(); - std::shared_ptr<arrow::RecordBatch> check_batch; - doris_st = convert_to_arrow_batch(*row_block, check_schema, pool, &check_batch); - EXPECT_TRUE(doris_st.ok()); - EXPECT_EQ(3, check_batch->num_rows()); - EXPECT_TRUE(record_batch->Equals(*check_batch)); - } -} - -} // namespace doris diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp deleted file mode 100644 index 21a70ffcf9..0000000000 --- a/be/test/util/arrow/arrow_work_flow_test.cpp +++ /dev/null @@ -1,333 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include -#include -#include -#include -#include - -#include - -#include "common/logging.h" -#include "exec/csv_scan_node.h" -#include "gen_cpp/PlanNodes_types.h" -#include "gen_cpp/Types_types.h" -#include "olap/row.h" -#include "runtime/exec_env.h" -#include "runtime/result_queue_mgr.h" -#include "runtime/row_batch.h" -#include "runtime/runtime_state.h" -#include "runtime/thread_resource_mgr.h" -#include "runtime/tuple_row.h" -#include "util/arrow/row_batch.h" -#include "util/cpu_info.h" -#include "util/debug_util.h" -#include "util/disk_info.h" - -namespace doris { - -class ArrowWorkFlowTest : public testing::Test { -public: - ArrowWorkFlowTest() {} - ~ArrowWorkFlowTest() {} - -protected: - virtual void SetUp() { - config::periodic_counter_update_period_ms = 500; - config::storage_root_path = "./data"; - - EXPECT_EQ(system("mkdir -p ./test_run/output/"), 0); - EXPECT_EQ(system("pwd"), 0); - EXPECT_EQ(system("cp -r ./be/test/util/test_data ./test_run/."), 0); - - init(); - } - virtual void TearDown() { - _obj_pool.clear(); - EXPECT_EQ(system("rm -rf ./test_run"), 0); - - delete _state; - if (_exec_env) { - delete _exec_env->_result_queue_mgr; - delete _exec_env->_thread_mgr; - } - } - - void init(); - void init_desc_tbl(); - void init_runtime_state(); - -private: - ObjectPool _obj_pool; - TDescriptorTable _t_desc_table; - DescriptorTbl* _desc_tbl = nullptr; - TPlanNode _tnode; - ExecEnv* _exec_env = nullptr; - RuntimeState* _state = nullptr; -}; // end class ArrowWorkFlowTest - -void ArrowWorkFlowTest::init() { - _exec_env = ExecEnv::GetInstance(); - init_desc_tbl(); - init_runtime_state(); -} - -void ArrowWorkFlowTest::init_runtime_state() { - _exec_env->_result_queue_mgr = new ResultQueueMgr(); - _exec_env->_thread_mgr = new ThreadResourceMgr(); - _exec_env->_is_init = true; - TQueryOptions query_options; - query_options.batch_size = 1024; - TUniqueId query_id; - query_id.lo = 10; - query_id.hi = 100; - _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env); - _state->init_mem_trackers(); - _state->set_desc_tbl(_desc_tbl); - _state->_load_dir = "./test_run/output/"; - _state->init_mem_trackers(TUniqueId()); -} - -void ArrowWorkFlowTest::init_desc_tbl() { - // TTableDescriptor - TTableDescriptor t_table_desc; - t_table_desc.id = 0; - t_table_desc.tableType = TTableType::OLAP_TABLE; - t_table_desc.numCols = 0; - t_table_desc.numClusteringCols = 0; - t_table_desc.olapTable.tableName = "test"; - t_table_desc.tableName = "test_table_name"; - t_table_desc.dbName = "test_db_name"; - t_table_desc.__isset.olapTable = true; - - _t_desc_table.tableDescriptors.push_back(t_table_desc); - _t_desc_table.__isset.tableDescriptors = true; - - // TSlotDescriptor - std::vector<TSlotDescriptor> slot_descs; - int offset = 1; - int i = 0; - // int_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(gen_type_desc(TPrimitiveType::INT)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("int_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(int32_t); - } - ++i; - // date_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - t_slot_desc.__set_slotType(gen_type_desc(TPrimitiveType::DATE)); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - 
t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("date_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(DateTimeValue); - } - ++i; - // decimalv2_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - TTypeDesc ttype = gen_type_desc(TPrimitiveType::DECIMALV2); - ttype.types[0].scalar_type.__set_precision(9); - ttype.types[0].scalar_type.__set_scale(3); - t_slot_desc.__set_slotType(ttype); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("decimalv2_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(DecimalV2Value); - } - ++i; - // fix_len_string_column - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - TTypeDesc ttype = gen_type_desc(TPrimitiveType::CHAR); - ttype.types[0].scalar_type.__set_len(5); - t_slot_desc.__set_slotType(ttype); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("fix_len_string_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(StringValue); - } - ++i; - // largeint - { - TSlotDescriptor t_slot_desc; - t_slot_desc.__set_id(i); - TTypeDesc ttype = gen_type_desc(TPrimitiveType::LARGEINT); - t_slot_desc.__set_slotType(ttype); - t_slot_desc.__set_columnPos(i); - t_slot_desc.__set_byteOffset(offset); - t_slot_desc.__set_nullIndicatorByte(0); - t_slot_desc.__set_nullIndicatorBit(-1); - t_slot_desc.__set_slotIdx(i); - t_slot_desc.__set_isMaterialized(true); - t_slot_desc.__set_colName("largeint_column"); - - slot_descs.push_back(t_slot_desc); - offset += sizeof(LargeIntVal); - } - _t_desc_table.__set_slotDescriptors(slot_descs); - - // TTupleDescriptor - TTupleDescriptor t_tuple_desc; - t_tuple_desc.id = 0; - t_tuple_desc.byteSize = offset; - t_tuple_desc.numNullBytes = 1; - t_tuple_desc.tableId = 0; - t_tuple_desc.__isset.tableId = true; - _t_desc_table.tupleDescriptors.push_back(t_tuple_desc); - - DescriptorTbl::create(&_obj_pool, _t_desc_table, &_desc_tbl); - - std::vector<TTupleId> row_tids; - row_tids.push_back(0); - - std::vector<bool> nullable_tuples; - nullable_tuples.push_back(false); - - // node - _tnode.node_id = 0; - _tnode.node_type = TPlanNodeType::CSV_SCAN_NODE; - _tnode.num_children = 0; - _tnode.limit = -1; - _tnode.row_tuples.push_back(0); - _tnode.nullable_tuples.push_back(false); - _tnode.csv_scan_node.tuple_id = 0; - - _tnode.csv_scan_node.__set_column_separator(","); - _tnode.csv_scan_node.__set_line_delimiter("\n"); - - // column_type_mapping - std::map<std::string, TColumnType> column_type_map; - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::INT); - column_type_map["int_column"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::DATE); - column_type_map["date_column"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::DECIMALV2); - column_type.__set_precision(9); - column_type.__set_scale(3); - column_type_map["decimalv2_column"] = column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::CHAR); - column_type.__set_len(5); - column_type_map["fix_len_string_column"] = 
column_type; - } - { - TColumnType column_type; - column_type.__set_type(TPrimitiveType::LARGEINT); - column_type_map["largeint_column"] = column_type; - } - _tnode.csv_scan_node.__set_column_type_mapping(column_type_map); - - std::vector<std::string> columns; - columns.push_back("int_column"); - columns.push_back("date_column"); - columns.push_back("decimalv2_column"); - columns.push_back("fix_len_string_column"); - columns.push_back("largeint_column"); - _tnode.csv_scan_node.__set_columns(columns); - - _tnode.csv_scan_node.__isset.unspecified_columns = true; - _tnode.csv_scan_node.__isset.default_values = true; - _tnode.csv_scan_node.max_filter_ratio = 0.5; - _tnode.__isset.csv_scan_node = true; -} - -TEST_F(ArrowWorkFlowTest, NormalUse) { - std::vector<std::string> file_paths; - file_paths.push_back("./test_run/test_data/csv_data"); - _tnode.csv_scan_node.__set_file_paths(file_paths); - - CsvScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl); - scan_node.init(_tnode); - Status status = scan_node.prepare(_state); - EXPECT_TRUE(status.ok()); - - status = scan_node.open(_state); - EXPECT_TRUE(status.ok()); - - RowBatch row_batch(scan_node._row_descriptor, _state->batch_size()); - bool eos = false; - - while (!eos) { - status = scan_node.get_next(_state, &row_batch, &eos); - EXPECT_TRUE(status.ok()); - // int num = std::min(row_batch.num_rows(), 10); - int num = row_batch.num_rows(); - EXPECT_EQ(6, num); - std::shared_ptr<arrow::Schema> schema; - status = convert_to_arrow_schema(scan_node._row_descriptor, &schema); - EXPECT_TRUE(status.ok()); - std::shared_ptr<arrow::RecordBatch> record_batch; - status = convert_to_arrow_batch(row_batch, schema, arrow::default_memory_pool(), - &record_batch); - EXPECT_TRUE(status.ok()); - EXPECT_EQ(6, record_batch->num_rows()); - EXPECT_EQ(5, record_batch->num_columns()); - std::string result; - status = serialize_record_batch(*record_batch, &result); - EXPECT_TRUE(status.ok()); - size_t len = result.length(); - EXPECT_TRUE(len > 0); - } - - EXPECT_TRUE(scan_node.close(_state).ok()); -} - -} // end namespace doris diff --git a/be/test/util/tuple_row_zorder_compare_test.cpp b/be/test/util/tuple_row_zorder_compare_test.cpp index 66d014087f..09cf821348 100644 --- a/be/test/util/tuple_row_zorder_compare_test.cpp +++ b/be/test/util/tuple_row_zorder_compare_test.cpp @@ -20,7 +20,6 @@ #include #include "common/logging.h" -#include "exec/sort_exec_exprs.h" #include "exprs/expr.h" #include "exprs/expr_context.h" #include "olap/memtable.h" diff --git a/be/test/vec/exec/vjson_scanner_test.cpp b/be/test/vec/exec/vjson_scanner_test.cpp index 1db2c8270a..9149fb3dd7 100644 --- a/be/test/vec/exec/vjson_scanner_test.cpp +++ b/be/test/vec/exec/vjson_scanner_test.cpp @@ -25,7 +25,6 @@ #include #include "common/object_pool.h" -#include "exec/broker_scan_node.h" #include "exprs/cast_functions.h" #include "exprs/decimalv2_operators.h" #include "gen_cpp/Descriptors_types.h" diff --git a/be/test/vec/exec/vorc_scanner_test.cpp b/be/test/vec/exec/vorc_scanner_test.cpp index 0362ebb080..39e3bb56ed 100644 --- a/be/test/vec/exec/vorc_scanner_test.cpp +++ b/be/test/vec/exec/vorc_scanner_test.cpp @@ -26,7 +26,6 @@ #include #include "common/object_pool.h" -#include "exec/orc_scanner.h" #include "exprs/cast_functions.h" #include "exprs/decimalv2_operators.h" #include "gen_cpp/Descriptors_types.h" diff --git a/be/test/vec/exec/vtablet_sink_test.cpp b/be/test/vec/exec/vtablet_sink_test.cpp index 4e75c6808e..a987f30150 100644 --- a/be/test/vec/exec/vtablet_sink_test.cpp +++ b/be/test/vec/exec/vtablet_sink_test.cpp @@ -44,10 +44,229 @@ namespace doris { 
namespace stream_load { -extern Status k_add_batch_status; +Status k_add_batch_status; -TDataSink get_data_sink(TDescriptorTable* desc_tbl); -TDataSink get_decimal_sink(TDescriptorTable* desc_tbl); +TDataSink get_data_sink(TDescriptorTable* desc_tbl) { + int64_t db_id = 1; + int64_t table_id = 2; + int64_t partition_id = 3; + int64_t index1_id = 4; + int64_t tablet1_id = 6; + int64_t tablet2_id = 7; + + TDataSink data_sink; + data_sink.type = TDataSinkType::OLAP_TABLE_SINK; + data_sink.__isset.olap_table_sink = true; + + TOlapTableSink& tsink = data_sink.olap_table_sink; + tsink.load_id.hi = 123; + tsink.load_id.lo = 456; + tsink.txn_id = 789; + tsink.db_id = 1; + tsink.table_id = 2; + tsink.tuple_id = 0; + tsink.num_replicas = 3; + tsink.db_name = "testDb"; + tsink.table_name = "testTable"; + + // construct schema + TOlapTableSchemaParam& tschema = tsink.schema; + tschema.db_id = 1; + tschema.table_id = 2; + tschema.version = 0; + + // descriptor + { + TDescriptorTableBuilder dtb; + { + TTupleDescriptorBuilder tuple_builder; + + tuple_builder.add_slot(TSlotDescriptorBuilder() + .type(TYPE_INT) + .column_name("c1") + .column_pos(1) + .build()); + tuple_builder.add_slot(TSlotDescriptorBuilder() + .type(TYPE_BIGINT) + .column_name("c2") + .column_pos(2) + .build()); + tuple_builder.add_slot(TSlotDescriptorBuilder() + .string_type(10) + .column_name("c3") + .column_pos(3) + .build()); + + tuple_builder.build(&dtb); + } + { + TTupleDescriptorBuilder tuple_builder; + + tuple_builder.add_slot(TSlotDescriptorBuilder() + .type(TYPE_INT) + .column_name("c1") + .column_pos(1) + .build()); + tuple_builder.add_slot(TSlotDescriptorBuilder() + .type(TYPE_BIGINT) + .column_name("c2") + .column_pos(2) + .build()); + tuple_builder.add_slot(TSlotDescriptorBuilder() + .string_type(20) + .column_name("c3") + .column_pos(3) + .build()); + + tuple_builder.build(&dtb); + } + + *desc_tbl = dtb.desc_tbl(); + tschema.slot_descs = desc_tbl->slotDescriptors; + tschema.tuple_desc = desc_tbl->tupleDescriptors[0]; + } + // index + tschema.indexes.resize(1); + tschema.indexes[0].id = index1_id; + tschema.indexes[0].columns = {"c1", "c2", "c3"}; + // tschema.indexes[1].id = 5; + // tschema.indexes[1].columns = {"c1", "c3"}; + // partition + TOlapTablePartitionParam& tpartition = tsink.partition; + tpartition.db_id = db_id; + tpartition.table_id = table_id; + tpartition.version = table_id; + tpartition.__set_partition_column("c2"); + tpartition.__set_distributed_columns({"c1", "c3"}); + tpartition.partitions.resize(1); + tpartition.partitions[0].id = partition_id; + tpartition.partitions[0].num_buckets = 2; + tpartition.partitions[0].indexes.resize(1); + tpartition.partitions[0].indexes[0].index_id = index1_id; + tpartition.partitions[0].indexes[0].tablets = {tablet1_id, tablet2_id}; + // location + TOlapTableLocationParam& location = tsink.location; + location.db_id = db_id; + location.table_id = table_id; + location.version = 0; + location.tablets.resize(2); + location.tablets[0].tablet_id = tablet1_id; + location.tablets[0].node_ids = {0, 1, 2}; + location.tablets[1].tablet_id = tablet2_id; + location.tablets[1].node_ids = {0, 1, 2}; + // nodes info + TPaloNodesInfo& nodes_info = tsink.nodes_info; + nodes_info.nodes.resize(3); + nodes_info.nodes[0].id = 0; + nodes_info.nodes[0].host = "127.0.0.1"; + nodes_info.nodes[0].async_internal_port = 4356; + nodes_info.nodes[1].id = 1; + nodes_info.nodes[1].host = "127.0.0.1"; + nodes_info.nodes[1].async_internal_port = 4356; + nodes_info.nodes[2].id = 2; + nodes_info.nodes[2].host 
= "127.0.0.1"; + nodes_info.nodes[2].async_internal_port = 4357; + + return data_sink; +} + +TDataSink get_decimal_sink(TDescriptorTable* desc_tbl) { + int64_t db_id = 1; + int64_t table_id = 2; + int64_t partition_id = 3; + int64_t index1_id = 4; + int64_t tablet1_id = 6; + int64_t tablet2_id = 7; + + TDataSink data_sink; + data_sink.type = TDataSinkType::OLAP_TABLE_SINK; + data_sink.__isset.olap_table_sink = true; + + TOlapTableSink& tsink = data_sink.olap_table_sink; + tsink.load_id.hi = 123; + tsink.load_id.lo = 456; + tsink.txn_id = 789; + tsink.db_id = 1; + tsink.table_id = 2; + tsink.tuple_id = 0; + tsink.num_replicas = 3; + tsink.db_name = "testDb"; + tsink.table_name = "testTable"; + + // construct schema + TOlapTableSchemaParam& tschema = tsink.schema; + tschema.db_id = 1; + tschema.table_id = 2; + tschema.version = 0; + + // descriptor + { + TDescriptorTableBuilder dtb; + { + TTupleDescriptorBuilder tuple_builder; + + tuple_builder.add_slot(TSlotDescriptorBuilder() + .type(TYPE_INT) + .column_name("c1") + .column_pos(1) + .build()); + tuple_builder.add_slot(TSlotDescriptorBuilder() + .decimal_type(5, 2) + .column_name("c2") + .column_pos(2) + .build()); + + tuple_builder.build(&dtb); + } + + *desc_tbl = dtb.desc_tbl(); + tschema.slot_descs = desc_tbl->slotDescriptors; + tschema.tuple_desc = desc_tbl->tupleDescriptors[0]; + } + // index + tschema.indexes.resize(1); + tschema.indexes[0].id = index1_id; + tschema.indexes[0].columns = {"c1", "c2"}; + // tschema.indexes[1].id = 5; + // tschema.indexes[1].columns = {"c1", "c3"}; + // partition + TOlapTablePartitionParam& tpartition = tsink.partition; + tpartition.db_id = db_id; + tpartition.table_id = table_id; + tpartition.version = table_id; + tpartition.__set_partition_column("c1"); + tpartition.__set_distributed_columns({"c2"}); + tpartition.partitions.resize(1); + tpartition.partitions[0].id = partition_id; + tpartition.partitions[0].num_buckets = 2; + tpartition.partitions[0].indexes.resize(1); + tpartition.partitions[0].indexes[0].index_id = index1_id; + tpartition.partitions[0].indexes[0].tablets = {tablet1_id, tablet2_id}; + // location + TOlapTableLocationParam& location = tsink.location; + location.db_id = db_id; + location.table_id = table_id; + location.version = 0; + location.tablets.resize(2); + location.tablets[0].tablet_id = tablet1_id; + location.tablets[0].node_ids = {0, 1, 2}; + location.tablets[1].tablet_id = tablet2_id; + location.tablets[1].node_ids = {0, 1, 2}; + // location + TPaloNodesInfo& nodes_info = tsink.nodes_info; + nodes_info.nodes.resize(3); + nodes_info.nodes[0].id = 0; + nodes_info.nodes[0].host = "127.0.0.1"; + nodes_info.nodes[0].async_internal_port = 4356; + nodes_info.nodes[1].id = 1; + nodes_info.nodes[1].host = "127.0.0.1"; + nodes_info.nodes[1].async_internal_port = 4356; + nodes_info.nodes[2].id = 2; + nodes_info.nodes[2].host = "127.0.0.1"; + nodes_info.nodes[2].async_internal_port = 4357; + + return data_sink; +} class VTestInternalService : public PBackendService { public: