doris/be/src/runtime/data_stream_sender.h

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_RUNTIME_DATA_STREAM_SENDER_H
#define DORIS_BE_RUNTIME_DATA_STREAM_SENDER_H

#include <string>
#include <vector>

#include "common/global_types.h"
#include "common/object_pool.h"
#include "common/status.h"
#include "exec/data_sink.h"
#include "gen_cpp/BackendService.h"
#include "gen_cpp/PaloInternalService_types.h"
#include "gen_cpp/Types_types.h"
#include "gen_cpp/data.pb.h" // for PRowBatch
#include "gen_cpp/internal_service.pb.h"
#include "service/backend_options.h"
#include "service/brpc.h"
#include "util/ref_count_closure.h"
#include "util/runtime_profile.h"
#include "util/uid_util.h"
namespace doris {
class ExprContext;
class RowBatch;
class RowDescriptor;
class TDataStreamSink;
class TNetworkAddress;
class TPlanFragmentDestination;
class PartitionInfo;
class TupleRow;
class PartRangeKey;
class MemTracker;

// Single sender of an m:n data stream.
// Row batch data is routed to destinations based on the provided
// partitioning specification.
// *Not* thread-safe.
//
// TODO: capture stats that describe distribution of rows/data volume
// across channels.
class DataStreamSender : public DataSink {
public:
DataStreamSender(ObjectPool* pool, int sender_id, const RowDescriptor& row_desc);
// Construct a sender according to the output specification (sink),
// sending to the given destinations.
// 'per_channel_buffer_size' is the buffer size allocated to each channel,
// specified in bytes.
// The RowDescriptor must live until close() is called.
// NOTE: supported partition types are UNPARTITIONED (broadcast) and HASH_PARTITIONED
DataStreamSender(ObjectPool* pool, int sender_id, const RowDescriptor& row_desc,
const TDataStreamSink& sink,
const std::vector<TPlanFragmentDestination>& destinations,
int per_channel_buffer_size, bool send_query_statistics_with_every_batch);
virtual ~DataStreamSender();
virtual Status init(const TDataSink& thrift_sink);
// Must be called before other API calls, and before the codegen'd IR module is
// compiled (i.e. in an ExecNode's Prepare() function).
virtual Status prepare(RuntimeState* state);
// Must be called before Send() or Close(), and after the codegen'd IR module is
// compiled (i.e. in an ExecNode's Open() function).
virtual Status open(RuntimeState* state);
// Sends data in 'batch' to destination nodes according to the partitioning
// specification provided in the c'tor.
// Blocks until all rows in the batch are placed in their appropriate outgoing
// buffers (i.e., blocks if there are still in-flight rpcs from the last
// send() call).
virtual Status send(RuntimeState* state, RowBatch* batch);
// Flush all buffered data and close all existing channels to destination
// hosts. Further send() calls are illegal after calling close().
virtual Status close(RuntimeState* state, Status exec_status);
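// Example (illustrative sketch, not part of the original header): the typical
// lifecycle of a sender inside a fragment executor. 'pool', 'row_desc', 'sink',
// 'destinations', 'thrift_sink', 'state', and 'batch' are assumed to come from
// the surrounding plan-fragment code.
//
//   DataStreamSender sender(pool, /*sender_id=*/0, row_desc, sink, destinations,
//                           /*per_channel_buffer_size=*/1024 * 1024,
//                           /*send_query_statistics_with_every_batch=*/false);
//   RETURN_IF_ERROR(sender.init(thrift_sink));
//   RETURN_IF_ERROR(sender.prepare(state));
//   RETURN_IF_ERROR(sender.open(state));
//   while (/* the executor produces a RowBatch* batch */) {
//       RETURN_IF_ERROR(sender.send(state, batch));
//   }
//   sender.close(state, Status::OK());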
/// Serializes the src batch into the dest thrift batch. Maintains metrics.
/// num_receivers is the number of receivers this batch will be sent to. Only
/// used to maintain metrics.
template <class T>
Status serialize_batch(RowBatch* src, T* dest, int num_receivers = 1);
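// Example (illustrative sketch of the common broadcast pattern, not the
// authoritative implementation): serialize once, then hand the same PRowBatch
// to every channel, counting each receiver for the metrics.
//
//   RETURN_IF_ERROR(serialize_batch(batch, _current_pb_batch, _channels.size()));
//   for (auto* channel : _channels) {
//       RETURN_IF_ERROR(channel->send_batch(_current_pb_batch));
//   }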
// Return total number of bytes sent in TRowBatch.data. If batches are
// broadcast to multiple receivers, they are counted once per receiver.
int64_t get_num_data_bytes_sent() const;
virtual RuntimeProfile* profile() { return _profile; }
RuntimeState* state() { return _state; }
protected:
const RowDescriptor& _row_desc;
// A channel sends data asynchronously via calls to transmit_data
// to a single destination ipaddress/node.
// It has a fixed-capacity buffer and allows the caller either to add rows to
// that buffer individually (add_row()), or to circumvent the buffer altogether
// and send PRowBatches directly (send_batch()). Either way, there can be only
// one in-flight RPC at any one time (i.e., sending will block if the most
// recent rpc hasn't finished, which allows the receiver node to throttle the
// sender by withholding acks).
// *Not* thread-safe.
class Channel {
public:
Channel(DataStreamSender* parent, const RowDescriptor& row_desc,
const TNetworkAddress& brpc_dest, const TUniqueId& fragment_instance_id,
PlanNodeId dest_node_id, int buffer_size, bool is_transfer_chain,
bool send_query_statistics_with_every_batch);
~Channel();
// Initialize channel.
// Returns OK if successful, error indication otherwise.
Status init(RuntimeState* state);
// Copies a single row into this channel's output buffer and flushes buffer
// if it reaches capacity.
// Returns error status if any of the preceding rpcs failed, OK otherwise.
Status add_row(TupleRow* row);
// Asynchronously sends a row batch.
// Returns the status of the most recently completed transmit_data
// rpc (or OK if there is none that hasn't been reported yet).
// If 'batch' is nullptr, sends the EOS packet instead.
Status send_batch(PRowBatch* batch, bool eos = false);
Status send_local_batch(bool eos);
Status send_local_batch(RowBatch* batch, bool use_move);
// Flush buffered rows and close channel. This function does not wait for the
// response of the close operation; the caller should call close_wait() to
// finish the channel's close. The close operation is split into two phases
// so that multiple channels can close in parallel.
Status close(RuntimeState* state);
// Get close wait's response, to finish channel close operation.
Status close_wait(RuntimeState* state);
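// Example (illustrative sketch): the two-phase close lets all channels flush
// in parallel. First kick off close() on every channel, then join them with
// close_wait(); error collection is elided here for brevity.
//
//   for (auto* channel : _channels) channel->close(state);
//   for (auto* channel : _channels) RETURN_IF_ERROR(channel->close_wait(state));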
int64_t num_data_bytes_sent() const { return _num_data_bytes_sent; }
PRowBatch* pb_batch() { return &_pb_batch; }
std::string get_fragment_instance_id_str() {
UniqueId uid(_fragment_instance_id);
return uid.to_string();
}
TUniqueId get_fragment_instance_id() { return _fragment_instance_id; }
bool is_local() { return _is_local; }
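// Waits for the previously issued transmit_data rpc (if any) to finish and
// converts a brpc-level failure into an error Status.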
inline Status _wait_last_brpc() {
if (_closure == nullptr) return Status::OK();
auto cntl = &_closure->cntl;
brpc::Join(cntl->call_id());
if (cntl->Failed()) {
std::stringstream ss;
ss << "failed to send brpc batch, error=" << berror(cntl->ErrorCode())
<< ", error_text=" << cntl->ErrorText()
<< ", client: " << BackendOptions::get_localhost();
LOG(WARNING) << ss.str();
return Status::ThriftRpcError(ss.str());
}
return Status::OK();
}
// Serialize _batch into _pb_batch and send via send_batch().
// Returns send_batch() status.
Status send_current_batch(bool eos = false);
Status close_internal();
DataStreamSender* _parent;
int _buffer_size;
const RowDescriptor& _row_desc;
TUniqueId _fragment_instance_id;
PlanNodeId _dest_node_id;
// the number of TRowBatch.data bytes sent successfully
int64_t _num_data_bytes_sent;
int64_t _packet_seq;
// we're accumulating rows into this batch
std::unique_ptr<RowBatch> _batch;
bool _need_close;
int _be_number;
TNetworkAddress _brpc_dest_addr;
// TODO(zc): init; fields below are used for brpc
PUniqueId _finst_id;
PRowBatch _pb_batch;
PTransmitDataParams _brpc_request;
std::shared_ptr<PBackendService_Stub> _brpc_stub = nullptr;
RefCountClosure<PTransmitDataResult>* _closure = nullptr;
int32_t _brpc_timeout_ms = 500;
// whether the dest can be treated as query statistics transfer chain.
bool _is_transfer_chain;
bool _send_query_statistics_with_every_batch;
bool _is_local;
};
RuntimeProfile* _profile; // Allocated from _pool
PRowBatch* _current_pb_batch;
std::shared_ptr<MemTracker> _mem_tracker;
ObjectPool* _pool;
// Sender instance id, unique within a fragment.
int _sender_id;
RuntimeProfile::Counter* _serialize_batch_timer;
RuntimeProfile::Counter* _bytes_sent_counter;
// Counts bytes sent through local data exchange
RuntimeProfile::Counter* _local_bytes_send_counter;
RuntimeProfile::Counter* _uncompressed_bytes_counter;
RuntimeState* _state;
std::vector<Channel*> _channels;
std::vector<std::shared_ptr<Channel>> _channel_shared_ptrs;
private:
Status compute_range_part_code(RuntimeState* state, TupleRow* row, size_t* hash_value,
bool* ignore);
int binary_find_partition(const PartRangeKey& key) const;
Status find_partition(RuntimeState* state, TupleRow* row, PartitionInfo** info, bool* ignore);
Status process_distribute(RuntimeState* state, TupleRow* row, const PartitionInfo* part,
size_t* hash_val);
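// Example (illustrative sketch of how these helpers fit together when routing
// a single row of a range-partitioned send; the channel-selection step at the
// end is an assumption about the .cpp, not defined in this header):
//
//   PartitionInfo* part = nullptr;
//   bool ignore = false;
//   RETURN_IF_ERROR(find_partition(state, row, &part, &ignore));
//   if (!ignore) {
//       size_t hash_val = 0;
//       RETURN_IF_ERROR(process_distribute(state, row, part, &hash_val));
//       // the row is then added to a channel chosen from hash_val
//   }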
int _current_channel_idx; // index of current channel to send to if _random == true
TPartitionType::type _part_type;
bool _ignore_not_found;
// serialized batches for broadcasting; we need two so we can write
// one while the other one is still being sent
PRowBatch _pb_batch1;
PRowBatch _pb_batch2;
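// Example (illustrative sketch): after each broadcast send, flip which batch
// is written while the other may still be in flight:
//
//   _current_pb_batch = (_current_pb_batch == &_pb_batch1) ? &_pb_batch2 : &_pb_batch1;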
std::vector<ExprContext*> _partition_expr_ctxs; // compute per-row partition values
// map from range value to partition_id
// sorted in ascending order by range for binary search
std::vector<PartitionInfo*> _partition_infos;
RuntimeProfile::Counter* _ignore_rows;
// Throughput per total time spent in sender
RuntimeProfile::Counter* _overall_throughput;
// Identifier of the destination plan node.
PlanNodeId _dest_node_id;
};
} // namespace doris

#endif // DORIS_BE_RUNTIME_DATA_STREAM_SENDER_H