Add cpu and io indicators to audit log (#513)
Record query consumption in the FE audit log. It works as follows: one instance of each parent plan fragment is responsible for accumulating its sub-plans' consumption and sending the total to its parent; the BE coordinator, being a single instance, ends up with the total consumption for the whole query.
This commit is contained in:
@ -31,9 +31,13 @@ void GetResultBatchCtx::on_failure(const Status& status) {
|
||||
delete this;
|
||||
}
|
||||
|
||||
void GetResultBatchCtx::on_close(int64_t packet_seq) {
|
||||
void GetResultBatchCtx::on_close(int64_t packet_seq,
|
||||
ExecNodeConsumptionProvider::Consumption* consumption) {
|
||||
Status status;
|
||||
status.to_protobuf(result->mutable_status());
|
||||
if (consumption != nullptr) {
|
||||
consumption->serialize(result->mutable_query_consumption());
|
||||
}
|
||||
result->set_packet_seq(packet_seq);
|
||||
result->set_eos(true);
|
||||
done->Run();
|
||||
@ -183,7 +187,7 @@ void BufferControlBlock::get_batch(GetResultBatchCtx* ctx) {
|
||||
return;
|
||||
}
|
||||
if (_is_close) {
|
||||
ctx->on_close(_packet_num);
|
||||
ctx->on_close(_packet_num, &_consumption);
|
||||
return;
|
||||
}
|
||||
// no ready data, push ctx to waiting list
|
||||
@ -200,7 +204,7 @@ Status BufferControlBlock::close(Status exec_status) {
|
||||
if (!_waiting_rpc.empty()) {
|
||||
if (_status.ok()) {
|
||||
for (auto& ctx : _waiting_rpc) {
|
||||
ctx->on_close(_packet_num);
|
||||
ctx->on_close(_packet_num, &_consumption);
|
||||
}
|
||||
} else {
|
||||
for (auto& ctx : _waiting_rpc) {
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
#include <boost/thread/condition_variable.hpp>
|
||||
#include "common/status.h"
|
||||
#include "gen_cpp/Types_types.h"
|
||||
#include "runtime/exec_node_consumption_provider.h"
|
||||
|
||||
namespace google {
|
||||
namespace protobuf {
|
||||
@ -52,7 +53,7 @@ struct GetResultBatchCtx {
|
||||
}
|
||||
|
||||
void on_failure(const Status& status);
|
||||
void on_close(int64_t packet_seq);
|
||||
void on_close(int64_t packet_seq, ExecNodeConsumptionProvider::Consumption* consumption = nullptr);
|
||||
void on_data(TFetchDataResult* t_result, int64_t packet_seq, bool eos = false);
|
||||
};
|
||||
|
||||
@ -80,6 +81,9 @@ public:
|
||||
return _fragment_id;
|
||||
}
|
||||
|
||||
void set_query_consumption(const ExecNodeConsumptionProvider::Consumption& consumption) {
|
||||
_consumption = consumption;
|
||||
}
|
||||
private:
|
||||
typedef std::list<TFetchDataResult*> ResultQueue;
|
||||
|
||||
@ -100,8 +104,10 @@ private:
|
||||
boost::condition_variable _data_arriaval;
|
||||
// signal removal of data by stream consumer
|
||||
boost::condition_variable _data_removal;
|
||||
|
||||
|
||||
std::deque<GetResultBatchCtx*> _waiting_rpc;
|
||||
|
||||
ExecNodeConsumptionProvider::Consumption _consumption;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -121,7 +121,8 @@ Status DataStreamMgr::add_data(
|
||||
Status DataStreamMgr::close_sender(const TUniqueId& fragment_instance_id,
|
||||
PlanNodeId dest_node_id,
|
||||
int sender_id,
|
||||
int be_number) {
|
||||
int be_number,
|
||||
const PQueryConsumption& consumption) {
|
||||
VLOG_FILE << "close_sender(): fragment_instance_id=" << fragment_instance_id
|
||||
<< ", node=" << dest_node_id;
|
||||
shared_ptr<DataStreamRecvr> recvr = find_recvr(fragment_instance_id, dest_node_id);
|
||||
@ -135,6 +136,7 @@ Status DataStreamMgr::close_sender(const TUniqueId& fragment_instance_id,
|
||||
// errors from receiver-initiated teardowns.
|
||||
return Status::OK;
|
||||
}
|
||||
recvr->add_sub_plan_consumption(consumption);
|
||||
recvr->remove_sender(sender_id, be_number);
|
||||
return Status::OK;
|
||||
}
|
||||
|
||||
@ -46,6 +46,7 @@ class DataStreamRecvr;
|
||||
class RowBatch;
|
||||
class RuntimeState;
|
||||
class PRowBatch;
|
||||
class PQueryConsumption;
|
||||
class PUniqueId;
|
||||
|
||||
// Singleton class which manages all incoming data streams at a backend node. It
|
||||
@ -96,7 +97,7 @@ public:
|
||||
// sender has closed.
|
||||
// Returns OK if successful, error status otherwise.
|
||||
Status close_sender(const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id,
|
||||
int sender_id, int be_number);
|
||||
int sender_id, int be_number, const PQueryConsumption& consumption);
|
||||
|
||||
// Closes all receivers registered for fragment_instance_id immediately.
|
||||
void cancel(const TUniqueId& fragment_instance_id);
|
||||
|
||||
@ -242,6 +242,7 @@ void DataStreamRecvr::SenderQueue::add_batch(
|
||||
// it in this thread.
|
||||
batch = new RowBatch(_recvr->row_desc(), pb_batch, _recvr->mem_tracker());
|
||||
}
|
||||
|
||||
VLOG_ROW << "added #rows=" << batch->num_rows()
|
||||
<< " batch_size=" << batch_size << "\n";
|
||||
_batch_queue.emplace_back(batch_size, batch);
|
||||
@ -433,4 +434,9 @@ Status DataStreamRecvr::get_batch(RowBatch** next_batch) {
|
||||
return _sender_queues[0]->get_batch(next_batch);
|
||||
}
|
||||
|
||||
void DataStreamRecvr::add_sub_plan_consumption(const PQueryConsumption& p_consumption) {
|
||||
ExecNodeConsumptionProvider::Consumption consumption;
|
||||
consumption.deserialize(p_consumption);
|
||||
_sub_plan_consumption.add(consumption);
|
||||
}
|
||||
}
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
#include "common/object_pool.h"
|
||||
#include "common/status.h"
|
||||
#include "gen_cpp/Types_types.h" // for TUniqueId
|
||||
#include "runtime/exec_node_consumption_provider.h"
|
||||
#include "runtime/descriptors.h"
|
||||
#include "util/tuple_row_compare.h"
|
||||
|
||||
@ -99,6 +100,11 @@ public:
|
||||
const RowDescriptor& row_desc() const { return _row_desc; }
|
||||
MemTracker* mem_tracker() const { return _mem_tracker.get(); }
|
||||
|
||||
void add_sub_plan_consumption(const PQueryConsumption& p_consumption);
|
||||
|
||||
ExecNodeConsumptionProvider::Consumption get_sub_plan_consumption() {
|
||||
return _sub_plan_consumption;
|
||||
}
|
||||
private:
|
||||
friend class DataStreamMgr;
|
||||
class SenderQueue;
|
||||
@ -194,6 +200,7 @@ private:
|
||||
// Wall time senders spend waiting for the recv buffer to have capacity.
|
||||
RuntimeProfile::Counter* _buffer_full_wall_timer;
|
||||
|
||||
ExecNodeConsumptionProvider::Consumption _sub_plan_consumption;
|
||||
// Total time spent waiting for data to arrive in the recv buffer
|
||||
// RuntimeProfile::Counter* _data_arrival_timer;
|
||||
};
|
||||
|
||||
@ -71,7 +71,7 @@ public:
|
||||
Channel(DataStreamSender* parent, const RowDescriptor& row_desc,
|
||||
const TNetworkAddress& brpc_dest,
|
||||
const TUniqueId& fragment_instance_id,
|
||||
PlanNodeId dest_node_id, int buffer_size) :
|
||||
PlanNodeId dest_node_id, int buffer_size, bool is_transfer_chain) :
|
||||
_parent(parent),
|
||||
_buffer_size(buffer_size),
|
||||
_row_desc(row_desc),
|
||||
@ -80,7 +80,8 @@ public:
|
||||
_num_data_bytes_sent(0),
|
||||
_packet_seq(0),
|
||||
_need_close(false),
|
||||
_brpc_dest_addr(brpc_dest) {
|
||||
_brpc_dest_addr(brpc_dest),
|
||||
_is_transfer_chain(is_transfer_chain) {
|
||||
}
|
||||
|
||||
virtual ~Channel() {
|
||||
@ -163,6 +164,8 @@ private:
|
||||
palo::PInternalService_Stub* _brpc_stub = nullptr;
|
||||
RefCountClosure<PTransmitDataResult>* _closure = nullptr;
|
||||
int32_t _brpc_timeout_ms = 500;
|
||||
// whether the dest can be treated as consumption transfer chain.
|
||||
bool _is_transfer_chain;
|
||||
};
|
||||
|
||||
Status DataStreamSender::Channel::init(RuntimeState* state) {
|
||||
@ -203,6 +206,10 @@ Status DataStreamSender::Channel::send_batch(PRowBatch* batch, bool eos) {
|
||||
}
|
||||
VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id
|
||||
<< " dest_node=" << _dest_node_id;
|
||||
if (eos && _is_transfer_chain) {
|
||||
auto consumption = _brpc_request.mutable_query_consumption();
|
||||
_parent->_query_consumption.serialize(consumption);
|
||||
}
|
||||
|
||||
_brpc_request.set_eos(eos);
|
||||
if (batch != nullptr) {
|
||||
@ -305,11 +312,16 @@ DataStreamSender::DataStreamSender(
|
||||
|| sink.output_partition.type == TPartitionType::RANGE_PARTITIONED);
|
||||
// TODO: use something like google3's linked_ptr here (scoped_ptr isn't copyable)
|
||||
for (int i = 0; i < destinations.size(); ++i) {
|
||||
bool is_transfer_chain = false;
|
||||
if (destinations[i].__isset.is_transfer_chain) {
|
||||
is_transfer_chain = destinations[i].is_transfer_chain;
|
||||
}
|
||||
_channel_shared_ptrs.emplace_back(
|
||||
new Channel(this, row_desc,
|
||||
destinations[i].brpc_server,
|
||||
destinations[i].fragment_instance_id,
|
||||
sink.dest_node_id, per_channel_buffer_size));
|
||||
sink.dest_node_id, per_channel_buffer_size,
|
||||
is_transfer_chain));
|
||||
_channels.push_back(_channel_shared_ptrs[i].get());
|
||||
}
|
||||
}
|
||||
|
||||
167
be/src/runtime/exec_node_consumption_provider.h
Normal file
167
be/src/runtime/exec_node_consumption_provider.h
Normal file
@ -0,0 +1,167 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_RUNTIME_EXEC_NODE_CONSUMPTION_PROVIDER_H
#define DORIS_BE_RUNTIME_EXEC_NODE_CONSUMPTION_PROVIDER_H

#include "util/runtime_profile.h"
#include "util/string_util.h"
#include "gen_cpp/data.pb.h"
#include "gen_cpp/PlanNodes_types.h"

namespace doris {

// Derives an ExecNode's resource consumption from its RuntimeProfile.
// CPU consumption is measured by the number of rows processed and IO
// consumption is measured by the size of the scans (compressed bytes read).
class ExecNodeConsumptionProvider {
public:
    ExecNodeConsumptionProvider() {
        init();
    }

    // Accumulated cpu/io consumption of a (sub-)plan. Counters are atomic so
    // that concurrent senders can fold their consumption into one instance.
    class Consumption {
    public:
        Consumption() : cpu(0), io(0) {
        }

        // Fold another consumption into this one.
        void add(const Consumption& other) {
            cpu.add(other.cpu);
            io.add(other.io);
        }

        // Write this consumption into its protobuf form for transport.
        // `consumption` must be non-null.
        void serialize(PQueryConsumption* consumption) {
            DCHECK(consumption != nullptr);
            consumption->set_cpu(cpu.load());
            consumption->set_io(io.load());
        }

        // Overwrite this consumption with values received over the wire.
        void deserialize(const PQueryConsumption& consumption) {
            cpu.store(consumption.cpu());
            io.store(consumption.io());
        }

        int64_t get_cpu() {
            return cpu.load();
        }

        int64_t get_io() {
            return io.load();
        }

        // Replace both counters at once.
        void set(int64_t cpu, int64_t io) {
            this->cpu.store(cpu);
            this->io.store(io);
        }

        // Atomic members are not copy-assignable member-wise, so copy the
        // current values explicitly.
        Consumption& operator=(const Consumption& other) {
            if (this != &other) {
                set(other.cpu, other.io);
            }
            return *this;
        }
    private:
        AtomicInt64 cpu;
        AtomicInt64 io;
    };

    // Walk the whole profile tree and sum the consumption of every child
    // profile that belongs to a known ExecNode type.
    Consumption get_consumption(RuntimeProfile* profile) {
        Consumption total_consumption;
        std::vector<RuntimeProfile*> all_profiles;
        profile->get_all_children(&all_profiles);
        for (auto child : all_profiles) {
            // An ExecNode's RuntimeProfile name is "$ExecNode_type_name (id=?)",
            // so splitting on ' ' yields exactly two elements; the first one is
            // the node-type key used for the dispatch table below.
            std::vector<std::string> elements;
            boost::split(elements, child->name(), boost::is_any_of(" "), boost::token_compress_off);
            Consumption consumption;
            bool has = get_consumption(child, &consumption, elements[0]);
            if (elements.size() == 2 && has) {
                total_consumption.add(consumption);
            }
        }
        return total_consumption;
    }

private:

    // Register one extraction function per ExecNode type of interest.
    void init() {
        functions["OLAP_SCAN_NODE"] = get_olap_scan_consumption;
        functions["HASH_JOIN_NODE"] = get_hash_join_consumption;
        functions["AGGREGATION_NODE"] = get_hash_agg_consumption;
        functions["SORT_NODE"] = get_sort_consumption;
        functions["ANALYTIC_EVAL_NODE"] = get_windows_consumption;
        functions["UNION_NODE"] = get_union_consumption;
        functions["EXCHANGE_NODE"] = get_exchange_consumption;
    }

    // Dispatch to the extraction function registered for `name`.
    // Returns false when `name` is not a known ExecNode type.
    bool get_consumption(RuntimeProfile* profile, Consumption* consumption, const std::string& name) {
        // Use find() rather than operator[]: operator[] would default-insert an
        // empty entry for every unknown node-type name it is queried with.
        auto it = functions.find(name);
        if (it == functions.end() || it->second == nullptr) {
            return false;
        }
        it->second(profile, consumption);
        return true;
    }

    // IO: bytes read from storage. The counter may be absent (e.g. the node
    // never opened a scanner), so guard against a null counter.
    static void get_olap_scan_consumption(RuntimeProfile* profile, Consumption* consumption) {
        RuntimeProfile::Counter* read_compressed_counter = profile->get_counter("CompressedBytesRead");
        if (read_compressed_counter != nullptr) {
            consumption->set(0, read_compressed_counter->value());
        }
    }

    // CPU: probe rows + build rows processed by the join.
    static void get_hash_join_consumption(RuntimeProfile* profile, Consumption* consumption) {
        RuntimeProfile::Counter* probe_counter = profile->get_counter("ProbeRows");
        RuntimeProfile::Counter* build_counter = profile->get_counter("BuildRows");
        if (probe_counter != nullptr && build_counter != nullptr) {
            consumption->set(probe_counter->value() + build_counter->value(), 0);
        }
    }

    // CPU: rows fed into the aggregation hash table.
    static void get_hash_agg_consumption(RuntimeProfile* profile, Consumption* consumption) {
        RuntimeProfile::Counter* build_counter = profile->get_counter("BuildRows");
        if (build_counter != nullptr) {
            consumption->set(build_counter->value(), 0);
        }
    }

    // CPU: rows sorted.
    static void get_sort_consumption(RuntimeProfile* profile, Consumption* consumption) {
        RuntimeProfile::Counter* sort_counter = profile->get_counter("SortRows");
        if (sort_counter != nullptr) {
            consumption->set(sort_counter->value(), 0);
        }
    }

    // CPU: rows processed by the analytic (window) evaluation.
    static void get_windows_consumption(RuntimeProfile* profile, Consumption* consumption) {
        RuntimeProfile::Counter* process_counter = profile->get_counter("ProcessRows");
        if (process_counter != nullptr) {
            consumption->set(process_counter->value(), 0);
        }
    }

    // CPU: rows materialized by the union.
    static void get_union_consumption(RuntimeProfile* profile, Consumption* consumption) {
        RuntimeProfile::Counter* materialize_counter = profile->get_counter("MaterializeRows");
        if (materialize_counter != nullptr) {
            consumption->set(materialize_counter->value(), 0);
        }
    }

    // CPU: rows merged by an exchange doing merge sort. A plain exchange has
    // no "MergeRows" counter, in which case it contributes nothing.
    static void get_exchange_consumption(RuntimeProfile* profile, Consumption* consumption) {
        RuntimeProfile::Counter* merge_counter = profile->get_counter("MergeRows");
        // exchange merge sort
        if (merge_counter != nullptr) {
            consumption->set(merge_counter->value(), 0);
        }
    }

    typedef std::function<void(RuntimeProfile*, Consumption*)> ConsumptionFunc;
    // ExecNode type name to extraction function.
    std::map<std::string, ConsumptionFunc> functions;
};

}

#endif
|
||||
@ -333,6 +333,8 @@ Status PlanFragmentExecutor::open_internal() {
|
||||
// audit the sinks to check that this is ok, or change that behaviour.
|
||||
{
|
||||
SCOPED_TIMER(profile()->total_time_counter());
|
||||
ExecNodeConsumptionProvider::Consumption consumption = runtime_state()->get_consumption();
|
||||
_sink->set_query_consumption(consumption);
|
||||
Status status = _sink->close(runtime_state(), _status);
|
||||
RETURN_IF_ERROR(status);
|
||||
}
|
||||
|
||||
@ -82,6 +82,8 @@ Status ResultSink::close(RuntimeState* state, Status exec_status) {
|
||||
}
|
||||
// close sender, this is normal path end
|
||||
if (_sender) {
|
||||
// In the last, send consumption of execnode.
|
||||
_sender->set_query_consumption(_query_consumption);
|
||||
_sender->close(exec_status);
|
||||
}
|
||||
state->exec_env()->result_mgr()->cancel_at_time(time(NULL) + config::result_buffer_cancelled_interval_time,
|
||||
|
||||
@ -33,6 +33,7 @@
|
||||
|
||||
#include "common/global_types.h"
|
||||
#include "util/logging.h"
|
||||
#include "runtime/exec_node_consumption_provider.h"
|
||||
#include "runtime/mem_pool.h"
|
||||
#include "runtime/thread_resource_mgr.h"
|
||||
#include "gen_cpp/Types_types.h" // for TUniqueId
|
||||
@ -492,6 +493,18 @@ public:
|
||||
return _is_running;
|
||||
}
|
||||
|
||||
void add_sub_plan_consumption(const ExecNodeConsumptionProvider::Consumption& consumption) {
|
||||
_sub_plan_consumption.add(consumption);
|
||||
}
|
||||
|
||||
ExecNodeConsumptionProvider::Consumption get_consumption() {
|
||||
ExecNodeConsumptionProvider provider;
|
||||
ExecNodeConsumptionProvider::Consumption total_consumption;
|
||||
total_consumption = provider.get_consumption(&_profile);
|
||||
total_consumption.add(_sub_plan_consumption);
|
||||
return total_consumption;
|
||||
}
|
||||
|
||||
private:
|
||||
// Allow TestEnv to set block_mgr manually for testing.
|
||||
friend class TestEnv;
|
||||
@ -638,6 +651,9 @@ private:
|
||||
/// TODO: not needed if we call ReleaseResources() in a timely manner (IMPALA-1575).
|
||||
AtomicInt32 _initial_reservation_refcnt;
|
||||
|
||||
// Consumption from sub plan, it should only be updated by ExchangeNode.
|
||||
ExecNodeConsumptionProvider::Consumption _sub_plan_consumption;
|
||||
|
||||
// prohibit copies
|
||||
RuntimeState(const RuntimeState&);
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user