[improvement](load)add more log for better bug tracing experience for be write (#14424)

Recently when tracing one bug happened in version 1.1.4
I found out there were some places we can add more log for a better tracing.
This commit is contained in:
AlexYue
2022-11-29 22:28:39 +08:00
committed by GitHub
parent 82579126cf
commit 898d0d42f1
3 changed files with 24 additions and 4 deletions

View File

@ -216,6 +216,7 @@ Status NodeChannel::open_wait() {
// if this is last rpc, will must set _add_batches_finished. otherwise, node channel's close_wait
// will be blocked.
_add_batches_finished = true;
VLOG_PROGRESS << "node channel " << channel_info() << "add_batches_finished";
}
});
@ -271,6 +272,9 @@ Status NodeChannel::open_wait() {
}
}
_add_batches_finished = true;
VLOG_PROGRESS << "node channel " << channel_info()
<< "add_batches_finished and handled "
<< result.tablet_errors().size() << " tablets errors";
}
} else {
_cancel_with_msg(
@ -408,7 +412,8 @@ void NodeChannel::mark_close() {
DCHECK(_pending_batches.back().second.eos());
_close_time_ms = UnixMillis();
LOG(INFO) << channel_info()
<< " mark closed, left pending batch size: " << _pending_batches.size();
<< " mark closed, left pending batch size: " << _pending_batches.size()
<< " left pending batch size: " << _pending_batches_bytes;
}
_eos_is_produced = true;
@ -718,6 +723,8 @@ Status IndexChannel::init(RuntimeState* state, const std::vector<TTabletWithPart
void IndexChannel::mark_as_failed(int64_t node_id, const std::string& host, const std::string& err,
int64_t tablet_id) {
VLOG_PROGRESS << "mark node_id:" << node_id << " tablet_id: " << tablet_id
<< " as failed, err: " << err;
const auto& it = _tablets_by_channel.find(node_id);
if (it == _tablets_by_channel.end()) {
return;
@ -950,6 +957,10 @@ Status OlapTableSink::prepare(RuntimeState* state) {
tablets.emplace_back(std::move(tablet_with_partition));
}
}
if (UNLIKELY(tablets.empty())) {
LOG(WARNING) << "load job:" << state->load_job_id() << " index: " << index->index_id
<< " would open 0 tablet";
}
_channels.emplace_back(new IndexChannel(this, index->index_id, use_vec));
RETURN_IF_ERROR(_channels.back()->init(state, tablets));
}

View File

@ -223,7 +223,7 @@ Status DeltaWriter::write(const vectorized::Block* block, const std::vector<int>
if (_mem_table->is_flush()) {
auto s = _flush_memtable_async();
_reset_mem_table();
if (OLAP_UNLIKELY(!s.ok())) {
if (UNLIKELY(!s.ok())) {
return s;
}
}
@ -255,7 +255,7 @@ Status DeltaWriter::flush_memtable_and_wait(bool need_wait) {
<< ", load id: " << print_id(_req.load_id);
auto s = _flush_memtable_async();
_reset_mem_table();
if (OLAP_UNLIKELY(!s.ok())) {
if (UNLIKELY(!s.ok())) {
return s;
}
@ -346,7 +346,11 @@ Status DeltaWriter::close_wait(const PSlaveTabletNodes& slave_tablet_nodes,
return _cancel_status;
}
// return error if previous flush failed
RETURN_NOT_OK(_flush_token->wait());
auto st = _flush_token->wait();
if (UNLIKELY(!st.ok())) {
LOG(WARNING) << "previous flush failed tablet " << _tablet->tablet_id();
return st;
}
_mem_table.reset();

View File

@ -130,6 +130,8 @@ Status TabletsChannel::close(
// just skip this tablet(writer) and continue to close others
continue;
}
VLOG_PROGRESS << "cancel tablet writer successfully, tablet_id=" << it.first
<< ", transaction_id=" << _txn_id;
}
}
@ -189,6 +191,8 @@ void TabletsChannel::_close_wait(DeltaWriter* writer,
PTabletError* tablet_error = tablet_errors->Add();
tablet_error->set_tablet_id(writer->tablet_id());
tablet_error->set_msg(st.get_error_msg());
VLOG_PROGRESS << "close wait failed tablet " << writer->tablet_id() << " transaction_id "
<< _txn_id << "err msg " << st.get_error_msg();
}
}
@ -443,6 +447,7 @@ Status TabletsChannel::add_batch(const TabletWriterAddRequest& request,
int64_t tablet_id = request.tablet_ids(i);
if (_broken_tablets.find(tablet_id) != _broken_tablets.end()) {
// skip broken tablets
VLOG_PROGRESS << "skip broken tablet tablet=" << tablet_id;
continue;
}
auto it = tablet_to_rowidxs.find(tablet_id);