branch-2.1: [improve](move-memtable) improve error log and message for "not enough streams" #47470 (#47722)

Cherry-picked from #47470

Co-authored-by: Kaijie Chen <chenkaijie@selectdb.com>
This commit is contained in:
github-actions[bot]
2025-02-12 14:42:11 +08:00
committed by GitHub
parent cb3e77379d
commit f85176bee0
2 changed files with 20 additions and 3 deletions

View File

@ -377,6 +377,7 @@ void VTabletWriterV2::_generate_rows_for_tablet(std::vector<RowPartTabletIds>& r
Status VTabletWriterV2::_select_streams(int64_t tablet_id, int64_t partition_id, int64_t index_id,
std::vector<std::shared_ptr<LoadStreamStub>>& streams) {
std::vector<int64_t> failed_node_ids;
const auto* location = _location->find_tablet(tablet_id);
DBUG_EXECUTE_IF("VTabletWriterV2._select_streams.location_null", { location = nullptr; });
if (location == nullptr) {
@ -396,6 +397,9 @@ Status VTabletWriterV2::_select_streams(int64_t tablet_id, int64_t partition_id,
<< ", stream_ok=" << (stream == nullptr ? "no" : "yes");
});
if (stream == nullptr) {
LOG(WARNING) << "skip writing tablet " << tablet_id << " to backend " << node_id
<< ": stream is not open";
failed_node_ids.push_back(node_id);
continue;
}
streams.emplace_back(std::move(stream));
@ -406,8 +410,21 @@ Status VTabletWriterV2::_select_streams(int64_t tablet_id, int64_t partition_id,
<< ", num_nodes=" << location->node_ids.size();
});
if (streams.size() <= location->node_ids.size() / 2) {
return Status::InternalError("not enough streams {}/{}", streams.size(),
location->node_ids.size());
std::ostringstream success_msg;
std::ostringstream failed_msg;
for (auto& s : streams) {
success_msg << ", " << s->dst_id();
}
for (auto id : failed_node_ids) {
failed_msg << ", " << id;
}
LOG(INFO) << "failed to write enough replicas " << streams.size() << "/"
<< location->node_ids.size() << " for tablet " << tablet_id
<< " due to connection errors; success nodes" << success_msg.str()
<< "; failed nodes" << failed_msg.str() << ".";
return Status::InternalError(
"failed to write enough replicas {}/{} for tablet {} due to connection errors",
streams.size(), location->node_ids.size(), tablet_id);
}
Status st;
for (auto& stream : streams) {

View File

@ -102,7 +102,7 @@ suite("test_multi_replica_fault_injection", "nonConcurrent") {
// test one backend open failure
load_with_injection("VTabletWriterV2._open_streams.skip_one_backend", "success", true)
// test two backend open failure
load_with_injection("VTabletWriterV2._open_streams.skip_two_backends", "not enough streams 1/3", false, "succ replica num 1 < load required replica num 2")
load_with_injection("VTabletWriterV2._open_streams.skip_two_backends", "failed to write enough replicas 1/3 for tablet", false, "succ replica num 1 < load required replica num 2")
sql """ set enable_memtable_on_sink_node=false """
}
}