[improvement](insert) refactor group commit stream load (#25560)

Author: meiyi
Date: 2023-10-20 13:27:30 +08:00
Committed by: GitHub
Parent: 9a675fcdfc
Commit: d0cd535cb9
26 changed files with 518 additions and 358 deletions
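
In short: the in-process stream load execution path (_group_commit_stream_load() and its scan-and-sink loop) is removed from GroupCommitMgr, and _get_first_block_load_queue() becomes the public get_first_block_load_queue(). Loads now attach to a block queue through explicit LoadBlockQueue::add_load_id()/remove_load_id() calls instead of first/eos flags on every block, the commit window moves from config::group_commit_interval_seconds to config::group_commit_interval_ms, and the internal group commit plan is created asynchronously on a new GroupCommitThreadPool while callers wait on a condition variable.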

@@ -48,11 +48,6 @@ Status LoadBlockQueue::add_block(std::shared_ptr<vectorized::FutureBlock> block)
if (block->rows() > 0) {
_block_queue.push_back(block);
}
if (block->is_eos()) {
_load_ids.erase(block->get_load_id());
} else if (block->is_first()) {
_load_ids.emplace(block->get_load_id());
}
_cv->notify_one();
return Status::OK();
}
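With the first/eos flags gone from add_block(), membership in a queue is tracked explicitly: a load registers its id before it starts pushing blocks and unregisters when it finishes or fails (add_load_id() and remove_load_id() appear later in this file). A minimal standalone sketch of that register/unregister contract, using simplified stand-in types rather than the actual Doris classes:

// Simplified sketch (not the Doris API): a queue that tracks which loads
// are still producing, so the consumer knows when it may emit eos.
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <mutex>
#include <set>

struct SimpleBlockQueue {
    std::mutex mu;
    std::condition_variable cv;
    std::deque<int> blocks;            // stand-in for vectorized::Block
    std::set<int64_t> load_ids;        // loads currently registered
    bool need_commit = false;

    bool add_load_id(int64_t id) {     // register before producing
        std::lock_guard<std::mutex> l(mu);
        if (need_commit) return false; // queue is sealing, pick another one
        load_ids.insert(id);
        return true;
    }
    void remove_load_id(int64_t id) {  // unregister when the load finishes or fails
        std::lock_guard<std::mutex> l(mu);
        load_ids.erase(id);
        cv.notify_all();               // a waiting consumer may now see eos
    }
    void add_block(int block) {
        std::lock_guard<std::mutex> l(mu);
        blocks.push_back(block);
        cv.notify_one();
    }
};

The consumer can then treat "queue drained, commit requested, and no load still registered" as end of stream, which is exactly the simplified eos check further down in get_block().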
@@ -62,31 +57,31 @@ Status LoadBlockQueue::get_block(vectorized::Block* block, bool* find_block, boo
*eos = false;
std::unique_lock l(*_mutex);
if (!need_commit) {
auto left_seconds = config::group_commit_interval_seconds -
std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::steady_clock::now() - _start_time)
.count();
if (left_seconds <= 0) {
auto left_milliseconds = config::group_commit_interval_ms -
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - _start_time)
.count();
if (left_milliseconds <= 0) {
need_commit = true;
}
}
while (_status.ok() && _block_queue.empty() &&
(!need_commit || (need_commit && !_load_ids.empty()))) {
auto left_seconds = config::group_commit_interval_seconds;
auto left_milliseconds = config::group_commit_interval_ms;
if (!need_commit) {
left_seconds = config::group_commit_interval_seconds -
std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::steady_clock::now() - _start_time)
.count();
if (left_seconds <= 0) {
left_milliseconds = config::group_commit_interval_ms -
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::steady_clock::now() - _start_time)
.count();
if (left_milliseconds <= 0) {
need_commit = true;
break;
}
}
#if !defined(USE_BTHREAD_SCANNER)
_cv->wait_for(l, std::chrono::seconds(left_seconds));
_cv->wait_for(l, std::chrono::milliseconds(left_milliseconds));
#else
_cv->wait_for(l, left_seconds * 1000000);
_cv->wait_for(l, left_milliseconds * 1000);
#endif
}
if (!_block_queue.empty()) {
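The commit window is now measured in milliseconds (config::group_commit_interval_ms), and the remaining budget is recomputed on every wakeup before the queue waits again. A small self-contained illustration of that wait pattern with std::chrono and std::condition_variable; the names are illustrative, not the Doris ones:

#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>

// Wait until either work arrives or the group commit window expires.
// 'interval_ms' plays the role of config::group_commit_interval_ms.
bool wait_for_window(std::condition_variable& cv, std::unique_lock<std::mutex>& l,
                     std::chrono::steady_clock::time_point start_time,
                     int64_t interval_ms, bool& need_commit) {
    using namespace std::chrono;
    auto left_ms = interval_ms -
                   duration_cast<milliseconds>(steady_clock::now() - start_time).count();
    if (left_ms <= 0) {          // window already elapsed: time to commit
        need_commit = true;
        return false;
    }
    // Spurious wakeups are harmless: the caller re-checks the queue and the
    // remaining budget in its loop, just as LoadBlockQueue::get_block does.
    cv.wait_for(l, milliseconds(left_ms));
    return true;
}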
@@ -96,12 +91,10 @@ Status LoadBlockQueue::get_block(vectorized::Block* block, bool* find_block, boo
*find_block = true;
_block_queue.pop_front();
}
if (_block_queue.empty()) {
if (need_commit && _load_ids.empty()) {
*eos = true;
} else {
*eos = false;
}
if (_block_queue.empty() && need_commit && _load_ids.empty()) {
*eos = true;
} else {
*eos = false;
}
return Status::OK();
}
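The nested eos branches collapse into a single decision: end of stream is reported only when the queue is drained, a commit has been requested, and no load is still registered. As one equivalent assignment (an alternative formulation, not part of this diff):

*eos = _block_queue.empty() && need_commit && _load_ids.empty();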
@@ -114,6 +107,16 @@ void LoadBlockQueue::remove_load_id(const UniqueId& load_id) {
}
}
Status LoadBlockQueue::add_load_id(const UniqueId& load_id) {
std::unique_lock l(*_mutex);
if (need_commit) {
return Status::InternalError("block queue is set need commit, id=" +
load_instance_id.to_string());
}
_load_ids.emplace(load_id);
return Status::OK();
}
void LoadBlockQueue::cancel(const Status& st) {
DCHECK(!st.ok());
std::unique_lock l(*_mutex);
@@ -133,59 +136,62 @@ Status GroupCommitTable::get_first_block_load_queue(
int64_t table_id, std::shared_ptr<vectorized::FutureBlock> block,
std::shared_ptr<LoadBlockQueue>& load_block_queue) {
DCHECK(table_id == _table_id);
DCHECK(block->is_first() == true);
auto base_schema_version = block->get_schema_version();
{
std::unique_lock l(_lock);
for (auto it = _load_block_queues.begin(); it != _load_block_queues.end(); ++it) {
// TODO if block schema version is less than fragment schema version, return error
if (!it->second->need_commit &&
it->second->schema_version == block->get_schema_version()) {
if (block->get_schema_version() == it->second->schema_version) {
load_block_queue = it->second;
break;
} else if (block->get_schema_version() < it->second->schema_version) {
return Status::DataQualityError("schema version not match");
}
}
}
}
if (load_block_queue == nullptr) {
Status st = Status::OK();
for (int i = 0; i < 3; ++i) {
std::unique_lock l(_request_fragment_mutex);
// check if there is a reusable fragment
{
std::unique_lock l1(_lock);
for (auto it = _load_block_queues.begin(); it != _load_block_queues.end(); ++it) {
// TODO if block schema version is less than fragment schema version, return error
if (!it->second->need_commit) {
if (block->get_schema_version() == it->second->schema_version) {
for (int i = 0; i < 3; i++) {
bool is_schema_version_match = true;
for (auto it = _load_block_queues.begin(); it != _load_block_queues.end(); ++it) {
if (!it->second->need_commit) {
if (base_schema_version == it->second->schema_version) {
if (it->second->add_load_id(block->get_load_id()).ok()) {
load_block_queue = it->second;
break;
} else if (block->get_schema_version() < it->second->schema_version) {
return Status::DataQualityError("schema version not match");
return Status::OK();
}
} else if (base_schema_version < it->second->schema_version) {
is_schema_version_match = false;
}
}
}
if (load_block_queue == nullptr) {
st = _create_group_commit_load(table_id, load_block_queue);
if (LIKELY(st.ok())) {
break;
if (!is_schema_version_match) {
return Status::DataQualityError("schema version not match");
}
if (!_need_plan_fragment) {
_need_plan_fragment = true;
RETURN_IF_ERROR(_thread_pool->submit_func([&] {
[[maybe_unused]] auto st = _create_group_commit_load(load_block_queue);
}));
}
#if !defined(USE_BTHREAD_SCANNER)
_cv.wait_for(l, std::chrono::seconds(4));
#else
_cv.wait_for(l, 4 * 1000000);
#endif
if (load_block_queue != nullptr) {
if (load_block_queue->schema_version == base_schema_version) {
if (load_block_queue->add_load_id(block->get_load_id()).ok()) {
return Status::OK();
}
} else if (base_schema_version < load_block_queue->schema_version) {
return Status::DataQualityError("schema version not match");
}
load_block_queue.reset();
}
}
RETURN_IF_ERROR(st);
if (load_block_queue->schema_version != block->get_schema_version()) {
// TODO check this is the first block
return Status::DataQualityError("schema version not match");
}
}
return Status::OK();
return Status::InternalError("can not get a block queue");
}
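get_first_block_load_queue() now creates the internal load lazily and cooperatively: the first caller that finds no usable queue sets _need_plan_fragment and hands _create_group_commit_load() to the thread pool, every caller waits up to four seconds on the condition variable for a queue it can register with, and the whole attempt is retried at most three times before failing with "can not get a block queue". A condensed standalone sketch of that single-planner/many-waiters shape, with simplified types in place of the Doris classes:

#include <chrono>
#include <condition_variable>
#include <functional>
#include <memory>
#include <mutex>

struct PlannerState {
    std::mutex mu;
    std::condition_variable cv;
    bool planning = false;                 // mirrors _need_plan_fragment
    std::shared_ptr<int> queue;            // stand-in for LoadBlockQueue
};

// submit_async is a stand-in for ThreadPool::submit_func; it must run the
// task on another thread, otherwise the lock below would deadlock.
std::shared_ptr<int> acquire_queue(PlannerState& s,
                                   const std::function<void(std::function<void()>)>& submit_async) {
    std::unique_lock<std::mutex> l(s.mu);
    for (int attempt = 0; attempt < 3; ++attempt) {
        if (s.queue) {
            return s.queue;                // a previously planned queue can be reused
        }
        if (!s.planning) {
            s.planning = true;             // only one caller kicks off the plan
            submit_async([&s] {
                auto q = std::make_shared<int>(42);   // pretend planning succeeded
                std::lock_guard<std::mutex> g(s.mu);
                s.queue = q;
                s.planning = false;
                s.cv.notify_all();         // wake everyone waiting for a queue
            });
        }
        s.cv.wait_for(l, std::chrono::seconds(4));
    }
    return nullptr;                        // caller maps this to an InternalError
}

The schema-version checks and the add_load_id() registration from the real loop are omitted here; the sketch only shows the flag and condition-variable choreography.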
Status GroupCommitTable::_create_group_commit_load(
int64_t table_id, std::shared_ptr<LoadBlockQueue>& load_block_queue) {
std::shared_ptr<LoadBlockQueue>& load_block_queue) {
Status st = Status::OK();
std::unique_ptr<int, std::function<void(int*)>> remove_pipe_func((int*)0x01, [&](int*) {
if (!st.ok()) {
std::unique_lock l(_lock);
_need_plan_fragment = false;
_cv.notify_all();
}
});
TStreamLoadPutRequest request;
UniqueId load_id = UniqueId::gen_uid();
TUniqueId tload_id;
@@ -194,8 +200,8 @@ Status GroupCommitTable::_create_group_commit_load(
std::regex reg("-");
std::string label = "group_commit_" + std::regex_replace(load_id.to_string(), reg, "_");
std::stringstream ss;
ss << "insert into table_id(" << table_id << ") WITH LABEL " << label
<< " select * from group_commit(\"table_id\"=\"" << table_id << "\")";
ss << "insert into table_id(" << _table_id << ") WITH LABEL " << label
<< " select * from group_commit(\"table_id\"=\"" << _table_id << "\")";
request.__set_load_sql(ss.str());
request.__set_loadId(tload_id);
request.__set_label(label);
@@ -209,13 +215,14 @@ Status GroupCommitTable::_create_group_commit_load(
}
TStreamLoadPutResult result;
TNetworkAddress master_addr = _exec_env->master_info()->network_address;
RETURN_IF_ERROR(ThriftRpcHelper::rpc<FrontendServiceClient>(
st = ThriftRpcHelper::rpc<FrontendServiceClient>(
master_addr.hostname, master_addr.port,
[&result, &request](FrontendServiceConnection& client) {
client->streamLoadPut(result, request);
},
10000L));
Status st = Status::create(result.status);
10000L);
RETURN_IF_ERROR(st);
st = Status::create(result.status);
if (!st.ok()) {
LOG(WARNING) << "create group commit load error, st=" << st.to_string();
}
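The RPC outcome is now assigned into the outer st rather than wrapped directly in RETURN_IF_ERROR, so the cleanup guard declared at the top of _create_group_commit_load (remove_pipe_func, the unique_ptr with a custom deleter) observes the failure, clears _need_plan_fragment, and wakes the callers blocked in get_first_block_load_queue(); without that, they would only give up once their wait timeouts expired. A generic sketch of that scope-exit idiom, using standard types and a placeholder fallible_step in place of the RPC and plan-fragment calls:

#include <condition_variable>
#include <functional>
#include <memory>
#include <mutex>

// Scope-exit guard: the "pointer" 0x01 is never dereferenced, only the
// deleter matters, and it runs on every return path of the function.
bool create_load(std::mutex& mu, std::condition_variable& cv, bool& planning,
                 const std::function<bool()>& fallible_step) {
    bool ok = false;
    std::unique_ptr<int, std::function<void(int*)>> guard((int*)0x01, [&](int*) {
        if (!ok) {                       // on failure, reset the shared flag
            std::lock_guard<std::mutex> l(mu);
            planning = false;
            cv.notify_all();             // let blocked callers retry or fail fast
        }
    });
    ok = fallible_step();                // stands in for the RPC + plan steps
    return ok;                           // guard fires here and sees the final value of ok
}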
@@ -236,7 +243,7 @@ Status GroupCommitTable::_create_group_commit_load(
DCHECK(pipeline_params.local_params.size() == 1);
instance_id = pipeline_params.local_params[0].fragment_instance_id;
}
VLOG_DEBUG << "create plan fragment, db_id=" << _db_id << ", table=" << table_id
VLOG_DEBUG << "create plan fragment, db_id=" << _db_id << ", table=" << _table_id
<< ", schema version=" << schema_version << ", label=" << label
<< ", txn_id=" << txn_id << ", instance_id=" << print_id(instance_id)
<< ", is_pipeline=" << is_pipeline;
@@ -245,11 +252,13 @@ Status GroupCommitTable::_create_group_commit_load(
std::make_shared<LoadBlockQueue>(instance_id, label, txn_id, schema_version);
std::unique_lock l(_lock);
_load_block_queues.emplace(instance_id, load_block_queue);
_need_plan_fragment = false;
_cv.notify_all();
}
params.__set_import_label(label);
st = _exec_plan_fragment(_db_id, table_id, label, txn_id, is_pipeline, params, pipeline_params);
st = _exec_plan_fragment(_db_id, _table_id, label, txn_id, is_pipeline, params,
pipeline_params);
if (!st.ok()) {
static_cast<void>(_finish_group_commit_load(_db_id, table_id, label, txn_id, instance_id,
static_cast<void>(_finish_group_commit_load(_db_id, _table_id, label, txn_id, instance_id,
st, true, nullptr));
}
return st;
@@ -346,6 +355,7 @@ Status GroupCommitTable::_finish_group_commit_load(int64_t db_id, int64_t table_
if (state && !(state->get_error_log_file_path().empty())) {
ss << ", error_url=" << state->get_error_log_file_path();
}
ss << ", rows=" << state->num_rows_load_success();
LOG(INFO) << ss.str();
return st;
}
@@ -384,6 +394,10 @@ GroupCommitMgr::GroupCommitMgr(ExecEnv* exec_env) : _exec_env(exec_env) {
.set_min_threads(config::group_commit_insert_threads)
.set_max_threads(config::group_commit_insert_threads)
.build(&_insert_into_thread_pool));
static_cast<void>(ThreadPoolBuilder("GroupCommitThreadPool")
.set_min_threads(1)
.set_max_threads(config::group_commit_insert_threads)
.build(&_thread_pool));
}
GroupCommitMgr::~GroupCommitMgr() {
@@ -392,6 +406,7 @@ GroupCommitMgr::~GroupCommitMgr() {
void GroupCommitMgr::stop() {
_insert_into_thread_pool->shutdown();
_thread_pool->shutdown();
LOG(INFO) << "GroupCommitMgr is stopped";
}
@@ -456,17 +471,16 @@ Status GroupCommitMgr::group_commit_insert(int64_t table_id, const TPlan& plan,
std::unique_ptr<doris::vectorized::Block> _block =
doris::vectorized::Block::create_unique();
bool eof = false;
bool first = true;
while (!eof) {
// TODO what to do if read one block error
RETURN_IF_ERROR(file_scan_node.get_next(runtime_state.get(), _block.get(), &eof));
std::shared_ptr<doris::vectorized::FutureBlock> future_block =
std::make_shared<doris::vectorized::FutureBlock>();
future_block->swap(*(_block.get()));
future_block->set_info(request->base_schema_version(), load_id, first, eof);
future_block->set_info(request->base_schema_version(), load_id);
if (load_block_queue == nullptr) {
RETURN_IF_ERROR(_get_first_block_load_queue(request->db_id(), table_id,
future_block, load_block_queue));
RETURN_IF_ERROR(get_first_block_load_queue(request->db_id(), table_id, future_block,
load_block_queue));
response->set_label(load_block_queue->label);
response->set_txn_id(load_block_queue->txn_id);
}
@@ -475,7 +489,6 @@ Status GroupCommitMgr::group_commit_insert(int64_t table_id, const TPlan& plan,
future_blocks.emplace_back(future_block);
}
RETURN_IF_ERROR(load_block_queue->add_block(future_block));
first = false;
}
if (!runtime_state->get_error_log_file_path().empty()) {
LOG(INFO) << "id=" << print_id(load_id)
@@ -515,139 +528,15 @@ Status GroupCommitMgr::_append_row(std::shared_ptr<io::StreamLoadPipe> pipe,
return Status::OK();
}
Status GroupCommitMgr::group_commit_stream_load(std::shared_ptr<StreamLoadContext> ctx) {
return _insert_into_thread_pool->submit_func([ctx, this] {
Status st = _group_commit_stream_load(ctx);
if (!st.ok()) {
ctx->promise.set_value(st);
}
});
}
Status GroupCommitMgr::_group_commit_stream_load(std::shared_ptr<StreamLoadContext> ctx) {
auto& fragment_params = ctx->put_result.params;
auto& tdesc_tbl = fragment_params.desc_tbl;
DCHECK(fragment_params.params.per_node_scan_ranges.size() == 1);
DCHECK(fragment_params.params.per_node_scan_ranges.begin()->second.size() == 1);
auto& tscan_range_params = fragment_params.params.per_node_scan_ranges.begin()->second.at(0);
auto& nodes = fragment_params.fragment.plan.nodes;
DCHECK(nodes.size() > 0);
auto& plan_node = nodes.at(0);
std::vector<std::shared_ptr<doris::vectorized::FutureBlock>> future_blocks;
{
std::shared_ptr<LoadBlockQueue> load_block_queue;
// 1. FileScanNode consumes data from the pipe.
std::unique_ptr<RuntimeState> runtime_state = RuntimeState::create_unique();
TUniqueId load_id;
load_id.hi = ctx->id.hi;
load_id.lo = ctx->id.lo;
TQueryOptions query_options;
query_options.query_type = TQueryType::LOAD;
TQueryGlobals query_globals;
static_cast<void>(runtime_state->init(load_id, query_options, query_globals, _exec_env));
runtime_state->set_query_mem_tracker(std::make_shared<MemTrackerLimiter>(
MemTrackerLimiter::Type::LOAD, fmt::format("Load#Id={}", ctx->id.to_string()), -1));
DescriptorTbl* desc_tbl = nullptr;
RETURN_IF_ERROR(DescriptorTbl::create(runtime_state->obj_pool(), tdesc_tbl, &desc_tbl));
runtime_state->set_desc_tbl(desc_tbl);
auto file_scan_node =
vectorized::NewFileScanNode(runtime_state->obj_pool(), plan_node, *desc_tbl);
Status status = Status::OK();
auto sink = stream_load::GroupCommitBlockSink(
runtime_state->obj_pool(), file_scan_node.row_desc(),
fragment_params.fragment.output_exprs, &status);
std::unique_ptr<int, std::function<void(int*)>> close_scan_node_func((int*)0x01, [&](int*) {
if (load_block_queue != nullptr) {
load_block_queue->remove_load_id(load_id);
}
static_cast<void>(file_scan_node.close(runtime_state.get()));
static_cast<void>(sink.close(runtime_state.get(), status));
});
RETURN_IF_ERROR(file_scan_node.init(plan_node, runtime_state.get()));
RETURN_IF_ERROR(file_scan_node.prepare(runtime_state.get()));
std::vector<TScanRangeParams> params_vector;
params_vector.emplace_back(tscan_range_params);
file_scan_node.set_scan_ranges(params_vector);
RETURN_IF_ERROR(file_scan_node.open(runtime_state.get()));
RETURN_IF_ERROR(status);
RETURN_IF_ERROR(sink.init(fragment_params.fragment.output_sink));
RETURN_IF_ERROR_OR_CATCH_EXCEPTION(sink.prepare(runtime_state.get()));
RETURN_IF_ERROR(sink.open(runtime_state.get()));
// 2. Put the block into block queue.
std::unique_ptr<doris::vectorized::Block> _block =
doris::vectorized::Block::create_unique();
bool first = true;
bool eof = false;
while (!eof) {
// TODO what to do if scan one block error
RETURN_IF_ERROR(file_scan_node.get_next(runtime_state.get(), _block.get(), &eof));
RETURN_IF_ERROR(sink.send(runtime_state.get(), _block.get()));
std::shared_ptr<doris::vectorized::FutureBlock> future_block =
std::make_shared<doris::vectorized::FutureBlock>();
future_block->swap(*(_block.get()));
future_block->set_info(ctx->schema_version, load_id, first, eof);
// TODO what to do if add one block error
if (load_block_queue == nullptr) {
RETURN_IF_ERROR(_get_first_block_load_queue(ctx->db_id, ctx->table_id, future_block,
load_block_queue));
ctx->label = load_block_queue->label;
ctx->txn_id = load_block_queue->txn_id;
}
if (future_block->rows() > 0) {
future_blocks.emplace_back(future_block);
}
RETURN_IF_ERROR(load_block_queue->add_block(future_block));
first = false;
}
ctx->number_unselected_rows = runtime_state->num_rows_load_unselected();
ctx->number_filtered_rows = runtime_state->num_rows_load_filtered();
ctx->error_url = runtime_state->get_error_log_file_path();
if (!runtime_state->get_error_log_file_path().empty()) {
LOG(INFO) << "id=" << print_id(load_id)
<< ", url=" << runtime_state->get_error_log_file_path()
<< ", load rows=" << runtime_state->num_rows_load_total()
<< ", filter rows=" << runtime_state->num_rows_load_filtered()
<< ", unselect rows=" << runtime_state->num_rows_load_unselected()
<< ", success rows=" << runtime_state->num_rows_load_success();
}
}
int64_t total_rows = 0;
int64_t loaded_rows = 0;
// 3. wait for the blocks to be written to the WAL
for (const auto& future_block : future_blocks) {
std::unique_lock<doris::Mutex> l(*(future_block->lock));
if (!future_block->is_handled()) {
future_block->cv->wait(l);
}
// future_block->get_status()
total_rows += future_block->get_total_rows();
loaded_rows += future_block->get_loaded_rows();
}
ctx->number_total_rows = total_rows + ctx->number_unselected_rows + ctx->number_filtered_rows;
ctx->number_loaded_rows = loaded_rows;
ctx->number_filtered_rows += total_rows - ctx->number_loaded_rows;
ctx->promise.set_value(Status::OK());
VLOG_DEBUG << "finish read all block of pipe=" << ctx->id.to_string()
<< ", total rows=" << ctx->number_total_rows
<< ", loaded rows=" << ctx->number_loaded_rows
<< ", filtered rows=" << ctx->number_filtered_rows
<< ", unselected rows=" << ctx->number_unselected_rows;
return Status::OK();
}
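The entire scan-and-sink body of _group_commit_stream_load() is dropped from GroupCommitMgr here; per the commit title, the stream load execution presumably moves into the other files of this change, which are not shown in this excerpt. What the removed code makes visible is the contract that remains: producers hand FutureBlocks to a LoadBlockQueue and later block until each one has been handled, summing total and loaded row counts. A reduced illustration of that completion-wait pattern, using one std::promise/std::future per block as a stand-in for FutureBlock's lock and condition variable:

#include <future>
#include <utility>
#include <vector>

struct BlockResult {                 // what the consumer reports back per block
    long total_rows = 0;
    long loaded_rows = 0;
};

// Consumer side: once a block has been written out, fulfil its promise
// (the analogue of marking the FutureBlock handled and notifying its cv).
void mark_handled(std::promise<BlockResult>& p, long total, long loaded) {
    p.set_value({total, loaded});
}

// Producer side: after submitting all blocks, wait for every one of them,
// mirroring the "wait until is_handled()" loop that was removed above.
std::pair<long, long> wait_for_blocks(std::vector<std::future<BlockResult>>& futures) {
    long total = 0;
    long loaded = 0;
    for (auto& f : futures) {
        BlockResult r = f.get();     // blocks until the consumer calls mark_handled()
        total += r.total_rows;
        loaded += r.loaded_rows;
    }
    return {total, loaded};
}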
Status GroupCommitMgr::_get_first_block_load_queue(
Status GroupCommitMgr::get_first_block_load_queue(
int64_t db_id, int64_t table_id, std::shared_ptr<vectorized::FutureBlock> block,
std::shared_ptr<LoadBlockQueue>& load_block_queue) {
std::shared_ptr<GroupCommitTable> group_commit_table;
{
std::lock_guard wlock(_lock);
if (_table_map.find(table_id) == _table_map.end()) {
_table_map.emplace(table_id,
std::make_shared<GroupCommitTable>(_exec_env, db_id, table_id));
_table_map.emplace(table_id, std::make_shared<GroupCommitTable>(
_exec_env, _thread_pool.get(), db_id, table_id));
}
group_commit_table = _table_map[table_id];
}