Implement the routine load process of Kafka on Backend (#671)
@@ -178,12 +178,12 @@ set_target_properties(brpc PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib64/
add_library(rocksdb STATIC IMPORTED)
set_target_properties(rocksdb PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/librocksdb.a)

add_library(librdkafka STATIC IMPORTED)
set_target_properties(librdkafka PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/librdkafka.a)

add_library(librdkafka_cpp STATIC IMPORTED)
set_target_properties(librdkafka_cpp PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/librdkafka++.a)

add_library(librdkafka STATIC IMPORTED)
set_target_properties(librdkafka PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/librdkafka.a)

find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin)

# llvm-config

@@ -440,9 +440,10 @@ set(DORIS_LINK_LIBS

# Set thirdparty libraries
set(DORIS_DEPENDENCIES
    ${WL_START_GROUP}
    rocksdb
    librdkafka
    librdkafka_cpp
    librdkafka
    lzo
    snappy
    ${Boost_LIBRARIES}

@@ -456,7 +457,6 @@ set(DORIS_DEPENDENCIES
    libevent
    mysql
    curl
    ${WL_START_GROUP}
    ${LIBZ}
    ${LIBBZ2}
    gflags

@@ -464,8 +464,8 @@ set(DORIS_DEPENDENCIES
    protobuf
    openssl
    crypto
    ${WL_START_GROUP}
    leveldb
    ${WL_END_GROUP}
)

# Add all external dependencies. They should come after the palo libs.

@@ -34,6 +34,9 @@ const Status Status::MEM_LIMIT_EXCEEDED(
const Status Status::THRIFT_RPC_ERROR(
    TStatusCode::THRIFT_RPC_ERROR, "Thrift RPC failed", true);

const Status Status::TIMEOUT(
    TStatusCode::TIMEOUT, "timeout", true);

Status::ErrorDetail::ErrorDetail(const TStatus& status) :
        error_code(status.status_code),
        error_msgs(status.error_msgs) {

@@ -54,6 +54,7 @@ public:
    static const Status CANCELLED;
    static const Status MEM_LIMIT_EXCEEDED;
    static const Status THRIFT_RPC_ERROR;
    static const Status TIMEOUT;

    // copy c'tor makes copy of error detail so Status can be returned by value
    Status(const Status& status) : _error_detail(

@@ -15,9 +15,34 @@
// specific language governing permissions and limitations
// under the License.

#include "runtime/kafka_consumer_pipe.h"
#pragma once

#include <string>

namespace doris {

struct AuthInfo {
    std::string user;
    std::string passwd;
    std::string cluster;
    std::string user_ip;
    // -1 as unset
    int64_t auth_code = -1;
};

} // end namespace doris
template<class T>
void set_request_auth(T* req, const AuthInfo& auth) {
    if (auth.auth_code != -1) {
        // if auth_code is set, no need to set other info
        req->auth_code = auth.auth_code;
    } else {
        req->user = auth.user;
        req->passwd = auth.passwd;
        if (!auth.cluster.empty()) {
            req->__set_cluster(auth.cluster);
        }
        req->__set_user_ip(auth.user_ip);
    }
}

}
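A minimal usage sketch of this helper (TStreamLoadPutRequest is one real caller further below in this commit; the field values here are illustrative only):

    AuthInfo auth;
    auth.user = "root";                 // illustrative credentials
    auth.passwd = "";
    auth.user_ip = "10.0.0.1";
    TStreamLoadPutRequest request;
    set_request_auth(&request, auth);   // fills user/passwd and optional fields

    // For routine load tasks the FE hands down an auth code instead,
    // and only that code is forwarded:
    AuthInfo task_auth;
    task_auth.auth_code = 12345;        // hypothetical value
    set_request_auth(&request, task_auth);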
@@ -75,7 +75,7 @@ set(EXEC_FILES
    schema_scanner/schema_columns_scanner.cpp
    schema_scanner/schema_charsets_scanner.cpp
    schema_scanner/schema_collations_scanner.cpp
    schema_scanner/frontend_helper.cpp
    schema_scanner/schema_helper.cpp
    partitioned_hash_table.cc
    partitioned_hash_table_ir.cc
    partitioned_aggregation_node.cc

@@ -24,8 +24,8 @@
#include "runtime/exec_env.h"
#include "runtime/mem_tracker.h"
#include "runtime/raw_value.h"
#include "runtime/load_stream_mgr.h"
#include "runtime/stream_load_pipe.h"
#include "runtime/stream_load/load_stream_mgr.h"
#include "runtime/stream_load/stream_load_pipe.h"
#include "runtime/tuple.h"
#include "exprs/expr.h"
#include "exec/text_converter.h"

@@ -310,7 +310,7 @@ Status PlainTextLineReader::read_line(const uint8_t** ptr, size_t* size, bool* e

    // (cmy), for now, return failed to avoid potential endless loop
    std::stringstream ss;
    ss << "decompress made no progess."
    ss << "decompress made no progress."
        << " input_read_bytes: " << input_read_bytes
        << " decompressed_len: " << decompressed_len;
    LOG(WARNING) << ss.str();

@@ -21,7 +21,7 @@
#include <boost/foreach.hpp>

#include "exec/text_converter.hpp"
#include "exec/schema_scanner/frontend_helper.h"
#include "exec/schema_scanner/schema_helper.h"
#include "gen_cpp/PlanNodes_types.h"
#include "runtime/runtime_state.h"
#include "runtime/row_batch.h"

@@ -21,7 +21,7 @@
#include "runtime/primitive_type.h"
#include "runtime/string_value.h"
#include "runtime/datetime_value.h"
#include "exec/schema_scanner/frontend_helper.h"
#include "exec/schema_scanner/schema_helper.h"

namespace doris {

@@ -74,7 +74,7 @@ Status SchemaColumnsScanner::start(RuntimeState *state) {
    }

    if (NULL != _param->ip && 0 != _param->port) {
        RETURN_IF_ERROR(FrontendHelper::get_db_names(*(_param->ip),
        RETURN_IF_ERROR(SchemaHelper::get_db_names(*(_param->ip),
                _param->port, db_params, &_db_result));
    } else {
        return Status("IP or port dosn't exists");

@@ -152,7 +152,7 @@ Status SchemaColumnsScanner::fill_one_row(Tuple *tuple, MemPool *pool) {
    {
        void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset());
        StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
        std::string db_name = FrontendHelper::extract_db_name(_db_result.dbs[_db_index - 1]);
        std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index - 1]);
        str_slot->ptr = (char *)pool->allocate(db_name.size());
        str_slot->len = db_name.size();
        memcpy(str_slot->ptr, db_name.c_str(), str_slot->len);

@@ -328,7 +328,7 @@ Status SchemaColumnsScanner::get_new_desc() {
    }

    if (NULL != _param->ip && 0 != _param->port) {
        RETURN_IF_ERROR(FrontendHelper::describe_table(*(_param->ip),
        RETURN_IF_ERROR(SchemaHelper::describe_table(*(_param->ip),
                _param->port, desc_params, &_desc_result));
    } else {
        return Status("IP or port dosn't exists");

@@ -352,7 +352,7 @@ Status SchemaColumnsScanner::get_new_table() {
    }

    if (NULL != _param->ip && 0 != _param->port) {
        RETURN_IF_ERROR(FrontendHelper::get_table_names(*(_param->ip),
        RETURN_IF_ERROR(SchemaHelper::get_table_names(*(_param->ip),
                _param->port, table_params, &_table_result));
    } else {
        return Status("IP or port dosn't exists");

@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "exec/schema_scanner/frontend_helper.h"
#include "exec/schema_scanner/schema_helper.h"

#include <sstream>

@@ -35,6 +35,7 @@
#include "runtime/tuple_row.h"
#include "runtime/client_cache.h"
#include "util/debug_util.h"
#include "util/frontend_helper.h"
#include "util/network_util.h"
#include "util/thrift_util.h"
#include "util/runtime_profile.h"

@@ -42,113 +43,62 @@

namespace doris {

ExecEnv* FrontendHelper::_s_exec_env;

using apache::thrift::protocol::TProtocol;
using apache::thrift::protocol::TBinaryProtocol;
using apache::thrift::transport::TSocket;
using apache::thrift::transport::TTransport;
using apache::thrift::transport::TBufferedTransport;

void FrontendHelper::setup(ExecEnv* exec_env) {
    _s_exec_env = exec_env;
}

Status FrontendHelper::get_db_names(
Status SchemaHelper::get_db_names(
        const std::string& ip,
        const int32_t port,
        const TGetDbsParams &request,
        TGetDbsResult *result) {
    return rpc(ip, port,
    return FrontendHelper::rpc(ip, port,
        [&request, &result] (FrontendServiceConnection& client) {
            client->getDbNames(*result, request);
        });
}

Status FrontendHelper::get_table_names(
Status SchemaHelper::get_table_names(
        const std::string& ip,
        const int32_t port,
        const TGetTablesParams &request,
        TGetTablesResult *result) {
    return rpc(ip, port,
    return FrontendHelper::rpc(ip, port,
        [&request, &result] (FrontendServiceConnection& client) {
            client->getTableNames(*result, request);
        });
}

Status FrontendHelper::list_table_status(
Status SchemaHelper::list_table_status(
        const std::string& ip,
        const int32_t port,
        const TGetTablesParams &request,
        TListTableStatusResult *result) {
    return rpc(ip, port,
    return FrontendHelper::rpc(ip, port,
        [&request, &result] (FrontendServiceConnection& client) {
            client->listTableStatus(*result, request);
        });
}

Status FrontendHelper::describe_table(
Status SchemaHelper::describe_table(
        const std::string& ip,
        const int32_t port,
        const TDescribeTableParams &request,
        TDescribeTableResult *result) {
    return rpc(ip, port,
    return FrontendHelper::rpc(ip, port,
        [&request, &result] (FrontendServiceConnection& client) {
            client->describeTable(*result, request);
        });
}

Status FrontendHelper::show_varialbes(
Status SchemaHelper::show_varialbes(
        const std::string& ip,
        const int32_t port,
        const TShowVariableRequest &request,
        TShowVariableResult *result) {
    return rpc(ip, port,
    return FrontendHelper::rpc(ip, port,
        [&request, &result] (FrontendServiceConnection& client) {
            client->showVariables(*result, request);
        });
}

Status FrontendHelper::rpc(
        const std::string& ip,
        const int32_t port,
        std::function<void (FrontendServiceConnection&)> callback,
        int timeout_ms) {
    TNetworkAddress address = make_network_address(ip, port);
    Status status;
    FrontendServiceConnection client(
            _s_exec_env->frontend_client_cache(), address, timeout_ms, &status);
    if (!status.ok()) {
        LOG(WARNING) << "Connect frontent failed, address=" << address
            << ", status=" << status.get_error_msg();
        return status;
    }
    try {
        try {
            callback(client);
        } catch (apache::thrift::transport::TTransportException& e) {
            LOG(WARNING) << "retrying call frontend service, address="
                << address << ", reason=" << e.what();
            status = client.reopen(timeout_ms);
            if (!status.ok()) {
                LOG(WARNING) << "client repoen failed. address=" << address
                    << ", status=" << status.get_error_msg();
                return status;
            }
            callback(client);
        }
    } catch (apache::thrift::TException& e) {
        // just reopen to disable this connection
        client.reopen(timeout_ms);
        LOG(WARNING) << "call frontend service failed, address=" << address
            << ", reason=" << e.what();
        return Status(TStatusCode::THRIFT_RPC_ERROR,
                "failed to call frontend service", false);
    }
    return Status::OK;
}
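The nested try blocks above give every RPC exactly one transparent retry after a Thrift transport failure (reopen the cached connection, call again); any other Thrift exception poisons the connection and surfaces as THRIFT_RPC_ERROR. A sketch of a typical caller, assuming an FE reachable at the address shown (host and port here are illustrative):

    TGetDbsParams request;
    TGetDbsResult result;
    Status st = FrontendHelper::rpc("127.0.0.1", 9020,   // illustrative FE host/port
            [&request, &result] (FrontendServiceConnection& client) {
                // may throw TTransportException; rpc() retries once
                client->getDbNames(result, request);
            });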

std::string FrontendHelper::extract_db_name(const std::string& full_name) {
std::string SchemaHelper::extract_db_name(const std::string& full_name) {
    auto found = full_name.find(':');
    if (found == std::string::npos) {
        return full_name;

@@ -15,42 +15,40 @@
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCANNER_FRONTEND_HELPER_H
#define DORIS_BE_SRC_QUERY_EXEC_SCHEMA_SCANNER_FRONTEND_HELPER_H
#pragma once

#include "common/status.h"
#include "gen_cpp/FrontendService_types.h"

namespace doris {

class ExecEnv;
class FrontendServiceClient;
template <class T> class ClientConnection;

// this class is a helper for jni call. easy for unit test
class FrontendHelper {
// this class is a helper for getting schema info from FE
class SchemaHelper {
public:
    static void setup(ExecEnv* exec_env);
    static Status get_db_names(
            const std::string& ip,
            const int32_t port,
            const TGetDbsParams &db_params,
            TGetDbsResult *db_result);

    static Status get_table_names(
            const std::string& ip,
            const int32_t port,
            const TGetTablesParams &table_params,
            TGetTablesResult *table_result);

    static Status list_table_status(
            const std::string& ip,
            const int32_t port,
            const TGetTablesParams &table_params,
            TListTableStatusResult *table_result);

    static Status describe_table(
            const std::string& ip,
            const int32_t port,
            const TDescribeTableParams &desc_params,
            TDescribeTableResult *desc_result);

    static Status show_varialbes(
            const std::string& ip,
            const int32_t port,

@@ -58,27 +56,7 @@ public:
            TShowVariableResult *var_result);

    static std::string extract_db_name(const std::string& full_name);

    // for default timeout
    static Status rpc(
            const std::string& ip,
            const int32_t port,
            std::function<void (ClientConnection<FrontendServiceClient>&)> callback) {

        return rpc(ip, port, callback, config::thrift_rpc_timeout_ms);
    }

    static Status rpc(
            const std::string& ip,
            const int32_t port,
            std::function<void (ClientConnection<FrontendServiceClient>&)> callback,
            int timeout_ms);

private:
    static ExecEnv* _s_exec_env;
};

}

#endif

@@ -18,7 +18,7 @@
#include "exec/schema_scanner/schema_schemata_scanner.h"
#include "runtime/primitive_type.h"
#include "runtime/string_value.h"
#include "exec/schema_scanner/frontend_helper.h"
#include "exec/schema_scanner/schema_helper.h"

namespace doris {

@@ -54,7 +54,7 @@ Status SchemaSchemataScanner::start(RuntimeState *state) {
        db_params.__set_user_ip(*(_param->user_ip));
    }
    if (NULL != _param->ip && 0 != _param->port) {
        RETURN_IF_ERROR(FrontendHelper::get_db_names(*(_param->ip),
        RETURN_IF_ERROR(SchemaHelper::get_db_names(*(_param->ip),
                _param->port, db_params, &_db_result));
    } else {
        return Status("IP or port dosn't exists");

@@ -75,7 +75,7 @@ Status SchemaSchemataScanner::fill_one_row(Tuple *tuple, MemPool *pool) {
    {
        void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset());
        StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
        std::string db_name = FrontendHelper::extract_db_name(_db_result.dbs[_db_index]);
        std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index]);
        str_slot->ptr = (char *)pool->allocate(db_name.size());
        str_slot->len = db_name.size();
        memcpy(str_slot->ptr, db_name.c_str(), str_slot->len);

@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "exec/schema_scanner/frontend_helper.h"
#include "exec/schema_scanner/schema_helper.h"
#include "exec/schema_scanner/schema_tables_scanner.h"
#include "runtime/primitive_type.h"
#include "runtime/string_value.h"

@@ -75,7 +75,7 @@ Status SchemaTablesScanner::start(RuntimeState *state) {
    }

    if (NULL != _param->ip && 0 != _param->port) {
        RETURN_IF_ERROR(FrontendHelper::get_db_names(*(_param->ip),
        RETURN_IF_ERROR(SchemaHelper::get_db_names(*(_param->ip),
                _param->port, db_params, &_db_result));
    } else {
        return Status("IP or port dosn't exists");

@@ -95,7 +95,7 @@ Status SchemaTablesScanner::fill_one_row(Tuple *tuple, MemPool *pool) {
    {
        void *slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset());
        StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
        std::string db_name = FrontendHelper::extract_db_name(_db_result.dbs[_db_index - 1]);
        std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index - 1]);
        str_slot->ptr = (char *)pool->allocate(db_name.size());
        str_slot->len = db_name.size();
        memcpy(str_slot->ptr, db_name.c_str(), str_slot->len);

@@ -232,7 +232,7 @@ Status SchemaTablesScanner::get_new_table() {
    }

    if (NULL != _param->ip && 0 != _param->port) {
        RETURN_IF_ERROR(FrontendHelper::list_table_status(*(_param->ip),
        RETURN_IF_ERROR(SchemaHelper::list_table_status(*(_param->ip),
                _param->port, table_params, &_table_result));
    } else {
        return Status("IP or port dosn't exists");

@@ -19,7 +19,7 @@
#include "runtime/primitive_type.h"
#include "runtime/string_value.h"
#include "runtime/runtime_state.h"
#include "exec/schema_scanner/frontend_helper.h"
#include "exec/schema_scanner/schema_helper.h"

namespace doris {

@@ -53,7 +53,7 @@ Status SchemaVariablesScanner::start(RuntimeState *state) {
    var_params.__set_threadId(_param->thread_id);

    if (NULL != _param->ip && 0 != _param->port) {
        RETURN_IF_ERROR(FrontendHelper::show_varialbes(*(_param->ip),
        RETURN_IF_ERROR(SchemaHelper::show_varialbes(*(_param->ip),
                _param->port, var_params, &_var_result));
    } else {
        return Status("IP or port dosn't exists");

@@ -29,7 +29,6 @@ add_library(Webserver STATIC
    http_channel.cpp
    http_status.cpp
    http_parser.cpp
    message_body_sink.cpp
    web_page_handler.cpp
    monitor_action.cpp
    default_path_handlers.cpp

@@ -29,7 +29,8 @@
#include <thrift/protocol/TDebugProtocol.h>

#include "common/logging.h"
#include "exec/schema_scanner/frontend_helper.h"
#include "common/utils.h"
#include "util/frontend_helper.h"
#include "gen_cpp/FrontendService.h"
#include "gen_cpp/FrontendService_types.h"
#include "gen_cpp/HeartbeatService_types.h"

@@ -44,8 +45,10 @@
#include "runtime/fragment_mgr.h"
#include "runtime/load_path_mgr.h"
#include "runtime/plan_fragment_executor.h"
#include "runtime/stream_load_pipe.h"
#include "runtime/load_stream_mgr.h"
#include "runtime/stream_load/stream_load_executor.h"
#include "runtime/stream_load/stream_load_pipe.h"
#include "runtime/stream_load/stream_load_context.h"
#include "runtime/stream_load/load_stream_mgr.h"
#include "util/byte_buffer.h"
#include "util/debug_util.h"
#include "util/json_util.h"

@@ -62,11 +65,7 @@ IntCounter k_streaming_load_duration_ms;
static IntGauge k_streaming_load_current_processing;

#ifdef BE_TEST
TLoadTxnBeginResult k_stream_load_begin_result;
TLoadTxnCommitResult k_stream_load_commit_result;
TLoadTxnRollbackResult k_stream_load_rollback_result;
TStreamLoadPutResult k_stream_load_put_result;
Status k_stream_load_plan_status;
#endif

static TFileFormatType::type parse_format(const std::string& format_str) {

@@ -85,134 +84,6 @@ static bool is_format_support_streaming(TFileFormatType::type format) {
    }
}

// stream load context
struct StreamLoadContext {
    StreamLoadContext(StreamLoadAction* action_) : action(action_), _refs(0) {
        start_nanos = MonotonicNanos();
    }

    ~StreamLoadContext();

    StreamLoadAction* action;
    // id for each load
    UniqueId id;

    std::string db;
    std::string table;
    // load label, used to identify
    std::string label;

    std::string user_ip;

    HttpAuthInfo auth;

    // only used to check if we receive whole body
    size_t body_bytes = 0;
    size_t receive_bytes = 0;

    int64_t txn_id = -1;

    bool need_rollback = false;
    // when use_streaming is true, we use stream_pipe to send source data,
    // otherwise we save source data to file first, then process it.
    bool use_streaming = false;
    TFileFormatType::type format = TFileFormatType::FORMAT_CSV_PLAIN;

    std::shared_ptr<MessageBodySink> body_sink;

    TStreamLoadPutResult put_result;
    double max_filter_ratio = 0.0;
    std::vector<TTabletCommitInfo> commit_infos;

    std::promise<Status> promise;
    std::future<Status> future = promise.get_future();

    Status status;

    int64_t number_loaded_rows = 0;
    int64_t number_filtered_rows = 0;
    int64_t start_nanos = 0;
    int64_t load_cost_nanos = 0;
    std::string error_url;

    std::string to_json() const;

    std::string brief() const;

    void ref() { _refs.fetch_add(1); }
    // If unref() returns true, this object should be delete
    bool unref() { return _refs.fetch_sub(1) == 1; }

private:
    std::atomic<int> _refs;
};

StreamLoadContext::~StreamLoadContext() {
    if (need_rollback) {
        action->rollback(this);
        need_rollback = false;
    }
}
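The context is shared between the HTTP handler and the asynchronous fragment-executor callback, so a manual reference count decides who deletes it. The intended contract, restated as a short sketch:

    ctx->ref();                  // taken before ctx escapes to another thread
    // ... hand ctx to the fragment executor ...
    if (ctx->unref()) {          // the last owner performs the delete;
        delete ctx;              // ~StreamLoadContext rolls back the txn if needed
    }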

std::string StreamLoadContext::to_json() const {
    rapidjson::StringBuffer s;
    rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(s);

    writer.StartObject();
    // txn id
    writer.Key("TxnId");
    writer.Int64(txn_id);

    // label
    writer.Key("Label");
    writer.String(label.c_str());

    // status
    writer.Key("Status");
    switch (status.code()) {
    case TStatusCode::OK:
        writer.String("Success");
        break;
    case TStatusCode::PUBLISH_TIMEOUT:
        writer.String("Publish Timeout");
        break;
    case TStatusCode::LABEL_ALREADY_EXISTS:
        writer.String("Label Already Exists");
        break;
    default:
        writer.String("Fail");
        break;
    }
    // msg
    writer.Key("Message");
    if (status.ok()) {
        writer.String("OK");
    } else {
        writer.String(status.get_error_msg().c_str());
    }
    // number_load_rows
    writer.Key("NumberLoadedRows");
    writer.Int64(number_loaded_rows);
    writer.Key("NumberFilteredRows");
    writer.Int64(number_filtered_rows);
    writer.Key("LoadBytes");
    writer.Int64(receive_bytes);
    writer.Key("LoadTimeMs");
    writer.Int64(load_cost_nanos / 1000000);
    if (!error_url.empty()) {
        writer.Key("ErrorURL");
        writer.String(error_url.c_str());
    }
    writer.EndObject();
    return s.GetString();
}

std::string StreamLoadContext::brief() const {
    std::stringstream ss;
    ss << " id=" << id << ", txn id=" << txn_id << ", label=" << label;
    return ss.str();
}

StreamLoadAction::StreamLoadAction(ExecEnv* exec_env) : _exec_env(exec_env) {
    DorisMetrics::metrics()->register_metric("streaming_load_requests_total",
            &k_streaming_load_requests_total);

@@ -245,7 +116,7 @@ void StreamLoadAction::handle(HttpRequest* req) {

    if (!ctx->status.ok()) {
        if (ctx->need_rollback) {
            rollback(ctx);
            _exec_env->stream_load_executor()->rollback_txn(ctx);
            ctx->need_rollback = false;
        }
        if (ctx->body_sink.get() != nullptr) {

@@ -272,10 +143,10 @@ Status StreamLoadAction::_handle(StreamLoadContext* ctx) {
    }
    if (!ctx->use_streaming) {
        // if we use non-streaming, we need to close file first,
        // then _execute_plan_fragment here
        // then execute_plan_fragment here
        // this will close file
        ctx->body_sink.reset();
        RETURN_IF_ERROR(_execute_plan_fragment(ctx));
        RETURN_IF_ERROR(_exec_env->stream_load_executor()->execute_plan_fragment(ctx));
    } else {
        RETURN_IF_ERROR(ctx->body_sink->finish());
    }

@@ -284,36 +155,7 @@ Status StreamLoadAction::_handle(StreamLoadContext* ctx) {
    RETURN_IF_ERROR(ctx->future.get());

    // If put file succeess we need commit this load
    TLoadTxnCommitRequest request;
    set_http_auth(&request, ctx->auth);
    request.db = ctx->db;
    request.tbl = ctx->table;
    request.txnId = ctx->txn_id;
    request.sync = true;
    request.commitInfos = std::move(ctx->commit_infos);
    request.__isset.commitInfos = true;

    TNetworkAddress master_addr = _exec_env->master_info()->network_address;
    TLoadTxnCommitResult result;
#ifndef BE_TEST
    RETURN_IF_ERROR(FrontendHelper::rpc(
            master_addr.hostname, master_addr.port,
            [&request, &result] (FrontendServiceConnection& client) {
                client->loadTxnCommit(result, request);
            }, config::txn_commit_rpc_timeout_ms));
#else
    result = k_stream_load_commit_result;
#endif
    // Return if this transaction is committed successful; otherwise, we need try to
    // rollback this transaction
    Status status(result.status);
    if (!status.ok()) {
        LOG(WARNING) << "commit transaction failed, id=" << ctx->id
            << ", errmsg=" << status.get_error_msg();
        return status;
    }
    // commit success, set need_rollback to false
    ctx->need_rollback = false;
    RETURN_IF_ERROR(_exec_env->stream_load_executor()->commit_txn(ctx));

    return Status::OK;
}

@@ -321,9 +163,12 @@ Status StreamLoadAction::_handle(StreamLoadContext* ctx) {
int StreamLoadAction::on_header(HttpRequest* req) {
    k_streaming_load_current_processing.increment(1);

    StreamLoadContext* ctx = new StreamLoadContext(this);
    StreamLoadContext* ctx = new StreamLoadContext(_exec_env);
    ctx->ref();
    req->set_handler_ctx(ctx);

    ctx->load_type = TLoadType::MANUL_LOAD;
    ctx->load_src_type = TLoadSourceType::RAW;

    ctx->db = req->param(HTTP_DB_KEY);
    ctx->table = req->param(HTTP_TABLE_KEY);

@@ -339,7 +184,7 @@ int StreamLoadAction::on_header(HttpRequest* req) {
    if (!st.ok()) {
        ctx->status = st;
        if (ctx->need_rollback) {
            rollback(ctx);
            _exec_env->stream_load_executor()->rollback_txn(ctx);
            ctx->need_rollback = false;
        }
        if (ctx->body_sink.get() != nullptr) {

@@ -394,34 +239,7 @@ Status StreamLoadAction::_on_header(HttpRequest* http_req, StreamLoadContext* ct
    TNetworkAddress master_addr = _exec_env->master_info()->network_address;

    // begin transaction
    {
        TLoadTxnBeginRequest request;
        set_http_auth(&request, ctx->auth);
        request.db = ctx->db;
        request.tbl = ctx->table;
        request.label = ctx->label;
        // set timestamp
        request.__set_timestamp(GetCurrentTimeMicros());

        TLoadTxnBeginResult result;
#ifndef BE_TEST
        RETURN_IF_ERROR(FrontendHelper::rpc(
                master_addr.hostname, master_addr.port,
                [&request, &result] (FrontendServiceConnection& client) {
                    client->loadTxnBegin(result, request);
                }));
#else
        result = k_stream_load_begin_result;
#endif
        Status status(result.status);
        if (!status.ok()) {
            LOG(WARNING) << "begin transaction failed, errmsg=" << status.get_error_msg()
                << ctx->brief();
            return status;
        }
        ctx->txn_id = result.txnId;
        ctx->need_rollback = true;
    }
    RETURN_IF_ERROR(_exec_env->stream_load_executor()->begin_txn(ctx));

    // process put file
    return _process_put(http_req, ctx);

@@ -453,7 +271,7 @@ void StreamLoadAction::on_chunk_data(HttpRequest* req) {
}

void StreamLoadAction::free_handler_ctx(void* param) {
    StreamLoadContext* ctx = (StreamLoadContext*)param;
    StreamLoadContext* ctx = (StreamLoadContext*) param;
    if (ctx == nullptr) {
        return;
    }

@@ -472,7 +290,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext*

    // put request
    TStreamLoadPutRequest request;
    set_http_auth(&request, ctx->auth);
    set_request_auth(&request, ctx->auth);
    request.db = ctx->db;
    request.tbl = ctx->table;
    request.txnId = ctx->txn_id;

@@ -531,79 +349,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext*
    if (!ctx->use_streaming) {
        return Status::OK;
    }
    return _execute_plan_fragment(ctx);
}

Status StreamLoadAction::_execute_plan_fragment(StreamLoadContext* ctx) {
    // submit this params
#ifndef BE_TEST
    ctx->ref();
    auto st = _exec_env->fragment_mgr()->exec_plan_fragment(
            ctx->put_result.params,
            [ctx] (PlanFragmentExecutor* executor) {
                ctx->commit_infos = std::move(executor->runtime_state()->tablet_commit_infos());
                Status status = executor->status();
                if (status.ok()) {
                    ctx->number_loaded_rows = executor->runtime_state()->num_rows_load_success();
                    ctx->number_filtered_rows = executor->runtime_state()->num_rows_load_filtered();
                    int64_t num_total_rows =
                            ctx->number_loaded_rows + ctx->number_filtered_rows;
                    if ((0.0 + ctx->number_filtered_rows) / num_total_rows > ctx->max_filter_ratio) {
                        status = Status("too many filtered rows");
                    }
                    if (ctx->number_filtered_rows > 0 &&
                            !executor->runtime_state()->get_error_log_file_path().empty()) {
                        ctx->error_url = to_load_error_http_path(
                                executor->runtime_state()->get_error_log_file_path());
                    }
                } else {
                    LOG(WARNING) << "fragment execute failed"
                        << ", query_id=" << UniqueId(ctx->put_result.params.params.query_id)
                        << ", errmsg=" << status.get_error_msg()
                        << ctx->brief();
                    // cancel body_sink, make sender known it
                    if (ctx->body_sink != nullptr) {
                        ctx->body_sink->cancel();
                    }
                }
                ctx->promise.set_value(status);
                if (ctx->unref()) {
                    delete ctx;
                }
            });
    if (!st.ok()) {
        // no need to check unref's return value
        ctx->unref();
        return st;
    }
#else
    ctx->promise.set_value(k_stream_load_plan_status);
#endif
    return Status::OK;
}

void StreamLoadAction::rollback(StreamLoadContext* ctx) {
    TNetworkAddress master_addr = _exec_env->master_info()->network_address;
    TLoadTxnRollbackRequest request;
    set_http_auth(&request, ctx->auth);
    request.db = ctx->db;
    request.tbl = ctx->table;
    request.txnId = ctx->txn_id;
    request.__set_reason(ctx->status.get_error_msg());
    TLoadTxnRollbackResult result;
#ifndef BE_TEST
    auto rpc_st = FrontendHelper::rpc(
            master_addr.hostname, master_addr.port,
            [&request, &result] (FrontendServiceConnection& client) {
                client->loadTxnRollback(result, request);
            });
    if (!rpc_st.ok()) {
        LOG(WARNING) << "transaction rollback failed. errmsg=" << rpc_st.get_error_msg()
            << ctx->brief();
    }
#else
    result = k_stream_load_rollback_result;
#endif
    return _exec_env->stream_load_executor()->execute_plan_fragment(ctx);
}

Status StreamLoadAction::_data_saved_path(HttpRequest* req, std::string* file_path) {

@@ -19,19 +19,15 @@

#include <functional>

#include "common/status.h"
#include "gen_cpp/PlanNodes_types.h"
#include "http/http_handler.h"
#include "http/message_body_sink.h"
#include "runtime/client_cache.h"
#include "runtime/message_body_sink.h"

namespace doris {

class ExecEnv;
class TStreamLoadPutRequest;
class TStreamLoadPutResult;
class HttpAuthInfo;
class TTabletCommitInfo;
class Status;
class StreamLoadContext;

class StreamLoadAction : public HttpHandler {

@@ -48,9 +44,6 @@ public:
    void on_chunk_data(HttpRequest* req) override;
    void free_handler_ctx(void* ctx) override;

    // called by deconstructor
    void rollback(StreamLoadContext* ctx);

private:
    Status _on_header(HttpRequest* http_req, StreamLoadContext* ctx);
    Status _handle(StreamLoadContext* ctx);

@@ -21,13 +21,6 @@

namespace doris {

struct HttpAuthInfo {
    std::string user;
    std::string passwd;
    std::string cluster;
    std::string user_ip;
};

static const std::string HTTP_DB_KEY = "db";
static const std::string HTTP_TABLE_KEY = "table";
static const std::string HTTP_LABEL_KEY = "label";

@@ -18,6 +18,7 @@
#include <http/utils.h>

#include "common/logging.h"
#include "common/utils.h"
#include "http/http_common.h"
#include "http/http_headers.h"
#include "http/http_request.h"

@@ -54,7 +55,7 @@ bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* pa
    return true;
}

bool parse_basic_auth(const HttpRequest& req, HttpAuthInfo* auth) {
bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth) {
    std::string full_user;
    if (!parse_basic_auth(req, &full_user, &auth->passwd)) {
        return false;

@@ -19,12 +19,12 @@

#include <string>

#include "common/utils.h"
#include "http/http_common.h"

namespace doris {

class HttpRequest;
class HttpAuthInfo;

std::string encode_basic_auth(const std::string& user, const std::string& passwd);
// parse Basic authorization

@@ -32,16 +32,6 @@ std::string encode_basic_auth(const std::string& user, const std::string& passwd
// Otherwise return fasle
bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* passwd);

bool parse_basic_auth(const HttpRequest& req, HttpAuthInfo* auth);

template<class T>
void set_http_auth(T* req, const HttpAuthInfo& auth) {
    req->user = auth.user;
    req->passwd = auth.passwd;
    if (!auth.cluster.empty()) {
        req->__set_cluster(auth.cluster);
    }
    req->__set_user_ip(auth.user_ip);
}
bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth);

}

@@ -93,8 +93,12 @@ add_library(Runtime STATIC
    bufferpool/system_allocator.cc
    initial_reservations.cc
    snapshot_loader.cpp
    kafka_consumer_pipe.cpp
    query_statistics.cpp
    message_body_sink.cpp
    stream_load/stream_load_context.cpp
    stream_load/stream_load_executor.cpp
    routine_load/data_consumer.cpp
    routine_load/routine_load_task_executor.cpp
)

# This test runs forever so should not be part of 'make test'

@@ -50,6 +50,8 @@ class ThreadPool;
class ThreadResourceMgr;
class TmpFileMgr;
class WebPageHandler;
class StreamLoadExecutor;
class RoutineLoadTaskExecutor;

class BackendServiceClient;
class FrontendServiceClient;

@@ -110,11 +112,15 @@ public:
    BufferPool* buffer_pool() { return _buffer_pool; }
    TabletWriterMgr* tablet_writer_mgr() { return _tablet_writer_mgr; }
    LoadStreamMgr* load_stream_mgr() { return _load_stream_mgr; }

    const std::vector<StorePath>& store_paths() const { return _store_paths; }
    void set_store_paths(const std::vector<StorePath>& paths) { _store_paths = paths; }
    OLAPEngine* olap_engine() { return _olap_engine; }
    void set_olap_engine(OLAPEngine* olap_engine) { _olap_engine = olap_engine; }

    StreamLoadExecutor* stream_load_executor() { return _stream_load_executor; }
    RoutineLoadTaskExecutor* routine_load_task_executor() { return _routine_load_task_executor; }

private:
    Status _init(const std::vector<StorePath>& store_paths);
    void _destory();

@@ -158,6 +164,9 @@ private:
    BufferPool* _buffer_pool = nullptr;

    OLAPEngine* _olap_engine = nullptr;

    StreamLoadExecutor* _stream_load_executor = nullptr;
    RoutineLoadTaskExecutor* _routine_load_task_executor = nullptr;
};

}

@@ -43,8 +43,10 @@
#include "util/bfd_parser.h"
#include "runtime/etl_job_mgr.h"
#include "runtime/load_path_mgr.h"
#include "runtime/load_stream_mgr.h"
#include "runtime/pull_load_task_mgr.h"
#include "runtime/routine_load/routine_load_task_executor.h"
#include "runtime/stream_load/load_stream_mgr.h"
#include "runtime/stream_load/stream_load_executor.h"
#include "util/pretty_printer.h"
#include "util/doris_metrics.h"
#include "util/brpc_stub_cache.h"

@@ -95,6 +97,8 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) {
    _tablet_writer_mgr = new TabletWriterMgr(this);
    _load_stream_mgr = new LoadStreamMgr();
    _brpc_stub_cache = new BrpcStubCache();
    _stream_load_executor = new StreamLoadExecutor(this);
    _routine_load_task_executor = new RoutineLoadTaskExecutor(this);

    _client_cache->init_metrics(DorisMetrics::metrics(), "backend");
    _frontend_client_cache->init_metrics(DorisMetrics::metrics(), "frontend");

@@ -208,6 +212,9 @@ void ExecEnv::_destory() {
    delete _client_cache;
    delete _result_mgr;
    delete _stream_mgr;
    delete _stream_load_executor;
    delete _routine_load_task_executor;

    _metrics = nullptr;
}

@@ -15,16 +15,13 @@
// specific language governing permissions and limitations
// under the License.

#include "http/message_body_sink.h"
#include "runtime/message_body_sink.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <algorithm>

#include "http/http_channel.h"
#include "http/http_parser.h"

#include "util/runtime_profile.h"

namespace doris {

@@ -23,9 +23,6 @@

namespace doris {

class HttpChannel;
class BodySink;

class MessageBodySink {
public:
    virtual ~MessageBodySink() { }

@@ -41,7 +38,7 @@ public:
    virtual void cancel() { }
};

// write HTTP request's message-body to a local file
// write message to a local file
class MessageBodyFileSink : public MessageBodySink {
public:
    MessageBodyFileSink(const std::string& path) : _path(path) { }

be/src/runtime/routine_load/data_consumer.cpp (new file, 195 lines)
@@ -0,0 +1,195 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "runtime/routine_load/data_consumer.h"

#include <algorithm>
#include <functional>
#include <string>
#include <vector>

#include "common/status.h"
#include "runtime/routine_load/kafka_consumer_pipe.h"
#include "util/defer_op.h"
#include "util/stopwatch.hpp"

namespace doris {

Status KafkaDataConsumer::init() {
    std::unique_lock<std::mutex> l(_lock);
    if (_init) {
        // this consumer has already been initialized.
        return Status::OK;
    }

    RdKafka::Conf *conf = RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL);

    // conf has to be deleted finally
    auto conf_deleter = [] (RdKafka::Conf *conf) { delete conf; };
    DeferOp delete_conf(std::bind<void>(conf_deleter, conf));

    std::string errstr;
#define SET_KAFKA_CONF(conf_key, conf_val) \
    if (conf->set(conf_key, conf_val, errstr) != RdKafka::Conf::CONF_OK) { \
        std::stringstream ss; \
        ss << "failed to set '" << conf_key << "'"; \
        LOG(WARNING) << ss.str(); \
        return Status(ss.str()); \
    }
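Each SET_KAFKA_CONF line below is shorthand for a checked librdkafka Conf::set call; for example, the group.id line expands (roughly) to:

    if (conf->set("group.id", _ctx->kafka_info->group_id, errstr) != RdKafka::Conf::CONF_OK) {
        std::stringstream ss;
        ss << "failed to set '" << "group.id" << "'";
        LOG(WARNING) << ss.str();
        return Status(ss.str());
    }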

    SET_KAFKA_CONF("metadata.broker.list", _ctx->kafka_info->brokers);
    SET_KAFKA_CONF("group.id", _ctx->kafka_info->group_id);
    SET_KAFKA_CONF("client.id", _ctx->kafka_info->client_id);
    SET_KAFKA_CONF("enable.partition.eof", "false");
    SET_KAFKA_CONF("enable.auto.offset.store", "false");
    // TODO: set it larger than 0 after we set rd_kafka_conf_set_stats_cb()
    SET_KAFKA_CONF("statistics.interval.ms", "0");

    // create consumer
    _k_consumer = RdKafka::KafkaConsumer::create(conf, errstr);
    if (!_k_consumer) {
        LOG(WARNING) << "failed to create kafka consumer";
        return Status("failed to create kafka consumer");
    }

    // create TopicPartitions
    std::vector<RdKafka::TopicPartition*> topic_partitions;
    for (auto& entry : _ctx->kafka_info->begin_offset) {
        RdKafka::TopicPartition* tp1 = RdKafka::TopicPartition::create(
                _ctx->kafka_info->topic, entry.first, entry.second);
        topic_partitions.push_back(tp1);
    }

    // delete TopicPartition finally
    auto tp_deleter = [] (const std::vector<RdKafka::TopicPartition*>& vec) {
        std::for_each(vec.begin(), vec.end(),
                [](RdKafka::TopicPartition* tp1) { delete tp1; });
    };
    DeferOp delete_tp(std::bind<void>(tp_deleter, topic_partitions));

    // assign partition
    RdKafka::ErrorCode err = _k_consumer->assign(topic_partitions);
    if (err) {
        LOG(WARNING) << "failed to assign topic partitions: " << _ctx->brief(true)
                << ", err: " << RdKafka::err2str(err);
        return Status("failed to assgin topic partitions");
    }

    VLOG(3) << "finished to init kafka consumer. "
            << _ctx->brief(true);

    _init = true;
    return Status::OK;
}
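DeferOp (util/defer_op.h, not shown in this diff) is the scope guard used twice above to release the Conf and the TopicPartitions on every early return. Assuming it has the obvious shape, a sketch:

    // Assumed shape of the guard; the real header may differ in detail.
    class DeferOp {
    public:
        explicit DeferOp(std::function<void()> op) : _op(std::move(op)) {}
        ~DeferOp() { _op(); }   // runs the deferred cleanup on scope exit
    private:
        std::function<void()> _op;
    };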

Status KafkaDataConsumer::start() {
    {
        std::unique_lock<std::mutex> l(_lock);
        if (!_init) {
            return Status("consumer is not initialized");
        }
    }

    int64_t left_time = _ctx->kafka_info->max_interval_s;
    int64_t left_rows = _ctx->kafka_info->max_batch_rows;
    int64_t left_bytes = _ctx->kafka_info->max_batch_bytes;

    LOG(INFO) << "start consumer"
            << ". interval(s): " << left_time
            << ", bath rows: " << left_rows
            << ", batch size: " << left_bytes
            << ". " << _ctx->brief();

    MonotonicStopWatch watch;
    watch.start();
    Status st;
    while (true) {
        std::unique_lock<std::mutex> l(_lock);
        if (_cancelled) {
            st = Status::CANCELLED;
            break;
        }

        if (_finished) {
            st = Status::OK;
            break;
        }

        if (left_time <= 0 || left_rows <= 0 || left_bytes <=0) {
            VLOG(3) << "kafka consume batch finished"
                    << ". left time=" << left_time
                    << ", left rows=" << left_rows
                    << ", left bytes=" << left_bytes;
            _kafka_consumer_pipe->finish();
            _finished = true;
            return Status::OK;
        }

        // consume 1 message at a time
        RdKafka::Message *msg = _k_consumer->consume(1000 /* timeout, ms */);
        switch (msg->err()) {
        case RdKafka::ERR_NO_ERROR:
            VLOG(3) << "get kafka message, offset: " << msg->offset();
            st = _kafka_consumer_pipe->append_with_line_delimiter(
                    static_cast<const char *>(msg->payload()),
                    static_cast<size_t>(msg->len()));
            if (st.ok()) {
                left_rows--;
                left_bytes -= msg->len();
                _ctx->kafka_info->cmt_offset[msg->partition()] = msg->offset();
                VLOG(3) << "consume partition[ " << msg->partition()
                        << " - " << msg->offset();
            }

            break;
        case RdKafka::ERR__TIMED_OUT:
            // leave the status as OK, because this may happend
            // if there is no data in kafka.
            LOG(WARNING) << "kafka consume timeout";
            break;
        default:
            st = Status(msg->errstr());
            break;
        }
        delete msg;

        if (!st.ok()) {
            _kafka_consumer_pipe->cancel();
            return st;
        }

        left_time = _ctx->kafka_info->max_interval_s - watch.elapsed_time() / 1000 / 1000 / 1000;
    }

    return Status::OK;
}

Status KafkaDataConsumer::cancel() {
    std::unique_lock<std::mutex> l(_lock);
    if (!_init) {
        return Status("consumer is not initialized");
    }

    if (_finished) {
        return Status("consumer is already finished");
    }

    _cancelled = true;
    return Status::OK;
}

} // end namespace doris

be/src/runtime/routine_load/data_consumer.h (new file, 98 lines)
@@ -0,0 +1,98 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <mutex>

#include "librdkafka/rdkafkacpp.h"

#include "runtime/stream_load/stream_load_context.h"

namespace doris {

class KafkaConsumerPipe;
class Status;

class DataConsumer {
public:
    DataConsumer(StreamLoadContext* ctx):
            _ctx(ctx),
            _init(false),
            _finished(false),
            _cancelled(false) {

        _ctx->ref();
    }

    virtual ~DataConsumer() {
        if (_ctx->unref()) {
            delete _ctx;
        }
    }

    // init the consumer with the given parameters
    virtual Status init() = 0;

    // start consuming
    virtual Status start() = 0;

    // cancel the consuming process.
    // if the consumer is not initialized, or the consuming
    // process is already finished, call cancel() will
    // return ERROR
    virtual Status cancel() = 0;

protected:
    StreamLoadContext* _ctx;

    // lock to protect the following bools
    std::mutex _lock;
    bool _init;
    bool _finished;
    bool _cancelled;
};

class KafkaDataConsumer : public DataConsumer {
public:
    KafkaDataConsumer(
            StreamLoadContext* ctx,
            std::shared_ptr<KafkaConsumerPipe> kafka_consumer_pipe
            ):
        DataConsumer(ctx),
        _kafka_consumer_pipe(kafka_consumer_pipe) {
    }

    virtual Status init() override;

    virtual Status start() override;

    virtual Status cancel() override;

    virtual ~KafkaDataConsumer() {
        if (_k_consumer) {
            _k_consumer->close();
            delete _k_consumer;
        }
    }

private:
    std::shared_ptr<KafkaConsumerPipe> _kafka_consumer_pipe;
    RdKafka::KafkaConsumer* _k_consumer = nullptr;
};

} // end namespace doris
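A sketch of how pipe and consumer are wired together (this mirrors RoutineLoadTaskExecutor::exec_task further below; ctx is assumed to be an already-populated StreamLoadContext):

    auto pipe = std::make_shared<KafkaConsumerPipe>();
    auto consumer = std::make_shared<KafkaDataConsumer>(ctx, pipe);
    RETURN_IF_ERROR(consumer->init());    // configure librdkafka, assign partitions
    RETURN_IF_ERROR(consumer->start());   // blocks until the batch limits are hit
    // consumer->cancel() may be called from another thread to stop early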

@@ -15,8 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_RUNTIME_KAFKA_COMSUMER_PIPE_H
#define DORIS_BE_SRC_RUNTIME_KAFKA_COMSUMER_PIPE_H
#pragma once

#include <stdint.h>

@@ -27,24 +26,30 @@
#include "librdkafka/rdkafka.h"

#include "exec/file_reader.h"
#include "http/message_body_sink.h"
#include "runtime/message_body_sink.h"

namespace doris {

class KafkaConsumerPipe : public MessageBodySink, public FileReader {
class KafkaConsumerPipe : public StreamLoadPipe {
public:
    KafkaConsumerPipe();
    ~KafkaConsumerPipe();
    KafkaConsumerPipe(size_t max_buffered_bytes = 1024 * 1024,
                      size_t min_chunk_size = 64 * 1024)
        : StreamLoadPipe(max_buffered_bytes, min_chunk_size) {

    }

private:
    // this is only for testing librdkafka.a
    void test_kafka_lib() {
        //rd_kafka_conf_t *conf = rd_kafka_conf_new();
        //rd_kafka_topic_conf_t *topic_conf = rd_kafka_topic_conf_new();
    virtual ~KafkaConsumerPipe() {}

    Status append_with_line_delimiter(const char* data, size_t size) {
        Status st = append(data, size);
        if (!st.ok()) {
            return st;
        }

        // append the line delimiter
        st = append("\n", 1);
        return st;
    }
};

} // end namespace doris

#endif // DORIS_BE_SRC_RUNTIME_KAFKA_COMSUMER_PIPE_H
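append_with_line_delimiter re-frames Kafka's per-message boundaries as newline-delimited rows, so the downstream CSV reader can treat the pipe like any other plain-text source. For two illustrative messages:

    pipe->append_with_line_delimiter("1,alice", 7);   // pipe now holds "1,alice\n"
    pipe->append_with_line_delimiter("2,bob", 5);     // pipe now holds "1,alice\n2,bob\n"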
be/src/runtime/routine_load/routine_load_task_executor.cpp (new file, 174 lines)
@@ -0,0 +1,174 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "runtime/routine_load/routine_load_task_executor.h"

#include "common/status.h"
#include "runtime/exec_env.h"
#include "runtime/routine_load/data_consumer.h"
#include "runtime/routine_load/kafka_consumer_pipe.h"
#include "runtime/stream_load/stream_load_context.h"
#include "runtime/stream_load/stream_load_executor.h"
#include "util/uid_util.h"

#include "gen_cpp/FrontendService_types.h"
#include "gen_cpp/BackendService_types.h"
#include "gen_cpp/Types_types.h"

namespace doris {

Status RoutineLoadTaskExecutor::submit_task(const TRoutineLoadTask& task) {
    std::unique_lock<std::mutex> l(_lock);
    if (_task_map.find(task.id) != _task_map.end()) {
        // already submitted
        LOG(INFO) << "routine load task " << task.id << " has already been submitted";
        return Status::OK;
    }

    // create the context
    StreamLoadContext* ctx = new StreamLoadContext(_exec_env);
    ctx->load_type = TLoadType::ROUTINE_LOAD;
    ctx->load_src_type = task.type;
    ctx->job_id = task.job_id;
    ctx->id = UniqueId(task.id);
    ctx->txn_id = task.txn_id;
    ctx->db = task.db;
    ctx->table = task.tbl;
    ctx->label = task.label;
    ctx->auth.auth_code = task.auth_code;

    // the routine load task'txn has alreay began in FE.
    // so it need to rollback if encounter error.
    ctx->need_rollback = true;

    // set source related params
    switch (task.type) {
    case TLoadSourceType::KAFKA:
        ctx->kafka_info = new KafkaLoadInfo(task.kafka_load_info);
        break;
    default:
        LOG(WARNING) << "unknown load source type: " << task.type;
        delete ctx;
        return Status("unknown load source type");
    }

    VLOG(1) << "receive a new routine load task: " << ctx->brief();
    // register the task
    ctx->ref();
    _task_map[ctx->id] = ctx;

    // offer the task to thread pool
    if (!_thread_pool->offer(
            boost::bind<void>(&RoutineLoadTaskExecutor::exec_task, this, ctx,
                [this] (StreamLoadContext* ctx) {
                    std::unique_lock<std::mutex> l(_lock);
                    _task_map.erase(ctx->id);
                    LOG(INFO) << "finished routine load task " << ctx->brief();
                    if (ctx->unref()) {
                        delete ctx;
                    }
                }))) {

        // failed to submit task, clear and return
        LOG(WARNING) << "failed to submit routine load task: " << ctx->brief();
        _task_map.erase(ctx->id);
        if (ctx->unref()) {
            delete ctx;
        }
        return Status("failed to submit routine load task");

    } else {
        LOG(INFO) << "submit a new routine load task: " << ctx->brief()
                << ", current tasks num: " << _task_map.size();
        return Status::OK;
    }
}

void RoutineLoadTaskExecutor::exec_task(
        StreamLoadContext* ctx, ExecFinishCallback cb) {

    // create pipe and consumer
    std::shared_ptr<StreamLoadPipe> pipe;
    std::shared_ptr<DataConsumer> consumer;
    switch (ctx->load_src_type) {
    case TLoadSourceType::KAFKA:
        pipe = std::make_shared<KafkaConsumerPipe>();
        consumer = std::make_shared<KafkaDataConsumer>(
                ctx, std::static_pointer_cast<KafkaConsumerPipe>(pipe));
        ctx->body_sink = pipe;
        break;
    default:
        std::stringstream ss;
        ss << "unknown routine load task type: " << ctx->load_type;
        err_handler(ctx, Status::CANCELLED, ss.str());
        cb(ctx);
        return;
    }

#define HANDLE_ERROR(stmt, err_msg) \
    do { \
        Status _status_ = (stmt); \
        if (UNLIKELY(!_status_.ok())) { \
            err_handler(ctx, _status_, err_msg); \
            cb(ctx); \
            return; \
        } \
    } while (false);
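Like RETURN_IF_ERROR, this macro turns each step of the task into an early exit that also reports the failure and fires the finish callback. For instance, HANDLE_ERROR(consumer->init(), "failed to init consumer") expands roughly to:

    do {
        Status _status_ = (consumer->init());
        if (UNLIKELY(!_status_.ok())) {
            err_handler(ctx, _status_, "failed to init consumer");
            cb(ctx);    // notify the executor before bailing out
            return;
        }
    } while (false);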

    HANDLE_ERROR(consumer->init(), "failed to init consumer");

    // the pipe must be registered before executing the plan fragment
    HANDLE_ERROR(_exec_env->load_stream_mgr()->put(ctx->id, pipe), "failed to add pipe");

    // execute the plan fragment, async
    HANDLE_ERROR(_exec_env->stream_load_executor()->execute_plan_fragment(ctx),
            "failed to execute plan fragment");

    // start to consume; this may block for a while
    HANDLE_ERROR(consumer->start(), "consuming failed");

    // wait for the consumer to finish
    HANDLE_ERROR(ctx->future.get(), "consume failed");

    ctx->load_cost_nanos = MonotonicNanos() - ctx->start_nanos;

    // commit the txn
    HANDLE_ERROR(_exec_env->stream_load_executor()->commit_txn(ctx), "commit failed");

    cb(ctx);
}

void RoutineLoadTaskExecutor::err_handler(
        StreamLoadContext* ctx,
        const Status& st,
        const std::string& err_msg) {

    LOG(WARNING) << err_msg;
    ctx->status = st;
    if (ctx->need_rollback) {
        _exec_env->stream_load_executor()->rollback_txn(ctx);
        ctx->need_rollback = false;
    }
    if (ctx->body_sink.get() != nullptr) {
        ctx->body_sink->cancel();
    }

    return;
}

} // end namespace

75  be/src/runtime/routine_load/routine_load_task_executor.h  Normal file
@ -0,0 +1,75 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <functional>
#include <map>
#include <mutex>
#include <string>
#include <unordered_map>

#include "util/thread_pool.hpp"
#include "util/uid_util.h"

namespace doris {

class ExecEnv;
class Status;
class StreamLoadContext;
class TRoutineLoadTask;

// A routine load task executor receives routine load tasks
// from the FE and puts them into a fixed-size thread pool.
// The thread pool processes each task and finally reports
// the result back to the FE.
class RoutineLoadTaskExecutor {
public:
    // parameter: the context of the finished task
    typedef std::function<void (StreamLoadContext*)> ExecFinishCallback;

    RoutineLoadTaskExecutor(ExecEnv* exec_env):
            _exec_env(exec_env) {
        _thread_pool = new ThreadPool(10, 1000);
    }

    ~RoutineLoadTaskExecutor() {
        if (_thread_pool) {
            delete _thread_pool;
        }
    }

    // submit a routine load task
    Status submit_task(const TRoutineLoadTask& task);

private:
    // execute the task
    void exec_task(StreamLoadContext* ctx, ExecFinishCallback cb);

    void err_handler(
            StreamLoadContext* ctx,
            const Status& st,
            const std::string& err_msg);

private:
    ExecEnv* _exec_env;
    ThreadPool* _thread_pool;

    std::mutex _lock;
    // task id -> load context
    std::unordered_map<UniqueId, StreamLoadContext*> _task_map;
};

} // end namespace
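The executor hands each task to the pool together with a completion callback that unregisters the context and possibly deletes it; the ref()/unref() pair keeps the context alive across the submitting thread and the worker thread. A minimal, self-contained sketch of that hand-off pattern (MockContext and the single std::thread are stand-ins, not the real Doris classes):

#include <atomic>
#include <cstdio>
#include <functional>
#include <thread>

// stands in for StreamLoadContext's ref-counting; not the real class
struct MockContext {
    std::atomic<int> refs{0};
    void ref() { refs.fetch_add(1); }
    bool unref() { return refs.fetch_sub(1) == 1; }  // true => caller must delete
};

int main() {
    MockContext* ctx = new MockContext();
    ctx->ref();  // taken at submit time, released by the finish callback

    // the finish callback mirrors what submit_task passes to the pool:
    // unregister the task, then drop the submitter's reference
    auto finish_cb = [](MockContext* c) {
        std::printf("task finished, unregistering\n");
        if (c->unref()) {
            delete c;  // last reference gone
        }
    };

    // stands in for ThreadPool::offer(); a worker runs the task body
    std::thread worker([ctx, finish_cb] {
        std::printf("executing task\n");
        finish_cb(ctx);
    });
    worker.join();
    return 0;
}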

@ -28,13 +28,13 @@
#include "common/logging.h"
#include "exec/broker_reader.h"
#include "exec/broker_writer.h"
#include "exec/schema_scanner/frontend_helper.h"
#include "olap/file_helper.h"
#include "olap/olap_engine.h"
#include "olap/olap_table.h"
#include "runtime/exec_env.h"
#include "runtime/broker_mgr.h"
#include "util/file_utils.h"
#include "util/frontend_helper.h"

namespace doris {

@ -21,7 +21,7 @@
#include <mutex>
#include <unordered_map>

#include "runtime/stream_load_pipe.h" // for StreamLoadPipe
#include "runtime/stream_load/stream_load_pipe.h" // for StreamLoadPipe
#include "util/uid_util.h" // for std::hash for UniqueId

namespace doris {
@ -54,6 +54,15 @@ public:
        return stream;
    }

    void remove(const UniqueId& id) {
        std::lock_guard<std::mutex> l(_lock);
        auto it = _stream_map.find(id);
        if (it != std::end(_stream_map)) {
            _stream_map.erase(it);
        }
        return;
    }

private:
    std::mutex _lock;
    std::unordered_map<UniqueId, std::shared_ptr<StreamLoadPipe>> _stream_map;

100  be/src/runtime/stream_load/stream_load_context.cpp  Normal file
@ -0,0 +1,100 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "runtime/stream_load/stream_load_context.h"

namespace doris {

std::string StreamLoadContext::to_json() const {
    rapidjson::StringBuffer s;
    rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(s);

    writer.StartObject();
    // txn id
    writer.Key("TxnId");
    writer.Int64(txn_id);

    // label
    writer.Key("Label");
    writer.String(label.c_str());

    // status
    writer.Key("Status");
    switch (status.code()) {
    case TStatusCode::OK:
        writer.String("Success");
        break;
    case TStatusCode::PUBLISH_TIMEOUT:
        writer.String("Publish Timeout");
        break;
    case TStatusCode::LABEL_ALREADY_EXISTS:
        writer.String("Label Already Exists");
        break;
    default:
        writer.String("Fail");
        break;
    }
    // msg
    writer.Key("Message");
    if (status.ok()) {
        writer.String("OK");
    } else {
        writer.String(status.get_error_msg().c_str());
    }
    // number of loaded rows
    writer.Key("NumberLoadedRows");
    writer.Int64(number_loaded_rows);
    writer.Key("NumberFilteredRows");
    writer.Int64(number_filtered_rows);
    writer.Key("LoadBytes");
    writer.Int64(receive_bytes);
    writer.Key("LoadTimeMs");
    writer.Int64(load_cost_nanos / 1000000);
    if (!error_url.empty()) {
        writer.Key("ErrorURL");
        writer.String(error_url.c_str());
    }
    writer.EndObject();
    return s.GetString();
}

std::string StreamLoadContext::brief(bool detail) const {
    std::stringstream ss;
    ss << " id=" << id << ", txn id=" << txn_id << ", label=" << label;
    if (detail) {
        switch (load_src_type) {
        case TLoadSourceType::KAFKA:
            if (kafka_info != nullptr) {
                ss << ", kafka"
                   << ", brokers: " << kafka_info->brokers
                   << ", group_id: " << kafka_info->group_id
                   << ", client_id: " << kafka_info->client_id
                   << ", topic: " << kafka_info->topic
                   << ", partition: ";
                for (auto& entry : kafka_info->begin_offset) {
                    ss << "[" << entry.first << ": " << entry.second << "]";
                }
            }
            break;
        default:
            break;
        }
    }
    return ss.str();
}

} // end namespace
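to_json drives rapidjson's PrettyWriter directly over the context fields. A minimal, self-contained sketch of the same writer pattern (the field values below are invented for illustration):

#include <cstdio>
#include <rapidjson/prettywriter.h>
#include <rapidjson/stringbuffer.h>

int main() {
    rapidjson::StringBuffer s;
    rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(s);

    writer.StartObject();
    writer.Key("TxnId");
    writer.Int64(1001);                    // made-up txn id
    writer.Key("Label");
    writer.String("load-2018-01-01");      // made-up label
    writer.Key("Status");
    writer.String("Success");
    writer.Key("LoadTimeMs");
    writer.Int64(2500000000LL / 1000000);  // nanos -> millis, as in to_json
    writer.EndObject();

    std::printf("%s\n", s.GetString());
    return 0;
}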

163  be/src/runtime/stream_load/stream_load_context.h  Normal file
@ -0,0 +1,163 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <atomic>
#include <future>
#include <map>
#include <memory>
#include <sstream>
#include <vector>
#include <rapidjson/prettywriter.h>

#include "gen_cpp/BackendService_types.h"
#include "gen_cpp/FrontendService_types.h"

#include "common/status.h"
#include "common/logging.h"
#include "common/utils.h"
#include "runtime/exec_env.h"
#include "runtime/stream_load/load_stream_mgr.h"
#include "runtime/stream_load/stream_load_executor.h"
#include "util/time.h"
#include "util/uid_util.h"

namespace doris {

// kafka related info
class KafkaLoadInfo {
public:
    KafkaLoadInfo(const TKafkaLoadInfo& t_info):
            brokers(t_info.brokers),
            group_id(t_info.group_id),
            client_id(t_info.client_id),
            topic(t_info.topic),
            max_interval_s(t_info.max_interval_s),
            max_batch_rows(t_info.max_batch_rows),
            max_batch_bytes(t_info.max_batch_size),
            begin_offset(t_info.partition_begin_offset) {
    }

public:
    std::string brokers;
    std::string group_id;
    std::string client_id;
    std::string topic;

    // the following members bound the progress of one consuming
    // round; consuming finishes as soon as any of them is reached.
    int64_t max_interval_s;
    int64_t max_batch_rows;
    int64_t max_batch_bytes;

    // partition -> begin offset, inclusive.
    std::map<int32_t, int64_t> begin_offset;
    // partition -> commit offset, inclusive.
    std::map<int32_t, int64_t> cmt_offset;
};

class MessageBodySink;

class StreamLoadContext {
public:
    StreamLoadContext(ExecEnv* exec_env) :
            _exec_env(exec_env),
            _refs(0) {
        start_nanos = MonotonicNanos();
    }

    ~StreamLoadContext() {
        if (need_rollback) {
            _exec_env->stream_load_executor()->rollback_txn(this);
            need_rollback = false;
        }

        _exec_env->load_stream_mgr()->remove(id);

        if (kafka_info != nullptr) {
            delete kafka_info;
        }
    }

    void rollback();

    std::string to_json() const;

    // return the brief info of this context.
    // also print the load source info if detail is set to true
    std::string brief(bool detail = false) const;

    void ref() { _refs.fetch_add(1); }
    // if unref() returns true, this object should be deleted
    bool unref() { return _refs.fetch_sub(1) == 1; }

public:
    // load type, eg: ROUTINE_LOAD/MANUL_LOAD
    TLoadType::type load_type;
    // load data source, eg: KAFKA/RAW
    TLoadSourceType::type load_src_type;

    // the job this stream load task belongs to,
    // set to -1 if there is no job
    int64_t job_id = -1;

    // id for each load
    UniqueId id;

    std::string db;
    std::string table;
    std::string label;

    std::string user_ip;

    AuthInfo auth;

    // only used to check if we received the whole body
    size_t body_bytes = 0;
    size_t receive_bytes = 0;

    int64_t txn_id = -1;

    bool need_rollback = false;
    // when use_streaming is true, we use stream_pipe to send source data,
    // otherwise we save source data to a file first, then process it.
    bool use_streaming = false;
    TFileFormatType::type format = TFileFormatType::FORMAT_CSV_PLAIN;

    std::shared_ptr<MessageBodySink> body_sink;

    TStreamLoadPutResult put_result;
    double max_filter_ratio = 0.0;
    std::vector<TTabletCommitInfo> commit_infos;

    std::promise<Status> promise;
    std::future<Status> future = promise.get_future();

    Status status;

    int64_t number_loaded_rows = 0;
    int64_t number_filtered_rows = 0;
    int64_t loaded_bytes = 0;
    int64_t start_nanos = 0;
    int64_t load_cost_nanos = 0;
    std::string error_url;

    KafkaLoadInfo* kafka_info = nullptr;

private:
    ExecEnv* _exec_env;
    std::atomic<int> _refs;
};

} // end namespace
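The context pairs a std::promise with a std::future so that exec_task can block on ctx->future.get() until the plan-fragment callback publishes the final Status. A minimal, self-contained sketch of that synchronization, with a plain string standing in for doris::Status:

#include <cstdio>
#include <future>
#include <string>
#include <thread>

int main() {
    std::promise<std::string> promise;                // owned by the "context"
    std::future<std::string> future = promise.get_future();

    // the fragment-executor callback runs on another thread
    // and publishes the final status exactly once
    std::thread worker([&promise] {
        promise.set_value("OK");
    });

    // exec_task side: blocks until the worker sets the value
    std::string status = future.get();
    std::printf("final status: %s\n", status.c_str());

    worker.join();
    return 0;
}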

243  be/src/runtime/stream_load/stream_load_executor.cpp  Normal file
@ -0,0 +1,243 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "runtime/stream_load/stream_load_executor.h"

#include "common/status.h"
#include "common/utils.h"
#include "runtime/client_cache.h"
#include "runtime/exec_env.h"
#include "runtime/fragment_mgr.h"
#include "runtime/plan_fragment_executor.h"
#include "runtime/runtime_state.h"
#include "runtime/stream_load/stream_load_context.h"
#include "util/frontend_helper.h"

#include "gen_cpp/FrontendService.h"
#include "gen_cpp/FrontendService_types.h"
#include "gen_cpp/HeartbeatService_types.h"
#include "gen_cpp/Types_types.h"

namespace doris {

#ifdef BE_TEST
TLoadTxnBeginResult k_stream_load_begin_result;
TLoadTxnCommitResult k_stream_load_commit_result;
TLoadTxnRollbackResult k_stream_load_rollback_result;
Status k_stream_load_plan_status;
#endif

Status StreamLoadExecutor::execute_plan_fragment(StreamLoadContext* ctx) {
    // submit the params
#ifndef BE_TEST
    ctx->ref();
    auto st = _exec_env->fragment_mgr()->exec_plan_fragment(
            ctx->put_result.params,
            [ctx] (PlanFragmentExecutor* executor) {
                ctx->commit_infos = std::move(executor->runtime_state()->tablet_commit_infos());
                Status status = executor->status();
                if (status.ok()) {
                    ctx->number_loaded_rows = executor->runtime_state()->num_rows_load_success();
                    ctx->number_filtered_rows = executor->runtime_state()->num_rows_load_filtered();
                    int64_t num_total_rows =
                            ctx->number_loaded_rows + ctx->number_filtered_rows;
                    if ((0.0 + ctx->number_filtered_rows) / num_total_rows > ctx->max_filter_ratio) {
                        status = Status("too many filtered rows");
                    }
                    if (ctx->number_filtered_rows > 0 &&
                            !executor->runtime_state()->get_error_log_file_path().empty()) {

                        if (ctx->load_type == TLoadType::MANUL_LOAD) {
                            ctx->error_url = to_load_error_http_path(
                                    executor->runtime_state()->get_error_log_file_path());
                        }
                    }
                } else {
                    LOG(WARNING) << "fragment execute failed"
                            << ", query_id=" << UniqueId(ctx->put_result.params.params.query_id)
                            << ", errmsg=" << status.get_error_msg()
                            << ctx->brief();
                    // cancel body_sink so the sender knows about the failure
                    if (ctx->body_sink != nullptr) {
                        ctx->body_sink->cancel();
                    }
                }
                ctx->promise.set_value(status);
                if (ctx->unref()) {
                    delete ctx;
                }
            });
    if (!st.ok()) {
        // no need to check unref's return value
        ctx->unref();
        return st;
    }
#else
    ctx->promise.set_value(k_stream_load_plan_status);
#endif
    return Status::OK;
}

Status StreamLoadExecutor::begin_txn(StreamLoadContext* ctx) {
    TNetworkAddress master_addr = _exec_env->master_info()->network_address;

    TLoadTxnBeginRequest request;
    set_request_auth(&request, ctx->auth);
    request.db = ctx->db;
    request.tbl = ctx->table;
    request.label = ctx->label;
    // set timestamp
    request.__set_timestamp(GetCurrentTimeMicros());

    TLoadTxnBeginResult result;
#ifndef BE_TEST
    RETURN_IF_ERROR(FrontendHelper::rpc(
            master_addr.hostname, master_addr.port,
            [&request, &result] (FrontendServiceConnection& client) {
                client->loadTxnBegin(result, request);
            }));
#else
    result = k_stream_load_begin_result;
#endif
    Status status(result.status);
    if (!status.ok()) {
        LOG(WARNING) << "begin transaction failed, errmsg=" << status.get_error_msg()
                << ctx->brief();
        return status;
    }
    ctx->txn_id = result.txnId;
    ctx->need_rollback = true;

    return Status::OK;
}

Status StreamLoadExecutor::commit_txn(StreamLoadContext* ctx) {
    TNetworkAddress master_addr = _exec_env->master_info()->network_address;

    TLoadTxnCommitRequest request;
    set_request_auth(&request, ctx->auth);
    request.db = ctx->db;
    request.tbl = ctx->table;
    request.txnId = ctx->txn_id;
    request.sync = true;
    request.commitInfos = std::move(ctx->commit_infos);
    request.__isset.commitInfos = true;

    // set attachment if any
    TTxnCommitAttachment attachment;
    if (collect_load_stat(ctx, &attachment)) {
        request.txnCommitAttachment = std::move(attachment);
        request.__isset.txnCommitAttachment = true;
    }

    TLoadTxnCommitResult result;
#ifndef BE_TEST
    RETURN_IF_ERROR(FrontendHelper::rpc(
            master_addr.hostname, master_addr.port,
            [&request, &result] (FrontendServiceConnection& client) {
                client->loadTxnCommit(result, request);
            }, config::txn_commit_rpc_timeout_ms));
#else
    result = k_stream_load_commit_result;
#endif
    // return if this transaction was committed successfully;
    // otherwise we need to try to roll it back
    Status status(result.status);
    if (!status.ok()) {
        LOG(WARNING) << "commit transaction failed, id=" << ctx->id
                << ", errmsg=" << status.get_error_msg();
        return status;
    }
    // commit success, set need_rollback to false
    ctx->need_rollback = false;
    return Status::OK;
}

void StreamLoadExecutor::rollback_txn(StreamLoadContext* ctx) {
    TNetworkAddress master_addr = _exec_env->master_info()->network_address;
    TLoadTxnRollbackRequest request;
    set_request_auth(&request, ctx->auth);
    request.db = ctx->db;
    request.tbl = ctx->table;
    request.txnId = ctx->txn_id;
    request.__set_reason(ctx->status.get_error_msg());
    TLoadTxnRollbackResult result;

    // set attachment if any
    TTxnCommitAttachment attachment;
    if (collect_load_stat(ctx, &attachment)) {
        request.txnCommitAttachment = std::move(attachment);
        request.__isset.txnCommitAttachment = true;
    }

#ifndef BE_TEST
    auto rpc_st = FrontendHelper::rpc(
            master_addr.hostname, master_addr.port,
            [&request, &result] (FrontendServiceConnection& client) {
                client->loadTxnRollback(result, request);
            });
    if (!rpc_st.ok()) {
        LOG(WARNING) << "transaction rollback failed. errmsg=" << rpc_st.get_error_msg()
                << ctx->brief();
    }
#else
    result = k_stream_load_rollback_result;
#endif
}

bool StreamLoadExecutor::collect_load_stat(StreamLoadContext* ctx, TTxnCommitAttachment* attach) {
    if (ctx->load_type != TLoadType::ROUTINE_LOAD) {
        // currently, only routine load needs to set the attachment
        return false;
    }

    switch (ctx->load_src_type) {
    case TLoadSourceType::KAFKA: {
        attach->loadType = TLoadType::ROUTINE_LOAD;

        TRLTaskTxnCommitAttachment rl_attach;
        rl_attach.loadSourceType = TLoadSourceType::KAFKA;
        rl_attach.jobId = ctx->job_id;
        rl_attach.id = ctx->id.to_thrift();
        rl_attach.__set_loadedRows(ctx->number_loaded_rows);
        rl_attach.__set_filteredRows(ctx->number_filtered_rows);
        rl_attach.__set_receivedBytes(ctx->receive_bytes);
        rl_attach.__set_loadedBytes(ctx->loaded_bytes);
        rl_attach.__set_loadCostMs(ctx->load_cost_nanos / 1000 / 1000);

        if (ctx->status.ok()) {
            TKafkaRLTaskProgress kafka_progress;
            kafka_progress.partitionCmtOffset = std::move(ctx->kafka_info->cmt_offset);
            rl_attach.kafkaRLTaskProgress = std::move(kafka_progress);
            rl_attach.__isset.kafkaRLTaskProgress = true;
        }

        attach->rlTaskTxnCommitAttachment = std::move(rl_attach);
        attach->__isset.rlTaskTxnCommitAttachment = true;

        return true;
    }
    case TLoadSourceType::RAW:
        return false;
    default:
        // unknown type, should not happen
        return false;
    }
    return false;
}

} // end namespace
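execute_plan_fragment fails the load when the fraction of filtered rows exceeds max_filter_ratio. A minimal, self-contained sketch of that check; note the zero-total guard is an addition here for clarity, the original divides unconditionally (benign in floating point, since NaN compares false):

#include <cassert>
#include <cstdint>

// returns true when the load should fail because too many rows were filtered
bool too_many_filtered_rows(int64_t loaded_rows, int64_t filtered_rows,
                            double max_filter_ratio) {
    int64_t total_rows = loaded_rows + filtered_rows;
    if (total_rows == 0) {
        return false;  // guard added for clarity; not in the original code
    }
    return (0.0 + filtered_rows) / total_rows > max_filter_ratio;
}

int main() {
    assert(!too_many_filtered_rows(99, 1, 0.1));   // 1% filtered, limit 10%
    assert(too_many_filtered_rows(80, 20, 0.1));   // 20% filtered, limit 10%
    assert(!too_many_filtered_rows(0, 0, 0.1));    // empty load passes
    return 0;
}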

51  be/src/runtime/stream_load/stream_load_executor.h  Normal file
@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

namespace doris {

class ExecEnv;
class StreamLoadContext;
class Status;
class TTxnCommitAttachment;

class StreamLoadExecutor {
public:
    StreamLoadExecutor(ExecEnv* exec_env):
            _exec_env(exec_env) {
    }

    Status begin_txn(StreamLoadContext* ctx);

    Status commit_txn(StreamLoadContext* ctx);

    void rollback_txn(StreamLoadContext* ctx);

    Status execute_plan_fragment(StreamLoadContext* ctx);

private:
    // collect the load statistics from the context and set them on the attachment;
    // return true if the attachment is set, otherwise return false
    bool collect_load_stat(StreamLoadContext* ctx, TTxnCommitAttachment* attachment);

private:
    ExecEnv* _exec_env;
};

}

@ -22,7 +22,7 @@
#include <mutex>

#include "exec/file_reader.h"
#include "http/message_body_sink.h"
#include "runtime/message_body_sink.h"
#include "util/bit_util.h"
#include "util/byte_buffer.h"

@ -34,6 +34,7 @@
#include "runtime/pull_load_task_mgr.h"
#include "runtime/export_task_mgr.h"
#include "runtime/result_buffer_mgr.h"
#include "runtime/routine_load/routine_load_task_executor.h"

namespace doris {

@ -228,4 +229,10 @@ void BackendService::get_tablet_stat(TTabletStatResult& result) {
    OLAPEngine::get_instance()->get_tablet_stat(result);
}

void BackendService::submit_routine_load_task(
        TStatus& t_status, const TRoutineLoadTask& task) {
    Status status = _exec_env->routine_load_task_executor()->submit_task(task);
    status.to_thrift(&t_status);
}

} // namespace doris

@ -147,6 +147,8 @@ public:

    virtual void get_tablet_stat(TTabletStatResult& result) override;

    virtual void submit_routine_load_task(TStatus& t_status, const TRoutineLoadTask& task) override;

private:
    Status start_plan_fragment_execution(const TExecPlanFragmentParams& exec_params);

@ -55,7 +55,7 @@
#include "service/http_service.h"
#include <gperftools/profiler.h>
#include "common/resource_tls.h"
#include "exec/schema_scanner/frontend_helper.h"
#include "util/frontend_helper.h"

static void help(const char*);

@ -42,7 +42,7 @@ add_library(Util STATIC
    parse_util.cpp
    path_builder.cpp
    # TODO: not supported on RHEL 5
    # perf-counters.cpp
    # perf-counters.cpp
    progress_updater.cpp
    runtime_profile.cpp
    static_asserts.cpp
@ -67,13 +67,14 @@ add_library(Util STATIC
    null_load_error_hub.cpp
    time.cpp
    os_info.cpp
    # coding_util.cpp
    # coding_util.cpp
    cidr.cpp
    core_local.cpp
    uid_util.cpp
    aes_util.cpp
    string_util.cpp
    md5.cpp
    frontend_helper.cpp
)

#ADD_BE_TEST(integer-array-test)

91  be/src/util/frontend_helper.cpp  Normal file
@ -0,0 +1,91 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "util/frontend_helper.h"

#include <sstream>

#include <boost/foreach.hpp>
#include <boost/functional/hash.hpp>
#include <boost/thread/locks.hpp>
#include <boost/thread/thread.hpp>

#include "gen_cpp/FrontendService_types.h"
#include "gen_cpp/FrontendService.h"
#include "runtime/runtime_state.h"
#include "runtime/exec_env.h"
#include "runtime/client_cache.h"
#include "util/network_util.h"
#include "util/thrift_util.h"
#include "util/runtime_profile.h"

namespace doris {

ExecEnv* FrontendHelper::_s_exec_env;

using apache::thrift::protocol::TProtocol;
using apache::thrift::protocol::TBinaryProtocol;
using apache::thrift::transport::TSocket;
using apache::thrift::transport::TTransport;
using apache::thrift::transport::TBufferedTransport;

void FrontendHelper::setup(ExecEnv* exec_env) {
    _s_exec_env = exec_env;
}

Status FrontendHelper::rpc(
        const std::string& ip,
        const int32_t port,
        std::function<void (FrontendServiceConnection&)> callback,
        int timeout_ms) {
    TNetworkAddress address = make_network_address(ip, port);
    Status status;
    FrontendServiceConnection client(
            _s_exec_env->frontend_client_cache(), address, timeout_ms, &status);
    if (!status.ok()) {
        LOG(WARNING) << "Connect frontend failed, address=" << address
                << ", status=" << status.get_error_msg();
        return status;
    }
    try {
        try {
            callback(client);
        } catch (apache::thrift::transport::TTransportException& e) {
            LOG(WARNING) << "retrying call frontend service, address="
                    << address << ", reason=" << e.what();
            status = client.reopen(timeout_ms);
            if (!status.ok()) {
                LOG(WARNING) << "client reopen failed. address=" << address
                        << ", status=" << status.get_error_msg();
                return status;
            }
            callback(client);
        }
    } catch (apache::thrift::TException& e) {
        // just reopen to disable this connection
        client.reopen(timeout_ms);
        LOG(WARNING) << "call frontend service failed, address=" << address
                << ", reason=" << e.what();
        return Status(TStatusCode::THRIFT_RPC_ERROR,
                "failed to call frontend service", false);
    }
    return Status::OK;
}

}
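FrontendHelper::rpc retries the thrift call exactly once: a TTransportException triggers a reopen and a second attempt, while any other TException just poisons the connection and fails. A minimal, self-contained sketch of that retry-once shape, with plain exceptions standing in for the thrift types:

#include <cstdio>
#include <stdexcept>

struct TransportError : std::runtime_error {
    using std::runtime_error::runtime_error;
};

// stands in for the thrift call made through the connection
void call_frontend(int attempt) {
    if (attempt == 0) {
        throw TransportError("connection reset");  // first attempt fails
    }
    std::printf("rpc succeeded on attempt %d\n", attempt + 1);
}

bool rpc_with_one_retry() {
    try {
        try {
            call_frontend(0);
        } catch (const TransportError& e) {
            std::printf("retrying after transport error: %s\n", e.what());
            // the real code reopens the cached connection here
            call_frontend(1);
        }
    } catch (const std::exception& e) {
        std::printf("rpc failed: %s\n", e.what());
        return false;
    }
    return true;
}

int main() {
    return rpc_with_one_retry() ? 0 : 1;
}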

54  be/src/util/frontend_helper.h  Normal file
@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <functional>
#include <string>

#include "common/config.h"
#include "common/status.h"
#include "gen_cpp/FrontendService_types.h"

namespace doris {

class ExecEnv;
class FrontendServiceClient;
template <class T> class ClientConnection;

// this class is a helper for frontend service RPC; easy to mock for unit test
class FrontendHelper {
public:
    static void setup(ExecEnv* exec_env);

    // for default timeout
    static Status rpc(
            const std::string& ip,
            const int32_t port,
            std::function<void (ClientConnection<FrontendServiceClient>&)> callback) {
        return rpc(ip, port, callback, config::thrift_rpc_timeout_ms);
    }

    static Status rpc(
            const std::string& ip,
            const int32_t port,
            std::function<void (ClientConnection<FrontendServiceClient>&)> callback,
            int timeout_ms);

private:
    static ExecEnv* _s_exec_env;
};

}

@ -23,11 +23,11 @@
#include "gen_cpp/internal_service.pb.h"
#include "runtime/decimal_value.h"
#include "runtime/exec_env.h"
#include "runtime/load_stream_mgr.h"
#include "runtime/row_batch.h"
#include "runtime/runtime_state.h"
#include "runtime/thread_resource_mgr.h"
#include "runtime/tuple_row.h"
#include "runtime/stream_load/load_stream_mgr.h"
#include "service/brpc.h"
#include "util/brpc_stub_cache.h"
#include "util/cpu_info.h"

@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "http/message_body_sink.h"
#include "runtime/message_body_sink.h"

#include <gtest/gtest.h>

@ -23,12 +23,13 @@
#include <event2/http_struct.h>
#include <rapidjson/document.h>

#include "exec/schema_scanner/frontend_helper.h"
#include "exec/schema_scanner/schema_helper.h"
#include "gen_cpp/HeartbeatService_types.h"
#include "http/http_channel.h"
#include "http/http_request.h"
#include "runtime/exec_env.h"
#include "runtime/load_stream_mgr.h"
#include "runtime/stream_load/load_stream_mgr.h"
#include "runtime/stream_load/stream_load_executor.h"
#include "runtime/thread_resource_mgr.h"
#include "util/brpc_stub_cache.h"
#include "util/cpu_info.h"
@ -81,6 +82,7 @@ public:
    _env._master_info = new TMasterInfo();
    _env._load_stream_mgr = new LoadStreamMgr();
    _env._brpc_stub_cache = new BrpcStubCache();
    _env._stream_load_executor = new StreamLoadExecutor(&_env);

    _evhttp_req = evhttp_request_new(nullptr, nullptr);
}
@ -93,6 +95,8 @@ public:
    _env._master_info = nullptr;
    delete _env._thread_mgr;
    _env._thread_mgr = nullptr;
    delete _env._stream_load_executor;
    _env._stream_load_executor = nullptr;

    if (_evhttp_req != nullptr) {
        evhttp_request_free(_evhttp_req);

@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "runtime/stream_load_pipe.h"
#include "runtime/stream_load/stream_load_pipe.h"

#include <gtest/gtest.h>

@ -17,10 +17,10 @@

package org.apache.doris.load.routineload;

import org.apache.doris.thrift.TKafkaRLTaskProgress;

import com.google.common.base.Joiner;
import com.google.common.collect.Maps;
import org.apache.doris.common.io.Writable;
import org.apache.doris.thrift.TKafkaRLTaskProgress;

import java.io.DataInput;
import java.io.DataOutput;
@ -42,7 +42,7 @@ public class KafkaProgress extends RoutineLoadProgress {
    }

    public KafkaProgress(TKafkaRLTaskProgress tKafkaRLTaskProgress) {
        this.partitionIdToOffset = tKafkaRLTaskProgress.getPartitionIdToOffset();
        this.partitionIdToOffset = tKafkaRLTaskProgress.getPartitionCmtOffset();
    }

    public Map<Integer, Long> getPartitionIdToOffset() {

@ -17,8 +17,8 @@

package org.apache.doris.load.routineload;

import org.apache.doris.common.io.Text;
import org.apache.doris.thrift.TRLTaskTxnCommitAttachment;
import org.apache.doris.thrift.TUniqueId;
import org.apache.doris.transaction.TxnCommitAttachment;

import java.io.DataInput;
@ -29,12 +29,12 @@ import java.io.IOException;
// "numOfTotalData": "", "taskId": "", "jobId": ""}
public class RLTaskTxnCommitAttachment extends TxnCommitAttachment {

    public enum RoutineLoadType {
    public enum LoadSourceType {
        KAFKA(1);

        private final int flag;

        private RoutineLoadType(int flag) {
        private LoadSourceType(int flag) {
            this.flag = flag;
        }

@ -42,7 +42,7 @@ public class RLTaskTxnCommitAttachment extends TxnCommitAttachment {
        return flag;
    }

    public static RoutineLoadType valueOf(int flag) {
    public static LoadSourceType valueOf(int flag) {
        switch (flag) {
            case 1:
                return KAFKA;
@ -52,93 +52,55 @@ public class RLTaskTxnCommitAttachment extends TxnCommitAttachment {
        }
    }

    private long jobId;
    private TUniqueId taskId;
    private long filteredRows;
    private long loadedRows;
    private RoutineLoadProgress progress;
    private long backendId;
    private long taskSignature;
    private int numOfErrorData;
    private int numOfTotalData;
    private String taskId;
    private String jobId;
    private RoutineLoadType routineLoadType;
    private LoadSourceType loadSourceType;

    public RLTaskTxnCommitAttachment() {
    }

    public RLTaskTxnCommitAttachment(TRLTaskTxnCommitAttachment rlTaskTxnCommitAttachment) {
        this.backendId = rlTaskTxnCommitAttachment.getBackendId();
        this.taskSignature = rlTaskTxnCommitAttachment.getTaskSignature();
        this.numOfErrorData = rlTaskTxnCommitAttachment.getNumOfErrorData();
        this.numOfTotalData = rlTaskTxnCommitAttachment.getNumOfTotalData();
        this.taskId = rlTaskTxnCommitAttachment.getTaskId();
        this.jobId = rlTaskTxnCommitAttachment.getJobId();
        switch (rlTaskTxnCommitAttachment.getRoutineLoadType()) {
        this.taskId = rlTaskTxnCommitAttachment.getId();
        this.filteredRows = rlTaskTxnCommitAttachment.getFilteredRows();
        this.loadedRows = rlTaskTxnCommitAttachment.getLoadedRows();

        switch (rlTaskTxnCommitAttachment.getLoadSourceType()) {
            case KAFKA:
                this.loadSourceType = LoadSourceType.KAFKA;
                this.progress = new KafkaProgress(rlTaskTxnCommitAttachment.getKafkaRLTaskProgress());
                break;
            default:
                break;
        }
    }

    public long getJobId() {
        return jobId;
    }

    public TUniqueId getTaskId() {
        return taskId;
    }

    public long getFilteredRows() {
        return filteredRows;
    }

    public long getLoadedRows() {
        return loadedRows;
    }

    public RoutineLoadProgress getProgress() {
        return progress;
    }

    public void setProgress(RoutineLoadProgress progress) {
        this.progress = progress;
    }

    public long getBackendId() {
        return backendId;
    }

    public void setBackendId(long backendId) {
        this.backendId = backendId;
    }

    public long getTaskSignature() {
        return taskSignature;
    }

    public void setTaskSignature(long taskSignature) {
        this.taskSignature = taskSignature;
    }

    public int getNumOfErrorData() {
        return numOfErrorData;
    }

    public void setNumOfErrorData(int numOfErrorData) {
        this.numOfErrorData = numOfErrorData;
    }

    public int getNumOfTotalData() {
        return numOfTotalData;
    }

    public void setNumOfTotalData(int numOfTotalData) {
        this.numOfTotalData = numOfTotalData;
    }

    public String getTaskId() {
        return taskId;
    }

    public void setTaskId(String taskId) {
        this.taskId = taskId;
    }

    public String getJobId() {
        return jobId;
    }

    public void setJobId(String jobId) {
        this.jobId = jobId;
    }

    @Override
    public String toString() {
        return "RoutineLoadTaskTxnExtra [backendId=" + backendId
                + ", taskSignature=" + taskSignature
                + ", numOfErrorData=" + numOfErrorData
                + ", numOfTotalData=" + numOfTotalData
        return "RoutineLoadTaskTxnExtra [filteredRows=" + filteredRows
                + ", loadedRows=" + loadedRows
                + ", taskId=" + taskId
                + ", jobId=" + jobId
                + ", progress=" + progress.toString() + "]";
@ -146,30 +108,11 @@ public class RLTaskTxnCommitAttachment extends TxnCommitAttachment {

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(backendId);
        out.writeLong(taskSignature);
        out.writeInt(numOfErrorData);
        out.writeInt(numOfTotalData);
        Text.writeString(out, taskId);
        Text.writeString(out, jobId);
        out.writeInt(routineLoadType.value());
        progress.write(out);
        // TODO: think twice
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        backendId = in.readLong();
        taskSignature = in.readLong();
        numOfErrorData = in.readInt();
        numOfTotalData = in.readInt();
        taskId = Text.readString(in);
        jobId = Text.readString(in);
        routineLoadType = RoutineLoadType.valueOf(in.readInt());
        switch (routineLoadType) {
            case KAFKA:
                KafkaProgress kafkaProgress = new KafkaProgress();
                kafkaProgress.readFields(in);
                progress = kafkaProgress;
        }
        // TODO: think twice
    }
}

@ -34,16 +34,13 @@ import org.apache.doris.load.TxnStateChangeListener;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.service.ExecuteEnv;
import org.apache.doris.service.FrontendServiceImpl;
import org.apache.doris.task.AgentTaskQueue;
import org.apache.doris.thrift.TLoadTxnCommitRequest;
import org.apache.doris.thrift.TResourceInfo;
import org.apache.doris.thrift.TTaskType;
import org.apache.doris.transaction.AbortTransactionException;
import org.apache.doris.transaction.BeginTransactionException;
import org.apache.doris.transaction.TransactionState;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -421,7 +418,7 @@ public abstract class RoutineLoadJob implements Writable, TxnStateChangeListener
        frontendService.loadTxnCommit(request);
    }

    private void updateNumOfData(int numOfErrorData, int numOfTotalData) {
    private void updateNumOfData(long numOfErrorData, long numOfTotalData) {
        currentErrorNum += numOfErrorData;
        currentTotalNum += numOfTotalData;
        if (currentTotalNum > BASE_OF_ERROR_RATE) {
@ -487,12 +484,8 @@ public abstract class RoutineLoadJob implements Writable, TxnStateChangeListener
        // step2: update job progress
        updateProgress(rlTaskTxnCommitAttachment.getProgress());

        // step3: remove task in agentTaskQueue
        AgentTaskQueue.removeTask(rlTaskTxnCommitAttachment.getBackendId(), TTaskType.STREAM_LOAD,
                rlTaskTxnCommitAttachment.getTaskSignature());

        // step4: if the rate of error data is more than max_filter_ratio, pause the job
        updateNumOfData(rlTaskTxnCommitAttachment.getNumOfErrorData(), rlTaskTxnCommitAttachment.getNumOfTotalData());
        updateNumOfData(rlTaskTxnCommitAttachment.getFilteredRows(), rlTaskTxnCommitAttachment.getLoadedRows());

        if (state == JobState.RUNNING) {
            // step5: create a new task for partitions

@ -41,8 +41,8 @@ public abstract class TxnCommitAttachment implements Writable {

    public static TxnCommitAttachment fromThrift(TTxnCommitAttachment txnCommitAttachment) {
        if (txnCommitAttachment != null) {
            switch (txnCommitAttachment.txnSourceType) {
                case ROUTINE_LOAD_TASK:
            switch (txnCommitAttachment.getLoadType()) {
                case ROUTINE_LOAD:
                    return new RLTaskTxnCommitAttachment(txnCommitAttachment.getRlTaskTxnCommitAttachment());
                default:
                    return null;

@ -39,6 +39,7 @@ import org.apache.doris.thrift.TMiniLoadEtlTaskRequest;
import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TPullLoadSubTaskInfo;
import org.apache.doris.thrift.TResultBatch;
import org.apache.doris.thrift.TRoutineLoadTask;
import org.apache.doris.thrift.TSnapshotRequest;
import org.apache.doris.thrift.TStatus;
import org.apache.doris.thrift.TTabletStatResult;
@ -217,6 +218,12 @@ public class GenericPoolTest {
        // TODO Auto-generated method stub
        return null;
    }

    @Override
    public TStatus submit_routine_load_task(TRoutineLoadTask task) throws TException {
        // TODO Auto-generated method stub
        return null;
    }
}

@Test
@ -22,11 +22,6 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import com.google.common.collect.Maps;
import mockit.Deencapsulation;
import mockit.Expectations;
import mockit.Injectable;
import mockit.Mocked;
import org.apache.doris.catalog.Catalog;
import org.apache.doris.catalog.CatalogTestUtil;
import org.apache.doris.catalog.Database;
@ -51,12 +46,14 @@ import org.apache.doris.load.routineload.RoutineLoadTaskInfo;
import org.apache.doris.meta.MetaContext;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.thrift.TKafkaRLTaskProgress;
import org.apache.doris.thrift.TLoadSourceType;
import org.apache.doris.thrift.TRLTaskTxnCommitAttachment;
import org.apache.doris.thrift.TResourceInfo;
import org.apache.doris.thrift.TRoutineLoadType;
import org.apache.doris.thrift.TUniqueId;
import org.apache.doris.transaction.TransactionState.LoadJobSourceType;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import org.apache.kafka.clients.consumer.KafkaConsumer;
@ -70,6 +67,11 @@ import java.util.List;
import java.util.Map;
import java.util.Set;

import mockit.Deencapsulation;
import mockit.Expectations;
import mockit.Injectable;
import mockit.Mocked;

public class GlobalTransactionMgrTest {

    private static FakeEditLog fakeEditLog;
@ -331,25 +333,21 @@ public class GlobalTransactionMgrTest {
        routineLoadJob.setState(RoutineLoadJob.JobState.RUNNING);

        TRLTaskTxnCommitAttachment rlTaskTxnCommitAttachment = new TRLTaskTxnCommitAttachment();
        rlTaskTxnCommitAttachment.setBackendId(1L);
        rlTaskTxnCommitAttachment.setTaskSignature(1L);
        rlTaskTxnCommitAttachment.setNumOfTotalData(100);
        rlTaskTxnCommitAttachment.setNumOfErrorData(1);
        rlTaskTxnCommitAttachment.setTaskId("label");
        rlTaskTxnCommitAttachment.setId(new TUniqueId());
        rlTaskTxnCommitAttachment.setLoadedRows(100);
        rlTaskTxnCommitAttachment.setFilteredRows(1);
        rlTaskTxnCommitAttachment.setJobId(Deencapsulation.getField(routineLoadJob, "id"));
        rlTaskTxnCommitAttachment.setRoutineLoadType(TRoutineLoadType.KAFKA);
        rlTaskTxnCommitAttachment.setLoadSourceType(TLoadSourceType.KAFKA);
        TKafkaRLTaskProgress tKafkaRLTaskProgress = new TKafkaRLTaskProgress();
        Map<Integer, Long> kafkaProgress = Maps.newHashMap();
        kafkaProgress.put(1, 10L);
        tKafkaRLTaskProgress.setPartitionIdToOffset(kafkaProgress);
        tKafkaRLTaskProgress.setPartitionCmtOffset(kafkaProgress);
        rlTaskTxnCommitAttachment.setKafkaRLTaskProgress(tKafkaRLTaskProgress);
        TxnCommitAttachment txnCommitAttachment = new RLTaskTxnCommitAttachment(rlTaskTxnCommitAttachment);

        RoutineLoadManager routineLoadManager = new RoutineLoadManager();
        routineLoadManager.addRoutineLoadJob(routineLoadJob);

        new Expectations() {
            {
                catalog.getDb(1L);
@ -409,25 +407,21 @@ public class GlobalTransactionMgrTest {
        routineLoadJob.setState(RoutineLoadJob.JobState.RUNNING);

        TRLTaskTxnCommitAttachment rlTaskTxnCommitAttachment = new TRLTaskTxnCommitAttachment();
        rlTaskTxnCommitAttachment.setBackendId(1L);
        rlTaskTxnCommitAttachment.setTaskSignature(1L);
        rlTaskTxnCommitAttachment.setNumOfTotalData(100);
        rlTaskTxnCommitAttachment.setNumOfErrorData(11);
        rlTaskTxnCommitAttachment.setTaskId("label");
        rlTaskTxnCommitAttachment.setId(new TUniqueId());
        rlTaskTxnCommitAttachment.setLoadedRows(100);
        rlTaskTxnCommitAttachment.setFilteredRows(11);
        rlTaskTxnCommitAttachment.setJobId(Deencapsulation.getField(routineLoadJob, "id"));
        rlTaskTxnCommitAttachment.setRoutineLoadType(TRoutineLoadType.KAFKA);
        rlTaskTxnCommitAttachment.setLoadSourceType(TLoadSourceType.KAFKA);
        TKafkaRLTaskProgress tKafkaRLTaskProgress = new TKafkaRLTaskProgress();
        Map<Integer, Long> kafkaProgress = Maps.newHashMap();
        kafkaProgress.put(1, 10L);
        tKafkaRLTaskProgress.setPartitionIdToOffset(kafkaProgress);
        tKafkaRLTaskProgress.setPartitionCmtOffset(kafkaProgress);
        rlTaskTxnCommitAttachment.setKafkaRLTaskProgress(tKafkaRLTaskProgress);
        TxnCommitAttachment txnCommitAttachment = new RLTaskTxnCommitAttachment(rlTaskTxnCommitAttachment);

        RoutineLoadManager routineLoadManager = new RoutineLoadManager();
        routineLoadManager.addRoutineLoadJob(routineLoadJob);

        new Expectations() {
            {
                catalog.getDb(1L);
@ -192,7 +192,7 @@ struct TRecoverTabletReq {
    3: optional Types.TVersion version
    4: optional Types.TVersionHash version_hash
}

struct TAgentTaskRequest {
    1: required TAgentServiceVersion protocol_version
    2: required Types.TTaskType task_type

@ -63,6 +63,29 @@ struct TTabletStatResult {
    1: required map<i64, TTabletStat> tablets_stats
}

struct TKafkaLoadInfo {
    1: required string brokers;
    2: required string group_id;
    3: required string client_id;
    4: required string topic;
    5: optional i64 max_interval_s;
    6: optional i64 max_batch_rows;
    7: optional i64 max_batch_size;
    8: optional map<i32, i64> partition_begin_offset;
}

struct TRoutineLoadTask {
    1: required Types.TLoadSourceType type
    2: required i64 job_id
    3: required Types.TUniqueId id
    4: required i64 txn_id
    5: required i64 auth_code
    6: optional string db
    7: optional string tbl
    8: optional string label
    9: optional TKafkaLoadInfo kafka_load_info
}

service BackendService {
    // Called by coord to start asynchronous execution of plan fragment in backend.
    // Returns as soon as all incoming data streams have been set up.
@ -119,4 +142,6 @@ service BackendService {
    Status.TStatus erase_export_task(1:Types.TUniqueId task_id);

    TTabletStatResult get_tablet_stat();

    Status.TStatus submit_routine_load_task(1:TRoutineLoadTask task);
}

@ -458,6 +458,7 @@ struct TLoadTxnBeginRequest {
    6: optional string user_ip
    7: required string label
    8: optional i64 timestamp
    9: optional i64 auth_code
}

struct TLoadTxnBeginResult {
@ -493,6 +494,7 @@ struct TStreamLoadPutRequest {
    14: optional string columnSeparator

    15: optional string partitions
    16: optional i64 auth_code
}

struct TStreamLoadPutResult {
@ -501,31 +503,24 @@ struct TStreamLoadPutResult {
    2: optional PaloInternalService.TExecPlanFragmentParams params
}

enum TRoutineLoadType {
    KAFKA = 1
}

struct TKafkaRLTaskProgress {
    1: required map<i32,i64> partitionIdToOffset
}

enum TTxnSourceType {
    ROUTINE_LOAD_TASK = 1
    1: required map<i32,i64> partitionCmtOffset
}

struct TRLTaskTxnCommitAttachment {
    1: required TRoutineLoadType routineLoadType
    2: required i64 backendId
    3: required i64 taskSignature
    4: required i32 numOfErrorData
    5: required i32 numOfTotalData
    6: required string taskId
    7: required string jobId
    8: optional TKafkaRLTaskProgress kafkaRLTaskProgress
    1: required Types.TLoadSourceType loadSourceType
    2: required Types.TUniqueId id
    3: required i64 jobId
    4: optional i64 loadedRows
    5: optional i64 filteredRows
    6: optional i64 receivedBytes
    7: optional i64 loadedBytes
    8: optional i64 loadCostMs
    9: optional TKafkaRLTaskProgress kafkaRLTaskProgress
}

struct TTxnCommitAttachment {
    1: required TTxnSourceType txnSourceType
    1: required Types.TLoadType loadType
    2: optional TRLTaskTxnCommitAttachment rlTaskTxnCommitAttachment
}

@ -539,7 +534,8 @@ struct TLoadTxnCommitRequest {
    7: required i64 txnId
    8: required bool sync
    9: optional list<Types.TTabletCommitInfo> commitInfos
    10: optional TTxnCommitAttachment txnCommitAttachment
    10: optional i64 auth_code
    11: optional TTxnCommitAttachment txnCommitAttachment
}

struct TLoadTxnCommitResult {
@ -555,6 +551,8 @@ struct TLoadTxnRollbackRequest {
    6: optional string user_ip
    7: required i64 txnId
    8: optional string reason
    9: optional i64 auth_code
    10: optional TTxnCommitAttachment txnCommitAttachment
}

struct TLoadTxnRollbackResult {

@ -354,3 +354,12 @@ struct TTabletCommitInfo {
    2: required i64 backendId
}

enum TLoadType {
    MANUL_LOAD,
    ROUTINE_LOAD,
}

enum TLoadSourceType {
    RAW,
    KAFKA,
}