Files
oceanbase/src/sql/engine/cmd/ob_load_data_impl.cpp
2023-12-14 04:13:25 +00:00

3146 lines
118 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "share/rc/ob_tenant_base.h"
#define USING_LOG_PREFIX SQL_ENG
//#define TEST_MODE
#include "sql/engine/cmd/ob_load_data_impl.h"
#include <math.h>
#include "observer/omt/ob_multi_tenant.h"
#include "lib/oblog/ob_log_module.h"
#include "lib/string/ob_sql_string.h"
#include "storage/access/ob_dml_param.h"
#include "sql/parser/ob_parser.h"
#include "sql/resolver/ob_resolver.h"
#include "sql/resolver/dml/ob_insert_stmt.h"
#include "sql/plan_cache/ob_sql_parameterization.h"
#include "sql/code_generator/ob_expr_generator_impl.h"
#include "sql/code_generator/ob_code_generator.h"
#include "sql/engine/ob_exec_context.h"
#include "sql/engine/cmd/ob_load_data_utils.h"
#include "sql/engine/ob_physical_plan_ctx.h"
#include "sql/resolver/expr/ob_raw_expr_util.h"
#include "sql/das/ob_das_location_router.h"
#include "share/ob_tenant_mgr.h"
#include "share/ob_tenant_memstore_info_operator.h"
#include "sql/resolver/ob_schema_checker.h"
#include "observer/ob_inner_sql_connection_pool.h"
#include "observer/ob_inner_sql_result.h"
#include "share/ob_device_manager.h"
#include "share/backup/ob_backup_io_adapter.h"
#include "storage/tx_storage/ob_tenant_freezer.h"
#include "sql/rewrite/ob_transform_utils.h"
#include "observer/omt/ob_tenant_timezone_mgr.h"
#include "share/config/ob_config_helper.h"
using namespace oceanbase::sql;
using namespace oceanbase::common;
using namespace oceanbase::share;
using namespace oceanbase::share::schema;
using namespace oceanbase::storage;
using namespace oceanbase::observer;
namespace oceanbase
{
namespace sql
{
#ifdef TEST_MODE
// Test-only knobs; compiled in only when TEST_MODE is defined.
static const int64_t INSERT_TASK_DROP_RATE = 1;

// Sleeps for RPC_BATCH_INSERT_TIMEOUT_US when the draw ObRandom::rand(1, 100)
// is less than or equal to `percentage`; otherwise returns immediately.
static void delay_process_by_probability(int64_t percentage)
{
  const bool need_delay = OB_UNLIKELY(ObRandom::rand(1, 100) <= percentage);
  if (need_delay) {
    ob_usleep(RPC_BATCH_INSERT_TIMEOUT_US);
  }
}
#endif
// OW(statement): evaluates `statement` into a local int `inner_ret`. When the
// result is not OB_SUCCESS a warning is logged, and the code is stored into
// the caller's `ret` only if `ret` still holds success, so an earlier error
// is never overwritten. Execution always continues after the macro.
// Requires an `int ret` variable in the enclosing scope.
#define OW(statement) \
do {\
int inner_ret = statement;\
if (OB_UNLIKELY(OB_SUCCESS != inner_ret)) {\
LOG_WARN("fail to exec"#statement, K(inner_ret));\
if (OB_SUCC(ret)) { ret = inner_ret; }\
}\
} while (0)
// SQL template used to ask whether a server's tenant memstore is above its
// freeze trigger. Format arguments, in order: tenant id (%ld), server ip (%s),
// server port (%d). The single output column `need_wait_freeze` is false while
// memstore_used < freeze_trigger * 1.02 and true otherwise.
const char *ObLoadDataBase::SERVER_TENANT_MEMORY_EXAMINE_SQL =
"SELECT case when memstore_used < freeze_trigger * 1.02 then false else true end"
" as need_wait_freeze"
" FROM oceanbase.__all_virtual_tenant_memstore_info WHERE tenant_id = %ld"
" and svr_ip = '%s' and svr_port = %d";
// Tab-separated header line of column names for the log file.
const char *log_file_column_names = "\nBatchId\tLineNum\tType\tErrCode\tErrMsg\t\n";
// printf-style format of one log file row; the fields follow the header order:
// batch id (%ld), line number (%ld), type (%s), error code (%d), error message (%.*s).
const char *log_file_row_fmt = "%ld\t%ld\t%s\t%d\t%.*s\t\n";
// File-level wait interval in microseconds (one second).
static const int64_t WAIT_INTERVAL_US = 1 * 1000 * 1000; //1s
/**
 * Fills every slot of file_col_values with a placeholder string made of
 * id_char followed by the slot index, e.g. "F0", "F1", "F2", ... for 'F'.
 *
 * All placeholders are printed back to back into one buffer taken from
 * `allocator` (6 bytes are budgeted per slot); each ObString only points into
 * that buffer, so the strings live as long as the allocator's memory does.
 *
 * @param file_col_values  array whose existing elements are overwritten
 * @param allocator        provides the shared backing buffer
 * @param id_char          leading character of every placeholder
 * @return OB_SUCCESS, OB_ALLOCATE_MEMORY_FAILED when the buffer cannot be
 *         allocated, or the error reported by databuff_printf.
 */
int ObLoadDataBase::generate_fake_field_strs(ObIArray<ObString> &file_col_values,
                                             ObIAllocator &allocator,
                                             const char id_char)
{
  int ret = OB_SUCCESS;
  const int64_t field_cnt = file_col_values.count();
  const int64_t buf_len = 6 * field_cnt;
  char *buf = static_cast<char*>(allocator.alloc(buf_len * sizeof(char)));
  if (OB_ISNULL(buf)) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("alloc memory failed", K(ret));
  } else {
    int64_t pos = 0;
    for (int64_t i = 0; OB_SUCC(ret) && i < field_cnt; ++i) {
      // remember where this placeholder starts so the ObString can reference it
      const int64_t start_pos = pos;
      if (OB_FAIL(databuff_printf(buf, buf_len, pos, "%c%ld", id_char, i))) {
        LOG_WARN("generate str failed", K(ret), K(pos), K(buf_len));
      } else {
        file_col_values.at(i).assign_ptr(buf + start_pos, pos - start_pos);
      }
    }
    LOG_DEBUG("generate fake field result", K(file_col_values));
  }
  return ret;
}
/**
 * Builds "INSERT INTO <q_name>(<cols>) VALUES ('v',...),('v',...)" into
 * insert_sql.
 *
 * Column names are wrapped in '"' in Oracle mode and '`' otherwise; every
 * value is wrapped in single quotes as-is (no escaping is applied here).
 *
 * @param insert_sql     output; reset before anything is appended
 * @param q_name         table name text appended right after "INSERT INTO "
 * @param desc           one entry per insert column
 * @param insert_values  row-major values; must hold desc.count() * num_rows items
 * @param num_rows       number of rows contained in insert_values
 * @return OB_INVALID_ARGUMENT for an empty name or a size mismatch, otherwise
 *         the result of the append calls.
 */
int ObLoadDataBase::construct_insert_sql(ObSqlString &insert_sql,
                                         const ObString &q_name,
                                         ObIArray<ObLoadTableColumnDesc> &desc,
                                         ObIArray<ObString> &insert_values,
                                         int64_t num_rows)
{
  int ret = OB_SUCCESS;
  insert_sql.reuse();
  // identifier quote depends on the compatibility mode
  char q = lib::is_oracle_mode() ? '"' : '`';

  const bool name_missing = OB_UNLIKELY(q_name.empty());
  if (name_missing
      || OB_UNLIKELY(desc.count() * num_rows != insert_values.count())) {
    ret = OB_INVALID_ARGUMENT;
  }

  // statement head and table name
  OX (ret = insert_sql.assign("INSERT INTO "));
  OX (ret = insert_sql.append(q_name));

  // column list: "(" before the first column, "," before the others
  for (int64_t i = 0; OB_SUCC(ret) && i < desc.count(); ++i) {
    OX (ret = insert_sql.append(0 == i ? "(" : ","));
    OX (ret = insert_sql.append_fmt("%c%.*s%c", q,
                                    desc.at(i).column_name_.length(), desc.at(i).column_name_.ptr(),
                                    q));
  }
  OX (ret = insert_sql.append(") VALUES "));

  // value tuples: a new tuple starts every desc.count() values
  for (int64_t i = 0; OB_SUCC(ret) && i < insert_values.count(); ++i) {
    OX (ret = insert_sql.append(i % desc.count() == 0 ? (i == 0 ? "(" : "),(") : ","));
    OX (ret = insert_sql.append_fmt("'%.*s'",
                                    insert_values.at(i).length(), insert_values.at(i).ptr()));
  }
  OX (ret = insert_sql.append(")"));

  if (OB_SUCC(ret)) {
    LOG_DEBUG("insert sql generated", K(insert_sql));
  } else {
    LOG_WARN("fail to append data", K(ret), K(insert_sql));
  }
  return ret;
}
/**
 * Parses the template insert SQL, parameterizes its syntax tree into
 * param_store and resolves it into an insert statement.
 *
 * @param ctx          execution context; supplies session, sql ctx, schema
 *                     guard, allocator and the expr/stmt factories
 * @param insertsql    template INSERT text to parse
 * @param param_store  receives the constants extracted by parameterization
 * @param insert_stmt  set to the resolved statement on success only
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED when a required object is missing,
 *         or the error of the parser / parameterizer / schema checker / resolver.
 */
int ObLoadDataBase::make_parameterize_stmt(ObExecContext &ctx,
                                           ObSqlString &insertsql,
                                           ParamStore &param_store,
                                           ObInsertStmt *&insert_stmt)
{
  int ret = OB_SUCCESS;
  ObSQLSessionInfo *session = NULL;
  if (OB_ISNULL(session = ctx.get_my_session())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("session is null", K(ret));
  } else if (OB_ISNULL(ctx.get_sql_ctx())
             || OB_ISNULL(ctx.get_sql_ctx()->schema_guard_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("sql ctx is null", K(ret));
  } else {
    ObParser parser(ctx.get_allocator(), session->get_sql_mode());
    ParseResult parse_result;
    SqlInfo not_param_info;
    bool is_transform_outline = false;
    ObMaxConcurrentParam::FixParamStore fixed_param_store(OB_MALLOC_NORMAL_BLOCK_SIZE,
                                                          ObWrapperAllocator(&ctx.get_allocator()));
    if (OB_FAIL(parser.parse(insertsql.string(), parse_result))) {
      LOG_WARN("parser template insert sql failed", K(ret));
    } else if (OB_ISNULL(parse_result.result_tree_)
               || OB_ISNULL(parse_result.result_tree_->children_)) {
      // children_[0] is dereferenced below, so refuse an empty tree up front
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("parse result tree is null", K(ret));
    } else if (OB_FAIL(ObSqlParameterization::transform_syntax_tree(ctx.get_allocator(),
                                                                    *session,
                                                                    NULL,
                                                                    parse_result.result_tree_,
                                                                    not_param_info,
                                                                    param_store,
                                                                    NULL,
                                                                    fixed_param_store,
                                                                    is_transform_outline))) {
      LOG_WARN("parameterize parser tree failed", K(ret));
    } else {
      SMART_VAR(ObResolverParams, resolver_ctx) {
        ObSchemaChecker schema_checker;
        if (OB_FAIL(schema_checker.init(*(ctx.get_sql_ctx()->schema_guard_)))) {
          // the original code silently ignored this return value
          LOG_WARN("fail to init schema checker", K(ret));
        } else if (OB_ISNULL(ctx.get_stmt_factory())) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("invalid argument", K(ret), KP(ctx.get_stmt_factory()));
        } else if (OB_ISNULL(ctx.get_stmt_factory()->get_query_ctx())) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("invalid argument", K(ret), KP(ctx.get_stmt_factory()->get_query_ctx()));
        } else {
          resolver_ctx.allocator_ = &ctx.get_allocator();
          resolver_ctx.schema_checker_ = &schema_checker;
          resolver_ctx.session_info_ = session;
          resolver_ctx.param_list_ = &param_store;
          resolver_ctx.database_id_ = session->get_database_id();
          resolver_ctx.disable_privilege_check_ = PRIV_CHECK_FLAG_DISABLE;
          resolver_ctx.expr_factory_ = ctx.get_expr_factory();
          resolver_ctx.stmt_factory_ = ctx.get_stmt_factory();
          resolver_ctx.query_ctx_ = ctx.get_stmt_factory()->get_query_ctx();
          resolver_ctx.query_ctx_->question_marks_count_ = param_store.count();
          resolver_ctx.query_ctx_->sql_schema_guard_.set_schema_guard(ctx.get_sql_ctx()->schema_guard_);
          ObResolver resolver(resolver_ctx);
          ObStmt *astmt = NULL;
          ParseNode *stmt_tree = parse_result.result_tree_->children_[0];
          if (OB_ISNULL(stmt_tree)) {
            ret = OB_ERR_UNEXPECTED;
            LOG_WARN("invalid argument", K(ret), K(stmt_tree));
          } else if (OB_FAIL(resolver.resolve(ObResolver::IS_NOT_PREPARED_STMT,
                                              *stmt_tree,
                                              astmt))) {
            LOG_WARN("resolve sql failed", K(ret), K(insertsql));
          } else {
            insert_stmt = static_cast<ObInsertStmt*>(astmt);
            // the query ctx is reset once the statement has been resolved
            ctx.get_stmt_factory()->get_query_ctx()->reset();
          }
        }
      }
    }
  }
  return ret;
}
/**
 * Checks the memstore usage of `tenant_id` on this server and tells the
 * caller whether it should wait for a minor freeze.
 *
 * need_wait_minor_freeze becomes true when total_memstore_used exceeds the
 * midpoint between major_freeze_trigger and memstore_limit; it is only
 * written when the memstore figures were read successfully.
 *
 * @param tenant_id               tenant to switch to and inspect
 * @param need_wait_minor_freeze  output flag described above
 * @return OB_SUCCESS, the tenant-switch error, OB_ERR_UNEXPECTED when no
 *         tenant freezer is available, or the get_tenant_memstore_cond error.
 */
int ObLoadDataBase::memory_check_remote(uint64_t tenant_id, bool &need_wait_minor_freeze)
{
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    storage::ObTenantFreezer *freezer = MTL(storage::ObTenantFreezer *);
    int64_t active_memstore_used = 0;
    int64_t total_memstore_used = 0;
    int64_t major_freeze_trigger = 0;
    int64_t memstore_limit = 0;
    int64_t freeze_cnt = 0;
    if (OB_ISNULL(freezer)) {
      // guard the dereference below instead of crashing on a missing module
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("tenant freezer is null", K(ret), K(tenant_id));
    } else if (OB_FAIL(freezer->get_tenant_memstore_cond(active_memstore_used,
                                                         total_memstore_used,
                                                         major_freeze_trigger,
                                                         memstore_limit,
                                                         freeze_cnt))) {
      LOG_WARN("fail to get memstore used", K(ret));
    } else {
      // threshold: halfway between the freeze trigger and the hard limit
      need_wait_minor_freeze =
          total_memstore_used > (memstore_limit - major_freeze_trigger)/2 + major_freeze_trigger;
    }
    LOG_DEBUG("load data check tenant memory usage", K(active_memstore_used),
              K(total_memstore_used),
              K(major_freeze_trigger),
              K(memstore_limit),
              K(freeze_cnt),
              K(need_wait_minor_freeze));
  } else {
    LOG_ERROR("switch tenant failed", K(tenant_id), K(ret));
  }
  return ret;
}
/*
* if param_a != param_b: this variable is from a field of data file,
* calc the corresponding field index via param string value
* return the index
*/
int ObLoadDataBase::calc_param_offset(const ObObj &param_a,
const ObObj &param_b,
int64_t &idx)
{
int ret = OB_SUCCESS;
if (!param_a.is_varchar_or_char() || !param_b.is_varchar_or_char()) {
idx = OB_INVALID_INDEX_INT64;
} else if (param_a.get_string().compare(param_b.get_string()) != 0) {
const ObObj &value = param_a;
const char *value_ptr = value.get_string_ptr();
/* 这里处理的insert模板中的数据,是自己造的,
一定是string类型,字符串内容是 "[F|f][0-9]+",因此长度大于等于2 */
if (value.get_string_len() < 2 || NULL == value_ptr) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("no possible, the values are changed", K(ret));
} else {
int64_t temp_idx = 0;
for (int32_t j = 1; OB_SUCC(ret) && j < value.get_string_len(); ++j) {
char cur_char = *(value_ptr + j);
if (cur_char > '9' || cur_char < '0') {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("no possible, the values are changed", K(ret));
} else {
temp_idx *= 10;
temp_idx += cur_char - '0';
}
}
idx = temp_idx;
}
}
return ret;
}
/**
 * Blocks the caller until the leader of `tablet_id` reports that the tenant
 * memstore no longer needs a freeze.
 *
 * Each round sleeps WAIT_INTERVAL_US, checks the session status, looks up the
 * current leader through the DAS location router and runs
 * SERVER_TENANT_MEMORY_EXAMINE_SQL for that leader. An empty result set
 * (OB_ITER_END) is treated as "keep waiting"; any other error ends the loop
 * after the location cache has been given the error code.
 *
 * @param ctx                execution context (session and DAS ctx)
 * @param tablet_id          tablet whose leader is examined
 * @param server_addr        in: server used so far; out: replaced by the new
 *                           leader when it changed
 * @param total_wait_secs    incremented by the seconds spent waiting here
 * @param is_leader_changed  set on success only; true when the leader differs
 *                           from server_addr
 * @return OB_SUCCESS or the first error that ended the wait.
 */
int ObLoadDataBase::memory_wait_local(ObExecContext &ctx,
                                      const ObTabletID &tablet_id,
                                      ObAddr &server_addr,
                                      int64_t &total_wait_secs,
                                      bool &is_leader_changed)
{
  int ret = OB_SUCCESS;
  static const int64_t WAIT_INTERVAL_US = 1 * 1000 * 1000; //1s
  ObSQLSessionInfo *session = NULL;
  ObMySQLProxy *sql_proxy = NULL;
  SMART_VAR(ObMySQLProxy::MySQLResult, res) {
    sqlclient::ObMySQLResult *result = NULL;
    ObSqlString sql;
    int64_t start_wait_ts = ObTimeUtil::current_time();
    uint64_t tenant_id = OB_INVALID_TENANT_ID;
    if (OB_UNLIKELY(!tablet_id.is_valid())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("invalid tablet id", K(ret), K(tablet_id));
    } else if (OB_ISNULL((sql_proxy = GCTX.sql_proxy_))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("sql_proxy is null", K(ret));
    } else if (OB_ISNULL(session = ctx.get_my_session())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("session is null", K(ret));
    } else {
      tenant_id = session->get_effective_tenant_id();
      //print info
      LOG_INFO("LOAD DATA is suspended until the memory is available",
               K(tablet_id), K(server_addr), K(total_wait_secs));
    }
    bool need_wait_freeze = true;
    ObAddr leader_addr;
    ObDASLocationRouter &loc_router = DAS_CTX(ctx).get_location_router();
    // the loop body is only entered when all the checks above passed,
    // so session and sql_proxy are non-null inside it
    while (OB_SUCC(ret) && need_wait_freeze) {
      ob_usleep(WAIT_INTERVAL_US);
      leader_addr.reset();
      res.reuse();
      char leader_ip_str[MAX_IP_ADDR_LENGTH];
      const int64_t retry_us = 200 * 1000;
      //Try to use the results in the cache as much as possible, without forcing a cache refresh.
      const int64_t expire_renew_time = 0;
      if (OB_FAIL(ObLoadDataUtils::check_session_status(*session))) {
        LOG_WARN("session is not valid during wait", K(ret));
      } else if (OB_FAIL(loc_router.get_leader(tenant_id, tablet_id, leader_addr, expire_renew_time))) {
        LOG_WARN("failed to get location", K(ret));
        ob_usleep(retry_us);
      } else {
        LOG_DEBUG("get participants", K(tablet_id), K(leader_addr));
      }
      if (OB_FAIL(ret)) {
      } else if (!leader_addr.ip_to_string(leader_ip_str, sizeof(leader_ip_str))) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("format leader ip failed", K(ret), K(leader_addr));
      } else if (OB_FAIL(sql.assign_fmt(SERVER_TENANT_MEMORY_EXAMINE_SQL,
                                        tenant_id,
                                        leader_ip_str,
                                        leader_addr.get_port()))) {
        LOG_WARN("fail to append sql", K(ret), K(tenant_id), K(leader_addr));
      } else if (OB_FAIL(sql_proxy->read(res, OB_SYS_TENANT_ID, sql.ptr()))) {
        LOG_WARN("fail to execute sql", K(ret), K(sql));
      } else if (NULL == (result = res.get_result())) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("fail to get sql result", K(ret));
      } else if (OB_FAIL(result->next())) {
        LOG_WARN("fail to get result, force renew location", K(ret), K(leader_addr));
        // no row for this leader: keep waiting and query again next round
        if (OB_ITER_END == ret) {
          ret = OB_SUCCESS;
        }
      } else {
        EXTRACT_BOOL_FIELD_MYSQL(*result, "need_wait_freeze", need_wait_freeze);
      }
      //if it is location exception, refresh location cache with block interface
      //because load data can only local retry
      loc_router.refresh_location_cache_by_errno(false, ret);
    }
    //print info
    if (OB_SUCC(ret)) {
      int64_t wait_secs = (ObTimeUtil::current_time() - start_wait_ts) / 1000000;
      total_wait_secs += wait_secs;
      if (leader_addr != server_addr) {
        LOG_INFO("LOAD DATA location change",
                 "old_addr", server_addr,
                 "new_addr", leader_addr);
        server_addr = leader_addr;
        is_leader_changed = true;
      } else {
        is_leader_changed = false;
      }
      LOG_INFO("LOAD DATA is resumed",
               "waited_seconds", wait_secs,
               K(total_wait_secs));
    }
  }
  return ret;
}
/**
 * Finds how many bytes at the start of `buffer` make up at most `line_count`
 * complete lines.
 *
 * In the simple format the buffer is scanned byte by byte: an escape
 * character skips the byte that follows it, and every line terminator ends a
 * line. When this is the last buffer, trailing bytes without a terminator
 * count as one more line. Otherwise the general CSV parser does the scan with
 * a handler that ignores the fields.
 *
 * @param buffer       data to inspect
 * @param parser       supplies the format and performs the general scan
 * @param is_last_buf  whether no more data follows this buffer
 * @param valid_len    out: byte length covered by the counted lines
 * @param line_count   in: maximum number of lines wanted; out: lines found
 * @return OB_SUCCESS, OB_INVALID_ARGUMENT for an invalid buffer, or the
 *         error of parser.scan.
 */
int ObLoadDataBase::pre_parse_lines(ObLoadFileBuffer &buffer,
                                    ObCSVGeneralParser &parser,
                                    bool is_last_buf,
                                    int64_t &valid_len,
                                    int64_t &line_count)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!buffer.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid buffer", K(ret));
  } else if (!parser.get_opt_params().is_simple_format_) {
    ObSEArray<ObCSVGeneralParser::LineErrRec, 128> err_records;
    const char *ptr = buffer.begin_ptr();
    const char *end = ptr + buffer.get_data_len();
    // fields are of no interest here, only the line boundaries are
    auto unused_handler = [](ObIArray<ObCSVGeneralParser::FieldValue> &fields_per_line) -> int {
      UNUSED(fields_per_line);
      return OB_SUCCESS;
    };
    if (OB_FAIL(parser.scan(ptr, end, line_count, NULL, NULL, unused_handler, err_records, is_last_buf))) {
      LOG_WARN("fail to scan buf", K(ret));
    } else {
      valid_len = ptr - buffer.begin_ptr();
    }
  } else {
    const ObCSVGeneralFormat &format = parser.get_format();
    char *const data_begin = buffer.begin_ptr();
    char *const data_end = buffer.current_ptr();
    char *lines_end = data_begin;   // one past the last complete line
    int64_t found_lines = 0;
    bool enough = false;
    char *scan = data_begin;
    while (!enough && scan < data_end) {
      const char cur_char = *scan;
      if (format.field_escaped_char_ == cur_char && scan + 1 < data_end) {
        // the escaped byte can never terminate a line
        scan += 2;
      } else {
        if (parser.get_opt_params().line_term_c_ == cur_char) {
          ++found_lines;
          lines_end = scan + 1;
          enough = (found_lines >= line_count);
        }
        ++scan;
      }
    }
    if (is_last_buf && found_lines < line_count && data_end > lines_end) {
      // unterminated tail of the final buffer is a line of its own
      ++found_lines;
      lines_end = data_end;
    }
    valid_len = lines_end - data_begin;
    line_count = found_lines;
  }
  return ret;
}
/**
 * Copies one parsed line into the field expressions: field_values.at(i) is
 * written into the value of field_exprs_.at(i) through
 * ObLoadDataBase::field_to_obj.
 *
 * @param field_values   parsed fields of one line; must have exactly as many
 *                       entries as field_exprs_
 * @param string_values  membership of i is forwarded to field_to_obj as its
 *                       last argument
 * @return OB_SUCCESS, or OB_INVALID_ARGUMENT when the counts differ.
 */
int ObInsertValueGenerator::fill_field_expr(ObIArray<ObCSVGeneralParser::FieldValue> &field_values,
                                            const ObBitSet<> &string_values)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(field_values.count() != field_exprs_.count())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid input", K(ret), K(field_values), K(field_exprs_));
  }
  for (int64_t i = 0; OB_SUCC(ret) && i < field_values.count(); ++i) {
    ObConstRawExpr *const_expr = static_cast<ObConstRawExpr *>(field_exprs_.at(i));
    ObLoadDataBase::field_to_obj(const_expr->get_value(),
                                 field_values.at(i),
                                 cs_type_,
                                 string_values.has_member(i));
  }
  return ret;
}
// Renders every insert expression into SQL text and stores the result in
// insert_values.at(i); the text itself is copied into str_buf.
//
// insert_values is written with at(i), not push_back, so the caller must have
// sized it to at least insert_exprs_.count() elements beforehand.
// data_buffer_ is used as scratch space and is reset for every expression.
//
// Per expression:
//   - T_DEFAULT            -> the keyword DEFAULT
//   - constant string      -> converted to utf8mb4 when its charset differs,
//                             escaped, and wrapped in single quotes
//   - anything else        -> printed by expr_printer_
// Returns OB_SUCCESS or the first error encountered (OB_SIZE_OVERFLOW when
// the quoted string does not fit into the scratch buffer).
int ObInsertValueGenerator::gen_insert_values(ObIArray<ObString> &insert_values,
ObStringBuf &str_buf)
{
int ret = OB_SUCCESS;
for (int i = 0; OB_SUCC(ret) && i < insert_exprs_.count(); ++i) {
auto expr = insert_exprs_.at(i);
ObString store_value;
// start each expression with an empty scratch buffer
data_buffer_->reset();
ObConstRawExpr *const_expr = NULL;
if (expr->get_expr_type() == T_DEFAULT) {
OZ (str_buf.write_string("DEFAULT", &store_value));
} else if (expr->is_const_raw_expr()
&& (const_expr = static_cast<ObConstRawExpr *>(expr))->get_value().is_string_type()) {
ObString const_string = const_expr->get_value().get_string();
ObCollationType coll_type = const_expr->get_value().get_collation_type();
uint32_t pos = 0;
if (ObCharset::charset_type_by_coll(coll_type) != CHARSET_UTF8MB4) {
// convert into the scratch buffer; const_string then points at the
// converted bytes and the buffer position is moved past them
if (OB_FAIL(ObCharset::charset_convert(
coll_type, const_string.ptr(), const_string.length(),
CS_TYPE_UTF8MB4_BIN, data_buffer_->begin_ptr(), data_buffer_->get_remain_len(), pos, false))) {
LOG_WARN("fail to convert charset", K(ret));
} else {
const_string.assign_ptr(data_buffer_->begin_ptr(), pos);
data_buffer_->update_pos(pos);
}
}
if (OB_SUCC(ret)) {
// the escaped text is written one byte after current_ptr() so that the
// opening quote can be placed in front of it afterwards
ObHexEscapeSqlStr escape_str(const_string, !!(SMO_NO_BACKSLASH_ESCAPES & sql_mode_));
int64_t len = escape_str.to_string(data_buffer_->current_ptr() + 1, data_buffer_->get_remain_len() - 1);
// len + 2 accounts for the two surrounding quotes
if (OB_UNLIKELY(len + 2 >= data_buffer_->get_remain_len())) {
ret = OB_SIZE_OVERFLOW;
LOG_WARN("fail to print string", K(ret), K(len), K(data_buffer_->get_remain_len()));
} else {
*data_buffer_->current_ptr() = '\'';
*(data_buffer_->current_ptr() + 1 + len) = '\'';
OZ (str_buf.write_string(ObString(static_cast<int32_t>(len + 2),
data_buffer_->current_ptr()), &store_value));
}
}
} else {
// generic expression: print it, then copy what the scratch buffer holds
OZ (expr_printer_.do_print(expr, T_NONE_SCOPE));
OZ (str_buf.write_string(ObString(static_cast<int32_t>(data_buffer_->get_data_len()),
data_buffer_->begin_ptr()), &store_value));
}
OX (insert_values.at(i) = store_value);
//OZ (insert_values.push_back(store_value));
}
LOG_DEBUG("LOAD DATA insert values generated", K(insert_values));
return ret;
}
/**
 * Appends a complete single-row insert statement to insert_sql:
 * insert_header_, then " VALUES(", the comma-separated printed form of every
 * insert expression, and a closing ")".
 *
 * Each expression is printed by expr_printer_ after data_buffer_ has been
 * reset, and the buffer content is then appended to insert_sql.
 *
 * @param insert_sql  statement text is appended here (not cleared first)
 * @return OB_SUCCESS or the first append / print error.
 */
int ObInsertValueGenerator::gen_insert_sql(ObSqlString &insert_sql)
{
  int ret = OB_SUCCESS;
  OZ (insert_sql.append(insert_header_));
  OZ (insert_sql.append(" VALUES("));
  for (int64_t i = 0; OB_SUCC(ret) && i < insert_exprs_.count(); ++i) {
    auto *expr = insert_exprs_.at(i);
    if (i > 0) {
      // separator goes in front of every expression but the first
      OZ (insert_sql.append(","));
    }
    data_buffer_->reset();
    OZ (expr_printer_.do_print(expr, T_NONE_SCOPE));
    OZ (insert_sql.append(ObString(data_buffer_->get_data_len(), data_buffer_->begin_ptr())));
  }
  OZ (insert_sql.append(")"));
  return ret;
}
/**
 * Stores the settings used when values and statements are generated.
 *
 * @param insert_header  statement head placed in front of " VALUES("
 * @param cs_type        collation passed on when file fields become objects
 * @param sql_mode       sql mode bits (consulted for SMO_NO_BACKSLASH_ESCAPES)
 * @return always OB_SUCCESS.
 */
int ObInsertValueGenerator::set_params(ObString &insert_header, ObCollationType cs_type, int64_t sql_mode)
{
  sql_mode_ = sql_mode;
  cs_type_ = cs_type;
  insert_header_ = insert_header;
  return OB_SUCCESS;
}
/**
 * Binds the generator to the buffer its expression printer writes into.
 *
 * The printer is initialised on data_buffer's memory, size and position, with
 * print params created from the session and the collation forced to
 * CS_TYPE_UTF8MB4_BIN. The buffer pointer is kept; ownership stays with the
 * caller.
 *
 * @param session       source of the object print params
 * @param data_buffer   scratch buffer used for printing; must not be NULL
 * @param schema_guard  handed to the expression printer
 * @return OB_SUCCESS, or OB_INVALID_ARGUMENT when data_buffer is NULL.
 */
int ObInsertValueGenerator::init(ObSQLSessionInfo &session,
                                 ObLoadFileBuffer *data_buffer,
                                 ObSchemaGetterGuard *schema_guard)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(data_buffer)) {
    // the buffer is dereferenced right below, so reject NULL explicitly
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("data buffer is null", K(ret));
  } else {
    ObObjPrintParams param = session.create_obj_print_params();
    param.cs_type_ = CS_TYPE_UTF8MB4_BIN;
    expr_printer_.init(data_buffer->begin_ptr(),
                       data_buffer->get_buffer_size(),
                       data_buffer->get_pos(),
                       schema_guard,
                       param);
    data_buffer_ = data_buffer;
  }
  return ret;
}
/**
 * Builds the head of the insert statement (everything up to the column list)
 * and stores a copy of it in data_buff.
 *
 * The column names are collected from insert_infos and handed to
 * ObLoadDataUtils::build_insert_sql_string_head together with the duplicate
 * action and the combined table name from load_args. The resulting text is
 * copied with ctx's allocator.
 *
 * @param ctx              provides the allocator that backs data_buff
 * @param load_args        supplies dupl_action_ and combined_name_
 * @param insert_infos     one entry per insert column
 * @param data_buff        out: the generated statement head
 * @param need_online_osg  forwarded unchanged to build_insert_sql_string_head
 * @return OB_SUCCESS or the first error encountered.
 */
int ObLoadDataSPImpl::gen_insert_columns_names_buff(ObExecContext &ctx,
                                                    const ObLoadArgument &load_args,
                                                    ObIArray<ObLoadTableColumnDesc> &insert_infos,
                                                    ObString &data_buff,
                                                    bool need_online_osg)
{
  int ret = OB_SUCCESS;
  ObSqlString insert_stmt;
  ObSEArray<ObString, 16> insert_column_names;
  const int64_t column_cnt = insert_infos.count();

  // collect the column names in insert order
  if (OB_FAIL(insert_column_names.reserve(column_cnt))) {
    LOG_WARN("fail to reserve", K(ret));
  }
  for (int64_t i = 0; OB_SUCC(ret) && i < column_cnt; ++i) {
    if (OB_FAIL(insert_column_names.push_back(insert_infos.at(i).column_name_))) {
      LOG_WARN("fail to push back", K(ret));
    }
  }

  // build the statement head and hand a copy to the caller
  if (OB_FAIL(ret)) {
    // already logged above
  } else if (OB_FAIL(ObLoadDataUtils::build_insert_sql_string_head(load_args.dupl_action_,
                                                                   load_args.combined_name_,
                                                                   insert_column_names,
                                                                   insert_stmt,
                                                                   need_online_osg))) {
    LOG_WARN("gen insert sql column_names failed", K(ret));
  } else if (OB_FAIL(ob_write_string(ctx.get_allocator(), insert_stmt.string(), data_buff))) {
    LOG_WARN("fail to write string", K(ret));
  }
  return ret;
}
/**
 * Expression replacer used while copying the SET expressions of LOAD DATA.
 *
 * Column references (T_REF_COLUMN) and user variables (T_OP_GET_USER_VAR)
 * whose name appears in the statement's field-or-variable list are replaced
 * by the field expression at the same position. A user variable that is not
 * in the list is replaced by a new constant holding its current session
 * value. A column reference that is not in the list is an error. Every other
 * expression is left alone (new_expr is not touched).
 *
 * The object only keeps references; ctx, stmt and fields must outlive it.
 */
class ReplaceVariables : public ObIRawExprReplacer
{
public:
  ReplaceVariables(ObExecContext &ctx,
                   ObLoadDataStmt &stmt,
                   ObIArray<ObRawExpr *> &fields)
    : ctx_(ctx), load_stmt_(stmt), field_exprs_(fields) {}

  /**
   * @param expr_factory  unused
   * @param raw_expr      expression being visited; must not be NULL
   * @param new_expr      set to the replacement when one applies
   * @return OB_SUCCESS, OB_INVALID_ARGUMENT for a NULL raw_expr,
   *         OB_ERR_UNEXPECTED for malformed input or an unknown column,
   *         OB_ALLOCATE_MEMORY_FAILED, or the get_user_variable error.
   */
  int generate_new_expr(ObRawExprFactory &expr_factory, ObRawExpr *raw_expr, ObRawExpr *&new_expr)
  {
    int ret = OB_SUCCESS;
    UNUSED(expr_factory);
    ObSQLSessionInfo *session = NULL;
    if (OB_ISNULL(raw_expr)) {
      ret = OB_INVALID_ARGUMENT;
      LOG_WARN("invalid argument", K(ret));
    } else if (OB_ISNULL(session = ctx_.get_my_session())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("session is null", K(ret));
    } else if (raw_expr->get_expr_type() == T_REF_COLUMN
               || raw_expr->get_expr_type() == T_OP_GET_USER_VAR) {
      ObRawExpr *orig_expr = raw_expr;
      bool is_user_variable = false;
      //1. get variable name
      ObString ref_name;
      if (raw_expr->get_expr_type() == T_REF_COLUMN) {
        ObColumnRefRawExpr *column_ref = static_cast<ObColumnRefRawExpr*>(raw_expr);
        ref_name = column_ref->get_column_name();
      } else {
        is_user_variable = true;
        ObSysFunRawExpr *func_expr = static_cast<ObSysFunRawExpr*>(raw_expr);
        if (func_expr->get_children_count() != 1) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("sys func expr child num is not correct", K(ret));
        } else if (OB_ISNULL(func_expr->get_param_expr(0))) {
          // the child is dereferenced below; fail cleanly instead of crashing
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("sys func expr child is null", K(ret));
        } else {
          ObConstRawExpr *c_expr = static_cast<ObConstRawExpr*>(func_expr->get_param_expr(0));
          if (c_expr->get_value().get_type() != ObVarcharType) {
            ret = OB_ERR_UNEXPECTED;
            LOG_WARN("const expr child type is not correct", K(ret));
          } else {
            ref_name = c_expr->get_value().get_string();
          }
        }
      }
      //2. find and replace
      int64_t idx = OB_INVALID_INDEX;
      if (OB_SUCC(ret)) {
        for (int64_t i = 0; i < load_stmt_.get_field_or_var_list().count(); ++i) {
          if (0 == load_stmt_.get_field_or_var_list().at(i).field_or_var_name_.compare(ref_name)) {
            idx = i;
            break;
          }
        }
        if (OB_INVALID_INDEX != idx) {
          if (OB_UNLIKELY(idx >= field_exprs_.count())) {
            // field_exprs_ is expected to parallel the field-or-var list
            ret = OB_ERR_UNEXPECTED;
            LOG_WARN("field expr is missing for the matched name", K(ret), K(idx), K(ref_name));
          } else {
            new_expr = field_exprs_.at(idx);
          }
        } else if (!is_user_variable) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("unknown column name in set right expr, do nothing", K(ret), K(ref_name));
        } else {
          ObConstRawExpr *c_expr = NULL;
          //find the real value from session
          if (OB_ISNULL(c_expr = OB_NEWx(ObConstRawExpr, (&ctx_.get_allocator())))) {
            ret = OB_ALLOCATE_MEMORY_FAILED;
            LOG_WARN("allocate const raw expr failed", K(ret));
          } else {
            ObObj var_obj;
            ObSessionVariable user_var;
            if (OB_FAIL(session->get_user_variable(ref_name, user_var))) {
              LOG_WARN("get user variable failed", K(ret), K(ref_name));
            } else {
              var_obj = user_var.value_;
              var_obj.set_meta_type(user_var.meta_);
              c_expr->set_value(var_obj);
              new_expr = c_expr;
            }
          }
        }
      }
      LOG_DEBUG("replace variable name to field value",
                K(ref_name), K(idx), KPC(orig_expr), KPC(raw_expr), KPC(new_expr));
    }
    return ret;
  }

  ObExecContext &ctx_;
  ObLoadDataStmt &load_stmt_;
  ObIArray<ObRawExpr *> &field_exprs_;
};
/**
 * Prepares the expressions a shuffle task evaluates for every line.
 *
 * First one empty varchar constant (collation = file_cs_type_) is created per
 * entry of the statement's field-or-variable list and appended to
 * field_exprs. Then one insert expression per insert column is appended to
 * insert_exprs: a copy of the column's SET expression with column references
 * and user variables replaced through ReplaceVariables, or, when the column
 * has no SET expression, the field expression at desc.array_ref_idx_.
 *
 * @param ctx           supplies the expression factory (must not be NULL)
 * @param load_stmt     LOAD DATA statement being executed
 * @param insert_infos  insert column descriptions
 * @param field_exprs   out: field expressions, appended in list order
 * @param insert_exprs  out: insert expressions, appended in column order
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED for a missing factory or an
 *         out-of-range field index, or the first build / copy / push error.
 */
int ObLoadDataSPImpl::copy_exprs_for_shuffle_task(ObExecContext &ctx,
                                                  ObLoadDataStmt &load_stmt,
                                                  ObIArray<ObLoadTableColumnDesc> &insert_infos,
                                                  ObIArray<ObRawExpr *> &field_exprs,
                                                  ObIArray<ObRawExpr *> &insert_exprs)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(ctx.get_expr_factory())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("expr factory is null", K(ret));
  }
  OZ (field_exprs.reserve(load_stmt.get_field_or_var_list().count()));
  for (int i = 0; OB_SUCC(ret) && i < load_stmt.get_field_or_var_list().count(); ++i) {
    ObConstRawExpr *field_expr = NULL;
    OZ (ObRawExprUtils::build_const_string_expr(*ctx.get_expr_factory(),
                                                ObVarcharType,
                                                ObString(),
                                                load_stmt.get_load_arguments().file_cs_type_,
                                                field_expr));
    OZ (field_exprs.push_back(field_expr));
  }
  OZ (insert_exprs.reserve(insert_infos.count()));
  if (OB_SUCC(ret)) {
    ObRawExprCopier copier(*ctx.get_expr_factory());
    ReplaceVariables replacer(ctx, load_stmt, field_exprs);
    for (int i = 0; OB_SUCC(ret) && i < insert_infos.count(); ++i) {
      ObRawExpr *insert_expr = nullptr;
      ObLoadTableColumnDesc &desc = insert_infos.at(i);
      if (OB_NOT_NULL(desc.expr_value_)) {
        OZ (copier.copy_on_replace(desc.expr_value_, insert_expr, &replacer));
      } else if (OB_UNLIKELY(desc.array_ref_idx_ < 0
                             || desc.array_ref_idx_ >= field_exprs.count())) {
        // a column without SET expression must point at an existing field;
        // an invalid index would otherwise be used to index field_exprs
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("invalid field index of insert column", K(ret), K(i),
                 "array_ref_idx", desc.array_ref_idx_,
                 "field_expr_count", field_exprs.count());
      } else {
        insert_expr = field_exprs.at(desc.array_ref_idx_);
      }
      OZ (insert_exprs.push_back(insert_expr));
      LOG_DEBUG("push final insert expr", KPC(insert_expr));
    }
  }
  return ret;
}
/**
 * Builds the list of columns the generated INSERT writes to.
 *
 * e.g. for "INTO TABLE t1 (c1, c2, @a, @b) SET c3 = @a + @b"
 *   step 1: c1 and c2 are added. The first file column is written to t1.c1
 *           and the second to t1.c2, so each remembers its position in the
 *           field list (array_ref_idx_). @a and @b are variables, not table
 *           columns, and are skipped here.
 *   step 2: c3 is added with the SET expression as its value. If the SET
 *           target was already added in step 1, that entry is overwritten to
 *           use the expression instead of the file field.
 *
 * @param ctx           unused
 * @param load_stmt     provides the field-or-variable list and assignments
 * @param insert_infos  out: column descriptions are appended / updated
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED when an assignment lacks its column
 *         or value expression, or the push_back error.
 */
int ObLoadDataSPImpl::gen_load_table_column_desc(ObExecContext &ctx,
                                                 ObLoadDataStmt &load_stmt,
                                                 ObIArray<ObLoadTableColumnDesc> &insert_infos)
{
  UNUSED(ctx);
  int ret = OB_SUCCESS;
  //step 1
  for (int64_t i = 0; OB_SUCC(ret) && i < load_stmt.get_field_or_var_list().count(); ++i) {
    ObLoadDataStmt::FieldOrVarStruct &item = load_stmt.get_field_or_var_list().at(i);
    if (item.is_table_column_) {
      ObLoadTableColumnDesc tmp_info;
      tmp_info.is_set_values_ = false;
      tmp_info.column_name_ = item.field_or_var_name_;
      tmp_info.column_id_ = item.column_id_;
      tmp_info.column_type_ = item.column_type_;
      tmp_info.array_ref_idx_ = i; //array offset
      tmp_info.expr_value_ = NULL;
      if (OB_FAIL(insert_infos.push_back(tmp_info))) {
        LOG_WARN("push str failed", K(ret));
      }
    } else {
      //ignore variables temporarily
    }
  }
  //step 2
  for (int64_t i = 0; OB_SUCC(ret) && i < load_stmt.get_table_assignment().count(); ++i) {
    const ObAssignment &assignment = load_stmt.get_table_assignment().at(i);
    ObColumnRefRawExpr *left = assignment.column_expr_;
    ObRawExpr *right = assignment.expr_;
    if (OB_ISNULL(left) || OB_ISNULL(right)) {
      // a NULL value expression would leave the column with neither an
      // expression nor a valid field index
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("set assign expr is null", K(ret), KP(left), KP(right));
    } else {
      int64_t found_index = OB_INVALID_INDEX_INT64;
      for (int64_t j = 0; j < insert_infos.count(); ++j) {
        if (insert_infos.at(j).column_id_ == left->get_column_id()) {
          found_index = j;
          break;
        }
      }
      if (found_index != OB_INVALID_INDEX_INT64) {
        //overwrite
        ObLoadTableColumnDesc &tmp_info = insert_infos.at(found_index);
        tmp_info.is_set_values_ = true;
        tmp_info.array_ref_idx_ = OB_INVALID_INDEX_INT64;
        tmp_info.expr_value_ = right;
      } else {
        //a new insert column is defined by set expr
        ObLoadTableColumnDesc tmp_info;
        tmp_info.column_name_ = left->get_column_name();
        tmp_info.column_id_ = left->get_column_id();
        tmp_info.column_type_ = left->get_result_type().get_type();
        tmp_info.is_set_values_ = true;
        tmp_info.expr_value_ = right;
        if (OB_FAIL(insert_infos.push_back(tmp_info))) {
          LOG_WARN("push str failed", K(ret));
        }
      }
    }
  }
  LOG_DEBUG("generate insert info", K(insert_infos));
  return ret;
}
/**
 * Derives the single-character CSV settings from the file format options.
 *
 * - field / line terminator: first character of the respective string, or
 *   INT64_MAX when the string is empty
 * - enclose / escape characters are copied as they are
 * - null_column_fill_zero_string_ is true in MySQL mode
 * - when a field terminator exists but no line terminator does, lines are
 *   delimited by counting fields and the field terminator doubles as the
 *   line terminator
 * - the format counts as "simple" when lines are not delimited by counting
 *   fields, both terminators exist and differ, and no enclose character is set
 *
 * Every member is assigned on each call, so calling init() again fully
 * replaces the previous settings.
 */
void ObCSVFormats::init(const ObDataInFileStruct &file_formats)
{
  const bool has_field_term = !file_formats.field_term_str_.empty();
  const bool has_line_term = !file_formats.line_term_str_.empty();

  field_term_char_ = has_field_term ? file_formats.field_term_str_[0] : INT64_MAX;
  line_term_char_ = has_line_term ? file_formats.line_term_str_[0] : INT64_MAX;
  enclose_char_ = file_formats.field_enclosed_char_;
  escape_char_ = file_formats.field_escaped_char_;
  null_column_fill_zero_string_ = lib::is_mysql_mode();

  // assigned unconditionally so that a previous init() cannot leave it set
  is_line_term_by_counting_field_ = has_field_term && !has_line_term;
  if (is_line_term_by_counting_field_) {
    line_term_char_ = field_term_char_;
  }

  is_simple_format_ =
      !is_line_term_by_counting_field_
      && (field_term_char_ != INT64_MAX)
      && (line_term_char_ != INT64_MAX)
      && (field_term_char_ != line_term_char_)
      && (enclose_char_ == INT64_MAX);
}
// Creates a shuffle task handle for one tenant.
// The handle's allocator and its `attr` both use the memory attribute
// (tenant_id, ObModIds::OB_SQL_LOAD_DATA); exec_ctx is built on that
// allocator. data_buffer and escape_buffer start out NULL (expand_buf
// allocates them). main_datafrag_mgr and main_string_values are bound to the
// handle's datafrag_mgr / string_values members.
ObShuffleTaskHandle::ObShuffleTaskHandle(ObDataFragMgr &main_datafrag_mgr,
ObBitSet<> &main_string_values,
uint64_t tenant_id)
: allocator(ObMemAttr(tenant_id, ObModIds::OB_SQL_LOAD_DATA)),
exec_ctx(allocator, GCTX.session_mgr_),
data_buffer(NULL),
escape_buffer(NULL),
calc_tablet_id_expr(NULL),
datafrag_mgr(main_datafrag_mgr),
string_values(main_string_values)
{
// memory attribute used later by expand_buf for the buffer allocations
attr = ObMemAttr(tenant_id, ObModIds::OB_SQL_LOAD_DATA);
}
// Releases the two buffers with ob_free; a buffer that was never allocated
// (still NULL) is skipped.
ObShuffleTaskHandle::~ObShuffleTaskHandle()
{
  if (NULL != data_buffer) {
    ob_free(data_buffer);
  }
  if (NULL != escape_buffer) {
    ob_free(escape_buffer);
  }
}
/**
 * Allocates (first call) or doubles (later calls) data_buffer and
 * escape_buffer.
 *
 * The first allocation uses ObLoadFileBuffer::MAX_BUFFER_SIZE; afterwards the
 * new total size is twice the current one (header included). Both buffers are
 * replaced together and the old ones are freed; their content is NOT carried
 * over to the new buffers. When anything fails the existing buffers are left
 * untouched and nothing is leaked.
 *
 * @param max_size  upper bound for the total size of one buffer
 * @return OB_SUCCESS, OB_SIZE_OVERFLOW when the new size would exceed
 *         max_size, or OB_ALLOCATE_MEMORY_FAILED.
 */
int ObShuffleTaskHandle::expand_buf(const int64_t max_size)
{
  int ret = OB_SUCCESS;
  int64_t new_size = 0;
  if (OB_ISNULL(data_buffer)) {
    new_size = ObLoadFileBuffer::MAX_BUFFER_SIZE;
  } else {
    new_size = (sizeof(ObLoadFileBuffer) + data_buffer->get_buffer_size()) * 2;
  }
  if (new_size > max_size) {
    ret = OB_SIZE_OVERFLOW;
    LOG_WARN("buffer size not enough", K(ret));
  } else {
    void *buf1 = NULL;
    void *buf2 = NULL;
    if (OB_ISNULL(buf1 = ob_malloc(new_size, attr))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("fail to alloc data buffer", K(ret), K(new_size));
    } else if (OB_ISNULL(buf2 = ob_malloc(new_size, attr))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("fail to alloc escape buffer", K(ret), K(new_size));
      // the first allocation succeeded; give it back instead of leaking it
      ob_free(buf1);
      buf1 = NULL;
    } else {
      if (OB_NOT_NULL(data_buffer)) {
        ob_free(data_buffer);
      }
      data_buffer = new(buf1) ObLoadFileBuffer(
          new_size - sizeof(ObLoadFileBuffer));
      if (OB_NOT_NULL(escape_buffer)) {
        ob_free(escape_buffer);
      }
      escape_buffer = new(buf2) ObLoadFileBuffer(
          new_size - sizeof(ObLoadFileBuffer));
    }
  }
  LOG_DEBUG("expand buf to", K(new_size));
  return ret;
}
/**
 * Execute one shuffle task.
 *
 * Parses the lines held in handle->data_buffer, generates the insert values of every
 * row, decides which tablet the row goes to and serializes the row into a per-tablet
 * ObDataFrag. Frags that fill up (and, on success, all remaining frags) are pushed to
 * the queue of their partition in handle->datafrag_mgr.
 *
 * @param task_id  id of this shuffle task; stamped on every frag it creates and used to
 *                 pick a tablet when there is no calc_tablet_id_expr
 * @param handle   per-task resources (buffers, parser, generator, datafrag manager)
 * @return OB_SUCCESS, OB_INVALID_ARGUMENT when the handle or one of its required
 *         members is NULL, or the first error hit while parsing / serializing
 */
int ObLoadDataSPImpl::exec_shuffle(int64_t task_id, ObShuffleTaskHandle *handle)
{
  int ret = OB_SUCCESS;
  int64_t tenant_id = OB_INVALID_TENANT_ID;
  void *expr_buf = NULL;
  ObLoadFileBuffer *expr_buffer = NULL;
  ObArrayHashMap<ObTabletID, ObDataFrag *> part_buf_mgr;
  ObSEArray<ObString, 32> insert_values;
  int64_t parsed_line_num = 0;
  ObStringBuf str_buf("LoadDataStrBuf", OB_MALLOC_MIDDLE_BLOCK_SIZE);
  // Lambdas passed to part_buf_mgr.for_each; [&] gives them access to handle->datafrag_mgr.
  auto save_frag = [&] (ObTabletID tablet_id, ObDataFrag *frag) -> bool
  {
    // Put a filled frag into the queue of the partition it belongs to.
    int ret = OB_SUCCESS;
    ObPartDataFragMgr *part_datafrag_mgr = NULL;
    if (OB_FAIL(handle->datafrag_mgr.get_part_datafrag(tablet_id,
                                                       part_datafrag_mgr))) {
      LOG_WARN("fail to get part datafrag", K(ret), K(tablet_id));
    } else if (OB_ISNULL(part_datafrag_mgr)) {
      ret = OB_ERR_UNEXPECTED;
    } else if (OB_FAIL(part_datafrag_mgr->queue_.push(frag))) {
      LOG_WARN("fail to push frag", K(ret));
    } else {
      ATOMIC_AAF(&(part_datafrag_mgr->total_row_proceduced_), frag->row_cnt);
      LOG_DEBUG("saving frag", K(tablet_id), K(*frag));
    }
    return OB_SUCCESS == ret;
  };
  auto free_frag = [&] (ObTabletID tablet_id, ObDataFrag *frag) -> bool
  {
    if (OB_NOT_NULL(frag)) {
      handle->datafrag_mgr.distory_datafrag(frag);
    }
    return true;
  };
  // The handle must be validated before anything is read through it.
  if (OB_ISNULL(handle)
      || OB_ISNULL(handle->data_buffer)
      || OB_ISNULL(handle->escape_buffer)
      || OB_ISNULL(handle->exec_ctx.get_my_session())
      || OB_ISNULL(handle->exec_ctx.get_sql_ctx())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KP(handle));
  // } else if (FALSE_IT(handle->exec_ctx.get_allocator().reuse())) {
  } else if (FALSE_IT(tenant_id = handle->exec_ctx.get_my_session()->get_effective_tenant_id())) {
  } else if (OB_FAIL(part_buf_mgr.init(ObMemAttr(tenant_id, ObModIds::OB_SQL_LOAD_DATA),
                                       handle->datafrag_mgr.get_total_part_cnt()))) {
    LOG_WARN("fail to init part buf mgr", K(ret));
  } else if (OB_FAIL(insert_values.prepare_allocate(
                     handle->generator.get_insert_exprs().count()))) {
    LOG_WARN("fail to prealloc", K(ret),
             "insert values count", handle->generator.get_insert_exprs().count());
  } else if (OB_ISNULL(expr_buf = ob_malloc(handle->data_buffer->get_buffer_size() + sizeof(ObLoadFileBuffer),
                                            ObMemAttr(tenant_id, ObModIds::OB_SQL_LOAD_DATA)))) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("not enough memory", K(ret));
  } else {
    handle->err_records.reuse();
    expr_buffer = new(expr_buf) ObLoadFileBuffer(handle->data_buffer->get_buffer_size());
    ObSEArray<ObCSVGeneralParser::LineErrRec, 1> err_records;
    ObSEArray<ObObj, 32> parse_result;
    int64_t nrows = 1;
    const char *ptr = handle->data_buffer->begin_ptr();
    const char *end = handle->data_buffer->begin_ptr() + handle->data_buffer->get_data_len();
    // The scan callback does nothing; the parsed fields are read back from the parser below.
    auto handle_one_line = [](ObIArray<ObCSVGeneralParser::FieldValue> &fields_per_line) -> int {
      UNUSED(fields_per_line);
      return common::OB_SUCCESS;
    };
    if (OB_FAIL(handle->generator.init(*(handle->exec_ctx.get_my_session()), expr_buffer,
                                       handle->exec_ctx.get_sql_ctx()->schema_guard_))) {
      LOG_WARN("fail to init buffer", K(ret));
    } else if (OB_FAIL(parse_result.prepare_allocate(handle->generator.get_field_exprs().count()))) {
      LOG_WARN("fail to allocate", K(ret));
    } else {
      handle->exec_ctx.set_use_temp_expr_ctx_cache(true);
    }
    while (OB_SUCC(ret) && ptr < end) {
      const char *prev_ptr = ptr; //save the old value of ptr
      err_records.reuse();
      ret = handle->parser.scan<decltype(handle_one_line), true>(ptr, end, nrows,
                    handle->escape_buffer->begin_ptr(),
                    handle->escape_buffer->begin_ptr() + handle->escape_buffer->get_buffer_size(),
                    handle_one_line, err_records, true);
      if (OB_FAIL(ret)) {
        LOG_WARN("fail to scan", K(ret));
      } else {
        // Only the first parser error of the line is kept, keyed by the row offset in this task.
        if (err_records.count() > 0) {
          ObParserErrRec rec;
          rec.row_offset_in_task = parsed_line_num;
          rec.ret = err_records[0].err_code;
          if (OB_FAIL(handle->err_records.push_back(rec))) {
            LOG_WARN("fail to push back", K(ret));
          }
        }
      }
      if (OB_SUCC(ret) && nrows > 0) {
        int64_t cur_line_num = parsed_line_num++;
        // calculate the partition (tablet) id
        ObObj result;
        ObTabletID tablet_id;
        //insert_values.reuse();
        str_buf.reuse();
        if (OB_FAIL(handle->generator.fill_field_expr(handle->parser.get_fields_per_line(),
                                                      handle->string_values))) {
          LOG_WARN("fail to fill field expr", K(ret));
        } else if (OB_FAIL(handle->generator.gen_insert_values(insert_values, str_buf))) {
          LOG_WARN("fail to generate insert values", K(ret));
        } else if (nullptr == handle->calc_tablet_id_expr) {
          // No tablet expression: pick a tablet from the list by task id.
          int64_t idx = task_id % handle->datafrag_mgr.get_tablet_ids().count();
          tablet_id = handle->datafrag_mgr.get_tablet_ids().at(idx);
        } else {
          for (int i = 0; i < handle->parser.get_fields_per_line().count(); ++i) {
            handle->row_in_file.get_cell(i) =
              static_cast<ObConstRawExpr *>(handle->generator.get_field_exprs().at(i))->get_value();
          }
          if (OB_FAIL(handle->calc_tablet_id_expr->eval(handle->exec_ctx, handle->row_in_file, result))) {
            LOG_WARN("fail to calc tablet id", K(ret));
          } else {
            tablet_id = ObTabletID(result.get_uint64());
            if (OB_UNLIKELY(!tablet_id.is_valid())) {
              ret = OB_NO_PARTITION_FOR_GIVEN_VALUE;
              LOG_WARN("invalid partition for given value", K(ret));
            }
          }
        }
        LOG_DEBUG("LOAD DATA", "TheadId", get_tid_cache(), K(cur_line_num), K(tablet_id),
                  "line", handle->parser.get_fields_per_line(), "values", insert_values);
        // serialize the row into a DataFrag
        int64_t len = 0;
        OB_UNIS_ADD_LEN(insert_values);
        OB_UNIS_ADD_LEN(cur_line_num);
        int64_t row_ser_size = len;
        OB_UNIS_ADD_LEN(row_ser_size);
        ObDataFrag *frag = NULL;
        if (OB_SUCC(ret)) {
          int temp_ret = part_buf_mgr.get(tablet_id, frag);
          bool frag_exist = (OB_SUCCESS == temp_ret);
          if (!frag_exist || len > frag->get_remain()) {
            // No frag for this tablet yet, or the current one cannot hold the row: create a new one.
            ObDataFrag *new_frag = NULL;
            if (OB_FAIL(handle->datafrag_mgr.create_datafrag(new_frag, len))) {
              LOG_WARN("fail to create data fragment", K(ret));
            } else {
              if (frag_exist) {
                if (OB_UNLIKELY(!save_frag(tablet_id, frag))) {
                  ret = OB_ERR_UNEXPECTED;
                  LOG_WARN("fail to save frag", K(ret));
                } else if (OB_FAIL(part_buf_mgr.update(tablet_id, new_frag))) {
                  //never goes here
                  LOG_ERROR("fail to install new frag", K(ret));
                }
              } else {
                if (OB_FAIL(part_buf_mgr.insert(tablet_id, new_frag))) {
                  LOG_ERROR("fail to insert new frag", K(ret));
                }
              }
              if (OB_SUCC(ret)) {
                frag = new_frag;
                frag->shuffle_task_id = task_id;
              } else {
                handle->datafrag_mgr.distory_datafrag(new_frag);
              }
            }
          }
        }
        if (OB_SUCC(ret)) {
          // buf / buf_len / pos are the names the OB_UNIS_ENCODE macros operate on.
          char *buf = frag->get_current();
          int64_t buf_len = frag->get_remain();
          int64_t pos = 0;
          OB_UNIS_ENCODE(row_ser_size);
          OB_UNIS_ENCODE(cur_line_num);
          OB_UNIS_ENCODE(insert_values);
          if (OB_SUCC(ret)) {
            frag->add_pos(pos);
            frag->add_row_cnt(1);
            //use the pointer change to calculate the original data size read to the frag
            frag->add_orig_data_size(static_cast<int64_t>(ptr - prev_ptr));
          }
        }
      } //end if (nrows > 0)
    } //end while
    if (OB_SUCC(ret)) {
      // Flush every frag still held in the per-tablet map to its partition queue.
      if (OB_FAIL(part_buf_mgr.for_each(save_frag))) {
        LOG_WARN("fail to for each", K(ret));
      }
    }
  }
  if (OB_FAIL(ret)) {
    part_buf_mgr.for_each(free_frag);
  }
  if (OB_NOT_NULL(expr_buf)) {
    ob_free(expr_buf);
  }
  // handle may be NULL when the argument check above failed.
  if (OB_NOT_NULL(handle)) {
    handle->result.row_cnt_ = parsed_line_num;
  }
  return ret;
}
/**
 * Execute one insert task: rebuild a multi-row
 * "<insert_stmt_head> values (..),(..)" statement from the serialized rows carried by
 * the task and run it through GCTX.sql_proxy_.
 *
 * @param task    insert task; supplies the statement head, the serialized row buffers,
 *                the expected row/column counts, tenant id, sql mode and timezone
 * @param result  unused here
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED when the deserialized data does not match the
 *         counts recorded in the task, or the error of the failing step
 */
int ObLoadDataSPImpl::exec_insert(ObInsertTask &task, ObInsertResult& result)
{
  UNUSED(result);
  int ret = OB_SUCCESS;
  int64_t sql_buff_len_init = OB_MALLOC_BIG_BLOCK_SIZE; //2M
  ObMemAttr attr(task.tenant_id_, ObModIds::OB_SQL_LOAD_DATA);
  ObSqlString sql_str;
  ObSEArray<ObString, 1> single_row_values;
  sql_str.set_attr(attr);
#ifdef TEST_MODE
  delay_process_by_probability(INSERT_TASK_DROP_RATE);
#endif
  OZ (single_row_values.reserve(task.column_count_));
  OZ (sql_str.extend(sql_buff_len_init));
  OZ (sql_str.append(task.insert_stmt_head_));
  OZ (sql_str.append(ObString(" values ")));
  int64_t deserialized_rows = 0;
  for (int64_t buf_i = 0; OB_SUCC(ret) && buf_i < task.insert_value_data_.count(); ++buf_i) {
    // pos / buf / data_len are the names the OB_UNIS_DECODE macros operate on.
    int64_t pos = 0;
    const char* buf = task.insert_value_data_[buf_i].ptr();
    int64_t data_len = task.insert_value_data_[buf_i].length();
    while (OB_SUCC(ret) && pos < data_len) {
      int64_t row_ser_size = 0;
      int64_t row_num = 0;
      OB_UNIS_DECODE(row_ser_size);
      int64_t pos_back = pos;
      OB_UNIS_DECODE(row_num);
      single_row_values.reuse();
      OB_UNIS_DECODE(single_row_values);
      // The bytes consumed after the size prefix must equal the recorded row size.
      if (OB_SUCC(ret) && (pos - pos_back != row_ser_size
                           || single_row_values.count() != task.column_count_)) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("row size is not as expected", "pos diff", pos - pos_back, K(row_ser_size),
                 "single row values count", single_row_values.count(), K(task.column_count_));
      }
      //print row
      if (deserialized_rows != 0) {
        OZ (sql_str.append(",", 1));
      }
      OZ (sql_str.append("(", 1));
      for (int64_t c = 0; OB_SUCC(ret) && c < single_row_values.count(); ++c) {
        //bool is_set_value = task.set_values_bitset_.has_member(c);
        if (c != 0) {
          OZ (sql_str.append(",", 1));
        }
        OZ (sql_str.append(single_row_values[c]));
      }
      OZ (sql_str.append(")", 1));
      deserialized_rows++;
    }
  } //end for
  if (OB_SUCC(ret) && deserialized_rows != task.row_count_) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("data in task not match deserialized result",
             K(ret), K(deserialized_rows), K(task.row_count_));
  }
  if (OB_SUCC(ret)) {
    ObTZMapWrap tz_map_wrap;
    if (OB_FAIL(OTTZ_MGR.get_tenant_tz(task.tenant_id_, tz_map_wrap))) {
      LOG_WARN("get tenant timezone map failed", K(ret));
    } else {
      task.timezone_.set_tz_info_map(tz_map_wrap.get_tz_map());
    }
  }
  int64_t affected_rows = 0;
  ObSessionParam param;
  param.is_load_data_exec_ = true;
  param.sql_mode_ = &task.sql_mode_;
  param.tz_info_wrap_ = &task.timezone_;
  if (OB_SUCC(ret) && OB_FAIL(GCTX.sql_proxy_->write(task.tenant_id_,
                                                     sql_str.string(),
                                                     affected_rows,
                                                     get_compatibility_mode(),
                                                     &param))) {
    LOG_WARN("fail to exec insert remote", K(ret), "task_id", task.task_id_);
  }
  LOG_DEBUG("LOAD DATA remote process", K(affected_rows), K(task.task_id_), K(ret));
#ifdef TEST_MODE
  delay_process_by_probability(INSERT_TASK_DROP_RATE);
#endif
  return ret;
}
/**
 * Wait until box.parallel shuffle tasks have come back, then hand every shuffle handle
 * back to the reserve queue so the next round can reuse it.
 *
 * The wait loop deliberately keeps running after a failure so that each dispatched task
 * is collected; the first error seen is what gets returned.
 *
 * @param box  job state: controller, reserve queue, shuffle handles, counters
 * @return OB_SUCCESS or the first error recorded while collecting / re-queuing
 */
int ObLoadDataSPImpl::wait_shuffle_task_return(ToolBox &box)
{
  int ret = OB_SUCCESS;
  int ret_bak = OB_SUCCESS;
  for (int64_t i = 0; i < box.parallel; ++i) {
    // keep looping even when ret has failed, so that every dispatched task has
    // either returned or timed out
    ObShuffleTaskHandle *handle = NULL;
    if (OB_FAIL(box.shuffle_task_controller.on_next_task())) {
      LOG_WARN("fail to on next task", K(ret));
    } else if (OB_FAIL(box.shuffle_task_reserve_queue.pop(handle))) {
      LOG_WARN("fail to pop shuffle handle", K(ret));
    } else if (OB_ISNULL(handle)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("shuffle task handle is null", K(ret));
    } else if (OB_UNLIKELY(handle->result.flags_.test_bit(ObTaskResFlag::RPC_TIMEOUT))) {
      ret = OB_TRANS_RPC_TIMEOUT;
      LOG_WARN("shuffle task rpc timeout handle", K(ret));
    } else if (OB_FAIL(handle->result.exec_ret_)) {
      LOG_WARN("shuffle remote exec failed", K(ret));
    } else if (handle->err_records.count() > 0
               && OB_FAIL(handle_returned_shuffle_task(box, *handle))) {
      LOG_WARN("fail to handle returned shuffle task", K(ret));
    } else {
      box.suffle_rt_sum += handle->result.process_us_;
    }
    // remember only the first failure; later iterations overwrite ret
    if (OB_FAIL(ret) && OB_SUCCESS == ret_bak) {
      ret_bak = ret;
    }
  }
  if (OB_SUCCESS != ret_bak) {
    ret = ret_bak;
  }
  // give every handle back to the controller / reserve queue and reset it for reuse
  for (int64_t i = 0; OB_SUCC(ret) && i < box.parallel; ++i) {
    ObShuffleTaskHandle *handle = box.shuffle_resource[i];
    if (OB_FAIL(box.shuffle_task_controller.on_task_finished())) {
      LOG_WARN("fail to on task finished", K(ret));
    } else if (OB_FAIL(box.shuffle_task_reserve_queue.push_back(handle))) {
      LOG_WARN("fail to push back", K(ret));
    } else if (OB_ISNULL(handle)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("shuffle task handle is null", K(ret), K(i));
    } else {
      handle->result.reset();
      handle->err_records.reuse();
    }
  }
  return ret;
}
/**
 * Write the parse errors recorded by a returned shuffle task to the load-data log file.
 *
 * The log file is created on first use. The line number of each record is the first
 * row number of the task's file buffer plus the record's row offset inside the task.
 *
 * @param box     job state (row-number table, log file appender)
 * @param handle  returned shuffle handle carrying err_records and the task id
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED for an out-of-range task id, or the error of
 *         creating / appending to the log file
 */
int ObLoadDataSPImpl::handle_returned_shuffle_task(ToolBox &box, ObShuffleTaskHandle &handle)
{
  UNUSED(box);
  int ret = OB_SUCCESS;
  const bool task_id_in_range = handle.result.task_id_ >= 0
                                && handle.result.task_id_ < box.file_buf_row_num.count();
  if (OB_UNLIKELY(!task_id_in_range)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("invalid array index", K(ret),
             K(handle.result.task_id_), K(box.file_buf_row_num.count()));
  } else if (!box.file_appender.is_opened()
             && OB_FAIL(create_log_file(box))) {
    LOG_WARN("fail to create log file", K(ret));
  }
  int64_t rec_idx = 0;
  while (OB_SUCC(ret) && rec_idx < handle.err_records.count()) {
    const int64_t first_row_of_task = box.file_buf_row_num.at(handle.result.task_id_);
    const int64_t line_num = first_row_of_task
                             + handle.err_records.at(rec_idx).row_offset_in_task;
    ret = log_failed_line(box,
                          TaskType::ShuffleTask,
                          handle.result.task_id_,
                          line_num,
                          handle.err_records.at(rec_idx).ret,
                          ObString());
    if (OB_FAIL(ret)) {
      LOG_WARN("fail to log failed line", K(ret));
    }
    ++rec_idx;
  }
  return ret;
}
// Fill handle->data_buffer with the next batch of complete lines from the input file.
//
// Each round: put back the bytes left over from the previous read, read more file data
// behind them, pre-parse to find how many bytes form complete lines, and stash the
// trailing incomplete part in box.data_trimer. If a round yields no complete line and
// the file has not ended, both buffers are expanded and the round is repeated.
//
// ctx    - its allocator is used when the trimer buffer is expanded
// box    - job state: file reader, read cursor, parser, data trimer, job status
// handle - shuffle handle whose data_buffer receives the data; must not be NULL
// limit  - passed to pre_parse_lines as the initial complete_cnt
//          (NOTE(review): presumably an upper bound on lines to take - confirm)
// Returns OB_SUCCESS or the first error from any step.
int ObLoadDataSPImpl::next_file_buffer(ObExecContext &ctx,
ToolBox &box,
ObShuffleTaskHandle *handle,
int64_t limit)
{
int ret = OB_SUCCESS;
bool has_valid_data = false;
CK (OB_NOT_NULL(handle) && OB_NOT_NULL(handle->data_buffer));
do {
// restore the data left over from the previous read out of data_trimer
OZ (box.data_trimer.recover_incomplate_data(*handle->data_buffer));
// read file data into the free space behind the recovered bytes
OZ (box.file_reader->readn(handle->data_buffer->current_ptr(),
handle->data_buffer->get_remain_len(),
box.read_cursor.read_size_));
if (OB_SUCC(ret)) {
if (OB_LIKELY(box.read_cursor.read_size_ > 0)) {
handle->data_buffer->update_pos(box.read_cursor.read_size_); // update the data length in the buffer
// log progress only when the total-read-GBs value changes across commit_read()
int64_t last_proccessed_GBs = box.read_cursor.get_total_read_GBs();
box.read_cursor.commit_read();
int64_t processed_GBs = box.read_cursor.get_total_read_GBs();
if (processed_GBs != last_proccessed_GBs) {
LOG_INFO("LOAD DATA file read progress: ", K(processed_GBs));
}
box.job_status->read_bytes_ += box.read_cursor.read_size_;
} else if (box.file_reader->eof()) {
box.read_cursor.is_end_file_ = true;
LOG_DEBUG("LOAD DATA reach file end", K(box.read_cursor));
}
}
// find the complete lines in the buffer; back the remainder up into data_trimer
if (OB_SUCC(ret) && OB_LIKELY(handle->data_buffer->is_valid())) {
int64_t complete_cnt = limit;
int64_t complete_len = 0;
if (OB_FAIL(pre_parse_lines(*handle->data_buffer, box.parser,
box.read_cursor.is_end_file(),
complete_len, complete_cnt))) {
LOG_WARN("fail to fast_lines_parse", K(ret));
} else if (OB_FAIL(box.data_trimer.backup_incomplate_data(*handle->data_buffer,
complete_len))) {
LOG_WARN("fail to back up data", K(ret));
} else {
box.data_trimer.commit_line_cnt(complete_cnt);
has_valid_data = complete_cnt > 0;
LOG_DEBUG("LOAD DATA",
"split offset", box.read_cursor.file_offset_ - box.data_trimer.get_incomplate_data_string().length(),
K(complete_len), K(complete_cnt),
"incomplate data length", box.data_trimer.get_incomplate_data_string().length(),
"incomplate data", box.data_trimer.get_incomplate_data_string());
}
}
// retry only while: no error, no complete line yet, file not finished; the two
// expand_buf calls are part of the condition and run only when a retry is needed
} while (OB_SUCC(ret) && !has_valid_data && !box.read_cursor.is_end_file_
&& OB_SUCC(handle->expand_buf(box.batch_buffer_size))
&& OB_SUCC(box.data_trimer.expand_buf(ctx.get_allocator())));
return ret;
}
// Generate shuffle tasks from the input file and post them asynchronously.
//
// Runs up to box.data_frag_mem_usage_limit iterations, stopping early at end of file
// or on the first error. Each iteration:
//   1. takes a handle from the reserve queue and checks the result it still carries
//      from its previous use (timeout, exec error, parse error records);
//   2. resets the handle and gives it a new task id;
//   3. records the current line count as the first row number of this task and fills
//      the handle's data buffer via next_file_buffer;
//   4. posts the task through GCTX.load_data_proxy_, or releases the handle through
//      the callback when the buffer holds no data.
//
// ctx - forwarded to next_file_buffer
// box - job state
// Returns OB_SUCCESS or the first error encountered.
int ObLoadDataSPImpl::shuffle_task_gen_and_dispatch(ObExecContext &ctx, ToolBox &box)
{
// NOTE(review): ctx is marked unused here but is passed to next_file_buffer below
UNUSED(ctx);
int ret = OB_SUCCESS;
ObShuffleTaskHandle *handle = nullptr;
int64_t task_id = 0;
for (int64_t i = 0;
OB_SUCC(ret) && !box.read_cursor.is_end_file() && i < box.data_frag_mem_usage_limit;
++i) {
// wait a buffer from controller
if (OB_FAIL(box.shuffle_task_controller.on_next_task())) {
LOG_WARN("fail to get task id", K(ret));
} else if (OB_FAIL(box.shuffle_task_reserve_queue.pop(handle))) {
LOG_WARN("fail to pop buffer", K(ret));
} else if (OB_ISNULL(handle)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("handle is null", K(ret));
} else if (OB_UNLIKELY(handle->result.flags_.test_bit(ObTaskResFlag::RPC_TIMEOUT))) {
ret = OB_TRANS_RPC_TIMEOUT;
LOG_WARN("shuffle task rpc timeout handle", K(ret));
} else if (OB_FAIL(handle->result.exec_ret_)) {
LOG_WARN("shuffle task exec failed", K(ret), "task_id", handle->result.task_id_);
} else if (OB_UNLIKELY(handle->err_records.count() > 0)
&& OB_FAIL(handle_returned_shuffle_task(box, *handle))) {
LOG_WARN("handle returned shuffle task", K(ret));
} else {
// previous result consumed: account its run time, then reset the handle for the new task
box.suffle_rt_sum += handle->result.process_us_;
task_id = box.shuffle_task_controller.get_next_task_id();
handle->data_buffer->reset();
handle->result = ObShuffleResult();
handle->result.task_id_ = task_id;
handle->err_records.reuse();
box.job_status->shuffle_rt_sum_ = box.suffle_rt_sum;
box.job_status->total_shuffle_task_ = box.shuffle_task_controller.get_total_task_cnt();
}
if (OB_SUCC(ret)) {
// one entry is appended per task; readers index this array by shuffle task id
if (OB_FAIL(box.file_buf_row_num.push_back(box.data_trimer.get_lines_count()))) {
LOG_WARN("fail to push back", K(ret));
} else if (OB_FAIL(next_file_buffer(ctx, box, handle))) {
LOG_WARN("fail get next file buffer", K(ret));
}
}
if (OB_SUCC(ret)) {
ObRpcLoadDataShuffleTaskCallBack mycallback(box.shuffle_task_controller,
box.shuffle_task_reserve_queue,
handle);
if (OB_UNLIKELY(handle->data_buffer->get_data_len() <= 0)) {
// nothing to shuffle: release the handle through the callback instead of posting an RPC
ret = mycallback.release_resouce();
} else {
ObShuffleTask task;
task.task_id_ = task_id;
task.gid_ = box.gid;
if (OB_FAIL(task.shuffle_task_handle_.set_arg(handle))) {
LOG_WARN("fail to set arg", K(ret));
} else {
// asynchronous post to box.self_addr
// NOTE(review): mycallback is a stack object passed by pointer - presumably the
// proxy copies it before this scope ends; confirm
if (OB_FAIL(GCTX.load_data_proxy_->to(box.self_addr)
.by(box.tenant_id)
.timeout(box.txn_timeout)
.ap_load_data_shuffle(task, &mycallback))) {
LOG_WARN("load data proxy post rpc failed", K(ret));
}
}
}
}
// on any failure in this iteration, tell the controller the task is finished
if (OB_FAIL(ret)) {
box.shuffle_task_controller.on_task_finished();
}
}
return ret;
}
/**
 * Open the load-data log file and write its preamble: the load info text followed by
 * the column-name header line.
 *
 * @param box  job state holding the file appender, log file name and load info
 * @return OB_SUCCESS or the error of the first failing open / append
 */
int ObLoadDataSPImpl::create_log_file(ToolBox &box)
{
  int ret = OB_SUCCESS;
  // step 1: open the file
  if (OB_FAIL(box.file_appender.open(box.log_file_name, false, true))) {
    LOG_WARN("fail to open file", K(ret), K(box.log_file_name));
  }
  // step 2: load info
  if (OB_SUCC(ret) && OB_FAIL(box.file_appender.append(box.load_info.ptr(),
                                                       box.load_info.length(),
                                                       false))) {
    LOG_WARN("fail to append file", K(ret));
  }
  // step 3: column header
  if (OB_SUCC(ret) && OB_FAIL(box.file_appender.append(log_file_column_names,
                                                       strlen(log_file_column_names),
                                                       false))) {
    LOG_WARN("fail to append file", K(ret));
  }
  return ret;
}
/**
 * Format one failed-row record and append it to the load-data log file.
 *
 * @param box        job state; box.expr_buffer is used as the formatting scratch buffer
 * @param task_type  ShuffleTask records are tagged "WARN", all others "ERROR"
 * @param task_id    zero-based task id (written as task_id + 1)
 * @param line_num   zero-based line number (written as line_num + 1)
 * @param err_code   error code of the failure
 * @param err_msg    message to log; when empty it is looked up from err_code
 * @return OB_SUCCESS, OB_NOT_INIT when the scratch buffer is missing or the log file
 *         is not opened, or the error from formatting / appending
 */
int ObLoadDataSPImpl::log_failed_line(ToolBox &box,
                                      TaskType task_type,
                                      int64_t task_id,
                                      int64_t line_num,
                                      int err_code,
                                      ObString err_msg)
{
  int ret = OB_SUCCESS;
  if (OB_NOT_NULL(box.expr_buffer) && box.file_appender.is_opened()) {
    box.expr_buffer->reset();
    int64_t log_buf_pos = 0;
    //int err_no = ob_errpkt_errno(err_code, box.is_oracle_mode);
    if (err_msg.empty()) {
      err_msg = ob_errpkt_strerror(err_code, box.is_oracle_mode);
    }
    const char *level = (task_type == TaskType::ShuffleTask) ? "WARN" : "ERROR";
    ret = databuff_printf(box.expr_buffer->begin_ptr(),
                          box.expr_buffer->get_buffer_size(),
                          log_buf_pos,
                          log_file_row_fmt,
                          task_id + 1,
                          line_num + 1,
                          level,
                          err_code,
                          err_msg.length(),
                          err_msg.ptr());
    if (OB_FAIL(ret)) {
      LOG_WARN("fail to printf", K(ret), K(err_msg));
    } else {
      ret = box.file_appender.append(box.expr_buffer->begin_ptr(),
                                     log_buf_pos,
                                     false);
      if (OB_FAIL(ret)) {
        LOG_WARN("fail to append file", K(ret), K(log_buf_pos));
      } else {
        LOG_DEBUG("LOAD DATA log failed rows", K(task_id), K(line_num), K(task_type));
      }
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("box not init", K(ret));
  }
  return ret;
}
// Log the failing row of an insert task to the load-data log file.
//
// Walks every serialized row in the task's buffers, counting rows across all buffers.
// The row whose 1-based position equals task.result_.err_line_no_ is written with
// log_failed_line. Its line number is the first row number of the shuffle task that
// produced the source frag plus the row number stored with the row.
//
// box  - job state (log file appender, per-shuffle-task row-number table)
// task - failed insert task with its row buffers, source frags and result
// Returns OB_SUCCESS, OB_ERR_UNEXPECTED for a missing frag or an invalid shuffle task
// id, or the error from decoding / logging.
int ObLoadDataSPImpl::log_failed_insert_task(ToolBox &box, ObInsertTask &task)
{
int ret = OB_SUCCESS;
int log_err = OB_SUCCESS;
int row_counter = 0;
// create the log file on first use
if (!box.file_appender.is_opened()
&& OB_FAIL(create_log_file(box))) {
LOG_WARN("fail to create log file", K(ret));
} else {
log_err = task.result_.exec_ret_;
LOG_DEBUG("check task result", K(task.result_));
}
for (int64_t buf_i = 0; OB_SUCC(ret) && buf_i < task.insert_value_data_.count(); ++buf_i) {
// pos / buf / data_len are the names the OB_UNIS_DECODE macros below operate on
int64_t pos = 0;
const char* buf = task.insert_value_data_[buf_i].ptr();
int64_t data_len = task.insert_value_data_[buf_i].length();
ObDataFrag *frag = NULL;
int64_t line_num_base = 0;
if (OB_ISNULL(frag = static_cast<ObDataFrag *>(task.source_frag_[buf_i]))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("source data frag is NULL", K(buf_i), K(ret), K(task));
} else if (OB_UNLIKELY(OB_INVALID_ID == frag->shuffle_task_id
|| frag->shuffle_task_id >= box.file_buf_row_num.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("shuffle task id is invalid", K(ret), K(frag->shuffle_task_id));
} else {
// first row number of the file buffer handled by the shuffle task that made this frag
line_num_base = box.file_buf_row_num.at(frag->shuffle_task_id);
}
while (OB_SUCC(ret) && pos < data_len) {
int64_t row_ser_size = 0;
int64_t row_num = 0;
OB_UNIS_DECODE(row_ser_size);
int64_t pos_back = pos;
OB_UNIS_DECODE(row_num);
int64_t line_num = line_num_base + row_num;
row_counter++;
if (task.result_.err_line_no_ == row_counter) {
OZ (log_failed_line(box, TaskType::InsertTask, task.task_id_, line_num, log_err,
task.result_.err_msg_));
}
// skip the rest of this row: jump to the end of its serialized body
pos = pos_back + row_ser_size;
}
} //end for
return ret;
}
// Inspect the result of a returned insert task and decide what happens next.
//
// The task ends up in one of three states:
//   TASK_SUCC       - statistics in box and box.job_status are updated;
//   TASK_NEED_RETRY - retry_times_ is incremented and need_retry is set to true;
//   TASK_FAILED     - the failing row is logged and the task's exec_ret_ is returned.
// Retry is only possible in LOAD_REPLACE / LOAD_IGNORE mode and while retry_times_ is
// below ObInsertTask::RETRY_LIMIT.
//
// ctx         - used for memory_wait_local and for updating the partition location
// box         - job state
// insert_task - returned task; its part_mgr and server info must be non-NULL
// need_retry  - set to true when the task should be sent again; never set to false here
// Returns OB_SUCCESS, OB_ERR_UNEXPECTED for an invalid task, the task's exec_ret_ when
// it failed for good, or the error of a failing helper.
int ObLoadDataSPImpl::handle_returned_insert_task(ObExecContext &ctx,
ToolBox &box,
ObInsertTask &insert_task,
bool &need_retry)
{
int ret = OB_SUCCESS;
ObPartDataFragMgr *part_mgr = NULL;
ObLoadServerInfo *server_info = NULL;
ObInsertResult &result = insert_task.result_;
enum TASK_STATUS {TASK_SUCC, TASK_NEED_RETRY, TASK_FAILED} task_status = TASK_FAILED;
if (OB_ISNULL(part_mgr = insert_task.part_mgr)
|| OB_ISNULL(server_info = box.server_infos.at(insert_task.token_server_idx_))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid insert task", K(ret), K(insert_task));
}
// The result asks to wait for a minor freeze: wait via memory_wait_local, but only
// when this result was received after the last recorded available timestamp of the
// leader address.
if (OB_SUCC(ret)
&& result.flags_.test_bit(ObTaskResFlag::NEED_WAIT_MINOR_FREEZE)) {
int64_t last_ts = 0;
ObAddr &addr = part_mgr->get_leader_addr();
bool found = (OB_SUCCESS == box.server_last_available_ts.get(addr, last_ts));
if (insert_task.result_recv_ts_ > last_ts) {
bool is_leader_changed = false;
if (OB_FAIL(memory_wait_local(ctx, part_mgr->tablet_id_,
addr, box.wait_secs_for_mem_release,
is_leader_changed))) {
LOG_WARN("fail to memory_wait_local", K(ret));
} else {
int64_t curr_time = ObTimeUtil::current_time();
// look the address up again after a leader change
// NOTE(review): presumably addr now refers to the new leader - confirm
if (is_leader_changed) {
found = (OB_SUCCESS == box.server_last_available_ts.get(addr, last_ts));
}
ret = found ? box.server_last_available_ts.update(addr, curr_time)
: box.server_last_available_ts.insert(addr, curr_time);
if (OB_FAIL(ret)) {
LOG_WARN("failed to update server_last_available_ts",
K(ret), K(addr), K(found), K(is_leader_changed));
}
}
}
}
bool can_retry = (ObLoadDupActionType::LOAD_REPLACE == box.insert_mode
|| ObLoadDupActionType::LOAD_IGNORE == box.insert_mode)
&& insert_task.retry_times_ < ObInsertTask::RETRY_LIMIT;
// classify the result
if (OB_SUCC(ret)) {
int err = result.exec_ret_;
if (OB_LIKELY(OB_SUCCESS == err
&& !result.flags_.test_bit(ObTaskResFlag::RPC_TIMEOUT))) {
task_status = TASK_SUCC;
} else if (result.flags_.test_bit(ObTaskResFlag::RPC_TIMEOUT)) {
task_status = can_retry ? TASK_NEED_RETRY : TASK_FAILED;
if (TASK_FAILED == task_status) {
result.exec_ret_ = OB_TIMEOUT;
}
} else if (is_server_down_error(err)
|| is_master_changed_error(err)
|| is_partition_change_error(err)) {
// location-related error: refresh the partition location whether or not we retry
task_status = can_retry ? TASK_NEED_RETRY : TASK_FAILED;
if (OB_FAIL(part_mgr->update_part_location(ctx))) {
LOG_WARN("fail to update location cache", K(ret));
}
} else {
// failed because of an unexpected error; this is the default outcome
task_status = TASK_FAILED;
}
}
// act on the classification
if (OB_SUCC(ret)) {
switch (task_status) {
case TASK_SUCC:
box.affected_rows += insert_task.row_count_;
box.insert_rt_sum += insert_task.process_us_;
/* RESERVE FOR DEBUG
box.handle_returned_insert_task_count++;
if (insert_task.row_count_ != DEFAULT_BUFFERRED_ROW_COUNT) {
LOG_WARN("LOAD DATA task return",
"task_id", insert_task.task_id_,
"affected_rows", box.affected_rows,
"row_count", insert_task.row_count_);
}
*/
box.job_status->parsed_rows_ = box.affected_rows;
box.job_status->parsed_bytes_ += insert_task.data_size_;
box.job_status->total_insert_task_ = box.insert_task_controller.get_total_task_cnt();
box.job_status->insert_rt_sum_ = box.insert_rt_sum;
box.job_status->total_wait_secs_ = box.wait_secs_for_mem_release;
break;
case TASK_NEED_RETRY:
insert_task.retry_times_++;
need_retry = true;
LOG_WARN("LOAD DATA task need retry",
"execute server", server_info->addr,
"task_id", insert_task.task_id_,
"ret", result.exec_ret_,
"row_count", insert_task.row_count_);
break;
case TASK_FAILED:
// a failure to write the log file is only warned about; the task error is returned
if (OB_SUCCESS != log_failed_insert_task(box, insert_task)) {
LOG_WARN("fail to log failed insert task");
}
LOG_WARN("LOAD DATA task failed",
"execute server", server_info->addr,
"task_id", insert_task.task_id_,
"ret", result.exec_ret_,
"row_count", insert_task.row_count_);
ret = result.exec_ret_;
break;
default:
ret = OB_ERR_UNEXPECTED;
break;
}
}
return ret;
}
// Wait until box.parallel insert tasks have come back, resending those that need a
// retry, then hand every insert task object back to the reserve queue.
//
// The wait loop keeps running after a failure so that each dispatched task is
// collected; the first error seen is what gets returned.
//
// ctx - used for result handling and the session status check
// box - job state
// Returns OB_SUCCESS or the first error recorded.
int ObLoadDataSPImpl::wait_insert_task_return(ObExecContext &ctx, ToolBox &box)
{
int ret = OB_SUCCESS;
int ret_bak = OB_SUCCESS;
for (int64_t returned_cnt = 0; returned_cnt < box.parallel; ++returned_cnt) {
// keep looping even when ret has failed, so that every dispatched task has either returned or timed out
ObInsertTask *insert_task = NULL;
bool need_retry = false;
if (OB_FAIL(box.insert_task_controller.on_next_task())) {
LOG_WARN("fail to get next task id", K(ret));
} else if (OB_FAIL(box.insert_task_reserve_queue.pop(insert_task))) {
LOG_WARN("fail to pop", K(ret));
} else if (OB_ISNULL(insert_task)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("insert task is null", K(ret));
} else if (!insert_task->is_empty_task()
&& OB_FAIL(handle_returned_insert_task(ctx,
box,
*insert_task,
need_retry))) {
LOG_WARN("fail to handle returned insert task", K(ret));
} else if (OB_LIKELY(!need_retry)) {
//do nothing
} else {
// the task must be sent again
ObRpcLoadDataInsertTaskCallBack mycallback(box.insert_task_controller,
box.insert_task_reserve_queue,
insert_task);
OZ (ObLoadDataUtils::check_session_status(*ctx.get_my_session()));
if (OB_SUCC(ret)) {
if (OB_FAIL(GCTX.load_data_proxy_->to(insert_task->part_mgr->get_leader_addr())
.by(box.tenant_id)
.timeout(box.txn_timeout)
.ap_load_data_insert(*insert_task, &mycallback))) {
LOG_WARN("load data proxy post rpc failed", K(ret));
} else {
// the resent task is outstanding again, so this iteration does not count as a return
--returned_cnt;
}
}
}
// remember only the first failure; later iterations overwrite ret
if (OB_FAIL(ret) && OB_SUCCESS == ret_bak) {
ret_bak = ret;
}
}
if (OB_SUCCESS != ret_bak) {
ret = ret_bak;
}
// give every task object back to the controller / reserve queue and reset it for reuse
for (int64_t i = 0; OB_SUCC(ret) && i < box.parallel; ++i) {
ObInsertTask *insert_task = box.insert_resource[i];
if (OB_FAIL(box.insert_task_controller.on_task_finished())) {
LOG_WARN("fail to on task finish", K(ret));
} else if (OB_FAIL(box.insert_task_reserve_queue.push_back(insert_task))) {
LOG_WARN("fail to push back", K(ret));
} else if (OB_ISNULL(insert_task)) {
ret = OB_ERR_UNEXPECTED;
} else {
insert_task->reuse();
}
}
return ret;
}
/**
 * Post one insert task asynchronously to the leader of the partition it targets.
 *
 * @param insert_task  task to send; its part_mgr supplies the leader address
 * @param box          job state (controller, reserve queue, tenant id, timeout)
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED when insert_task is NULL, or the RPC post error
 */
int ObLoadDataSPImpl::insert_task_send(ObInsertTask *insert_task, ToolBox &box)
{
  int ret = OB_SUCCESS;
  ObRpcLoadDataInsertTaskCallBack mycallback(box.insert_task_controller,
                                             box.insert_task_reserve_queue,
                                             insert_task);
  if (OB_NOT_NULL(insert_task)) {
    ret = GCTX.load_data_proxy_->to(insert_task->part_mgr->get_leader_addr())
                                .by(box.tenant_id)
                                .timeout(box.txn_timeout)
                                .ap_load_data_insert(*insert_task, &mycallback);
    if (OB_FAIL(ret)) {
      LOG_WARN("load data proxy post rpc failed", K(ret));
    }
  } else {
    ret = OB_ERR_UNEXPECTED;
  }
  return ret;
}
// Generate insert tasks from the shuffled data frags and dispatch them until no more
// work can be sent.
//
// token_cnt starts at the controller's max parallelism. Each loop iteration pops one
// returned task object, handles its previous result, and then either
//   CASE1: resends it (retry),
//   CASE2: finds no more data on the task's server, or
//   CASE3: fills it with the next batch of rows and sends it.
// Every iteration that does not send a task out (CASE2 or any failure) decrements
// token_cnt; the loop ends when token_cnt reaches 0. Afterwards all task objects are
// handed back to the reserve queue.
//
// ctx - forwarded to handle_returned_insert_task
// box - job state
// Returns OB_SUCCESS or an error from one of the steps.
int ObLoadDataSPImpl::insert_task_gen_and_dispatch(ObExecContext &ctx, ToolBox &box)
{
int ret = OB_SUCCESS;
const int64_t total_server_n = box.server_infos.count();
// one cursor per server into that server's part_datafrag_group; the array length is a
// runtime value
// NOTE(review): runtime-sized arrays are a compiler extension in C++, and
// total_server_n == 0 would give a zero-length array - confirm callers guarantee > 0
int64_t part_iters[total_server_n];
MEMSET(part_iters, 0, sizeof(part_iters));
int64_t token_cnt = box.insert_task_controller.get_max_parallelism();
// the loop condition does not test ret: once ret has failed, the OB_SUCC(ret) guard
// below is skipped, task_send_out stays false and token_cnt counts down to 0
while (token_cnt > 0) {
ObInsertTask *insert_task = NULL;
bool need_retry = false;
bool task_send_out = false;
// NOTE(review): OW presumably runs the call even when ret has already failed, so
// outstanding tasks keep being waited for - confirm against the macro definition
OW (box.insert_task_controller.on_next_task());
if (OB_SUCC(ret)) {
if (OB_FAIL(box.insert_task_reserve_queue.pop(insert_task))) {
LOG_WARN("fail to pop", K(ret));
} else if (OB_ISNULL(insert_task)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("insert task is null", K(ret));
} else if (!insert_task->is_empty_task()
&& OB_FAIL(handle_returned_insert_task(ctx,
box,
*insert_task,
need_retry))) {
LOG_WARN("fail to handle returned insert task", K(ret));
} else if (OB_UNLIKELY(need_retry)) {
//CASE1: for retry old insert task
LOG_DEBUG("LOAD DATA need retry", KPC(insert_task));
if (OB_FAIL(insert_task_send(insert_task, box))) {
LOG_WARN("fail to send insert task", K(ret));
} else {
task_send_out = true;
}
} else {
// part_iter is a reference, so progress through this server's partitions persists
// across loop iterations
int64_t &part_iter = part_iters[insert_task->token_server_idx_];
ObLoadServerInfo *server_info = box.server_infos.at(insert_task->token_server_idx_);
ObPartDataFragMgr *part_datafrag_mgr = nullptr;
int64_t row_count = box.batch_row_count;
bool iter_end = true;
//find next batch data on this server
for (; part_iter < server_info->part_datafrag_group.count(); ++part_iter) {
part_datafrag_mgr = server_info->part_datafrag_group.at(part_iter);
row_count = box.batch_row_count;
// take a full batch if available; at end of file also take whatever rows remain
// (row_count is then overwritten with the remaining row count)
if (part_datafrag_mgr->has_data(row_count)
|| (box.read_cursor.is_end_file()
&& 0 != (row_count = part_datafrag_mgr->remain_row_count()))) {
iter_end = false;
break;
}
}
if (!insert_task->is_empty_task()) {
insert_task->reuse();
}
if (iter_end) {
//CASE2: all task on this server are done
task_send_out = false;
LOG_DEBUG("LOAD DATA all jobs are finish", K(server_info->addr), K(token_cnt));
} else {
//CASE3: for new insert task
insert_task->part_mgr = part_datafrag_mgr;
insert_task->task_id_ = box.insert_task_controller.get_next_task_id();
if (OB_FAIL(part_datafrag_mgr->next_insert_task(row_count, *insert_task))) {
LOG_WARN("fail to generate insert task", K(ret));
} else {
box.insert_dispatch_rows += row_count;
box.insert_task_count++;
if (row_count != DEFAULT_BUFFERRED_ROW_COUNT) {
LOG_DEBUG("LOAD DATA task generate",
"task_id", insert_task->task_id_,
"affected_rows", box.affected_rows,
K(row_count));
}
if (OB_FAIL(insert_task_send(insert_task, box))) {
LOG_WARN("fail to send insert task", K(ret));
} else {
task_send_out = true;
}
}
}
}
}
// nothing was sent in this iteration: one token fewer is in flight
if (!task_send_out) {
token_cnt--;
}
}
// give every task object back to the controller / reserve queue and reset it for reuse
for (int64_t i = 0; OB_SUCC(ret) && i < box.insert_task_controller.get_max_parallelism(); ++i) {
ObInsertTask *insert_task = box.insert_resource[i];
if (OB_FAIL(box.insert_task_controller.on_task_finished())) {
LOG_WARN("fail to on task finish", K(ret));
} else if (OB_FAIL(box.insert_task_reserve_queue.push_back(insert_task))) {
LOG_WARN("fail to push back", K(ret));
} else if (OB_ISNULL(insert_task)) {
ret = OB_ERR_UNEXPECTED;
} else {
insert_task->reuse();
}
}
return ret;
}
// Entry point of LOAD DATA: initialise the toolbox, skip the rows to be ignored, then
// alternate shuffle and insert phases until the input file is exhausted, and finally
// release resources and publish the row counts to the physical plan context.
//
// ctx       - execution context (session, physical plan context, allocator)
// load_stmt - resolved LOAD DATA statement
// Returns OB_SUCCESS or the first error recorded in ret.
// NOTE(review): OZ presumably skips its call once ret has failed, while OW presumably
// still runs it - confirm against the macro definitions.
int ObLoadDataSPImpl::execute(ObExecContext &ctx, ObLoadDataStmt &load_stmt)
{
int ret = OB_SUCCESS;
HEAP_VAR(ToolBox, box) {
//init toolbox
OZ (box.init(ctx, load_stmt));
LOG_INFO("LOAD DATA start report"
, "file_path", load_stmt.get_load_arguments().file_name_
, "table_name", load_stmt.get_load_arguments().combined_name_
, "batch_size", box.batch_row_count
, "parallel", box.parallel
, "load_mode", box.insert_mode
, "transaction_timeout", box.txn_timeout
);
//ignore rows
// read into the temporary handle until ignore_rows lines have been counted or the file ends
while (OB_SUCC(ret)
&& !box.read_cursor.is_end_file()
&& box.data_trimer.get_lines_count() < box.ignore_rows) {
OZ (next_file_buffer(ctx, box, box.temp_handle,
box.ignore_rows - box.data_trimer.get_lines_count()));
LOG_DEBUG("LOAD DATA ignore rows", K(box.ignore_rows), K(box.data_trimer.get_lines_count()));
}
//main while
while (OB_SUCC(ret) && !box.read_cursor.is_end_file()) {
/* Execution runs in two parallel phases:
* 1. compute partitions in parallel (shuffle_task_gen_and_dispatch)
* 2. insert in parallel (insert_task_gen_and_dispatch)
* Each iteration reads data_frag_mem_usage_limit * MAX_BUFFER_SIZE = 100M from the file and caches it in memory
*/
OZ (shuffle_task_gen_and_dispatch(ctx, box));
OW (wait_shuffle_task_return(box));
OZ (insert_task_gen_and_dispatch(ctx, box));
//OW (wait_insert_task_return(ctx, box));
/* All asynchronous tasks have returned, so the datafrags they depended on can be released
*/
OW (box.data_frag_mgr.free_unused_datafrag());
/* Check whether the session is still valid; exit directly if it is not
*/
OZ (ObLoadDataUtils::check_session_status(*ctx.get_my_session()));
}
//release
OW (box.release_resources());
// publish the counters only on success
if (OB_SUCC(ret) && OB_NOT_NULL(ctx.get_physical_plan_ctx())) {
ctx.get_physical_plan_ctx()->set_affected_rows(box.affected_rows);
ctx.get_physical_plan_ctx()->set_row_matched_count(box.data_trimer.get_lines_count());
}
if (OB_NOT_NULL(ctx.get_my_session())) {
ctx.get_my_session()->reset_cur_phy_plan_to_null();
}
if (OB_FAIL(ret)) {
LOG_WARN("LOAD DATA execute failed, ", K(ret));
}
// an opened appender means at least one failed row was written to the log file
if (box.file_appender.is_opened()) {
LOG_WARN("LOAD DATA error log generated");
}
LOG_INFO("LOAD DATA finish report"
, "total shuffle task", box.shuffle_task_controller.get_total_task_cnt()
, "total insert task", box.insert_task_controller.get_total_task_cnt()
, "insert rt sum", box.insert_rt_sum
, "suffle rt sum", box.suffle_rt_sum
, "total wait secs", box.wait_secs_for_mem_release
, "datafrag info", box.data_frag_mgr
);
}
return ret;
}
/**
 * Copy the saved incomplete tail back to the start of the given buffer and advance the
 * buffer position by its length. Does nothing when no tail is saved.
 *
 * @param buffer  destination file buffer
 * @return OB_SUCCESS, or OB_INVALID_ARGUMENT when the buffer has no begin pointer
 */
int ObLoadFileDataTrimer::recover_incomplate_data(ObLoadFileBuffer &buffer)
{
  int ret = OB_SUCCESS;
  char *dest = buffer.begin_ptr();
  if (OB_NOT_NULL(dest)) {
    if (incomplate_data_len_ > 0) {
      MEMCPY(dest, incomplate_data_, incomplate_data_len_);
      buffer.update_pos(incomplate_data_len_);
    }
  } else {
    ret = OB_INVALID_ARGUMENT;
  }
  return ret;
}
/**
 * Save the bytes of the buffer that lie behind the complete lines, and shrink the
 * buffer's data length by that amount.
 *
 * @param buffer          file buffer that was just pre-parsed
 * @param valid_data_len  number of leading bytes that form complete lines
 * @return OB_SUCCESS, or OB_SIZE_OVERFLOW when the tail does not fit the backup buffer
 */
int ObLoadFileDataTrimer::backup_incomplate_data(ObLoadFileBuffer &buffer, int64_t valid_data_len)
{
  int ret = OB_SUCCESS;
  incomplate_data_len_ = buffer.get_data_len() - valid_data_len;
  if (incomplate_data_len_ <= incomplate_data_buf_len_) {
    if (NULL != incomplate_data_ && incomplate_data_len_ > 0) {
      MEMCPY(incomplate_data_, buffer.begin_ptr() + valid_data_len, incomplate_data_len_);
      buffer.update_pos(-incomplate_data_len_);
    }
  } else {
    ret = OB_SIZE_OVERFLOW;
    LOG_WARN("size over flow", K(ret), K(incomplate_data_len_), K(incomplate_data_buf_len_));
  }
  return ret;
}
/**
 * Translate a row offset inside a frag into a byte position.
 *
 * Starting at position 0, decodes the size prefix of each of the first row_num rows
 * and skips over the row body.
 *
 * @param frag     frag to walk
 * @param row_num  number of rows to skip
 * @param pos      out: byte position reached; reset to 0 on entry
 * @return OB_SUCCESS, OB_INVALID_ARGUMENT for a NULL frag, or a decode error
 */
int ObPartDataFragMgr::rowoffset2pos(ObDataFrag *frag, int64_t row_num, int64_t &pos)
{
  int ret = OB_SUCCESS;
  pos = 0;
  if (OB_NOT_NULL(frag)) {
    // buf / data_len / pos are the names OB_UNIS_DECODE operates on
    char *buf = frag->data;
    int64_t data_len = frag->frag_pos;
    int64_t rows_skipped = 0;
    while (OB_SUCC(ret) && rows_skipped < row_num) {
      int64_t row_len = 0;
      OB_UNIS_DECODE(row_len);
      pos += row_len;
      ++rows_skipped;
    }
  } else {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", K(ret));
  }
  return ret;
}
int ObPartDataFragMgr::free_frags()
{
int ret = OB_SUCCESS;
for (int64_t i = 0; i < frag_free_list_.count(); ++i) {
data_frag_mgr_.distory_datafrag(frag_free_list_[i]);
}
frag_free_list_.reuse();
return ret;
}
/**
 * Drain the frag queue, destroying every frag popped from it. Nothing is done when
 * has_data(1) reports no data.
 *
 * @return always OB_SUCCESS
 */
int ObPartDataFragMgr::clear()
{
  int ret = OB_SUCCESS;
  ObLink *link = NULL;
  if (has_data(1)) {
    // pop until the queue reports OB_EAGAIN
    for (;;) {
      if (OB_EAGAIN == queue_.pop(link)) {
        break;
      }
      data_frag_mgr_.distory_datafrag(static_cast<ObDataFrag *>(link));
    }
  }
  return ret;
}
// Builds one insert task of `batch_row_count` rows from the frags at the head of queue_.
//
// Each frag touched contributes one ObString slice to task.insert_value_data_ and one
// entry to task.source_frag_. A frag that is consumed completely is popped from the queue
// and parked in frag_free_list_; a frag that is only partly consumed stays at the head and
// queue_top_begin_point_ remembers (row offset, byte offset) where the next task resumes.
//
// Returns OB_ERR_UNEXPECTED for a non-positive batch_row_count, a NULL head frag or a
// failed pop; OB_EAGAIN when has_data(batch_row_count) is false.
// Note that task.row_count_ is assigned batch_row_count even when ret is a failure.
int ObPartDataFragMgr::next_insert_task(int64_t batch_row_count, ObInsertTask &task)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(batch_row_count <= 0)) {
ret = OB_ERR_UNEXPECTED;
} else if (OB_UNLIKELY(!has_data(batch_row_count))) {
ret = OB_EAGAIN; //for now, never reach here
} else {
// the rows are accounted as consumed up front, before any frag is actually sliced
total_row_consumed_ += batch_row_count;
}
ObLink *link = NULL;
ObDataFrag *frag = NULL;
// Start negative: rows of the head frag that an earlier task already took must not be
// counted again when the whole frag->row_cnt is added below.
int64_t row_count = -queue_top_begin_point_.frag_row_pos_;
InsertTaskSplitPoint new_top_begin_point;
while (OB_SUCC(ret) && row_count < batch_row_count) {
new_top_begin_point.reset();
//handle one frag from head
// spin until the head of the queue becomes visible
while (OB_EAGAIN == queue_.top(link)) { pause(); }
if (OB_ISNULL(frag = static_cast<ObDataFrag *>(link))) {
ret = OB_ERR_UNEXPECTED;
} else if ((row_count += frag->row_cnt) > batch_row_count) {
//case1 frag has data remained,do not pop
// number of rows of this frag that belong to the current task (counted from row 0)
new_top_begin_point.frag_row_pos_ = frag->row_cnt - (row_count - batch_row_count);
if (OB_FAIL(rowoffset2pos(frag,
new_top_begin_point.frag_row_pos_,
new_top_begin_point.frag_data_pos_))) {
LOG_WARN("fail to rowoffset to pos", K(ret));
} else if (OB_FAIL(task.insert_value_data_.push_back(
// slice = [previous split point, new split point) inside frag->data
ObString(new_top_begin_point.frag_data_pos_ - queue_top_begin_point_.frag_data_pos_,
frag->data + queue_top_begin_point_.frag_data_pos_)))) {
LOG_WARN("fail to do push back", K(ret));
} else if (OB_FAIL(task.source_frag_.push_back(frag))) {
LOG_WARN("fail to push back frag", K(ret));
}
} else {
//case2 frag is empty,need pop
if (OB_FAIL(queue_.pop(link))) {
ret = OB_ERR_UNEXPECTED;
} else if (OB_FAIL(frag_free_list_.push_back(frag))) {
//TODO free frag for failure
// NOTE(review): the frag is already popped here and nothing visible in this function
// tracks it after this failure -- looks like a leak; confirm before relying on cleanup.
LOG_WARN("fail to push back", K(ret));
} else {
if (OB_FAIL(task.insert_value_data_.push_back(
// slice = [previous split point, end of the data written to the frag)
ObString(frag->frag_pos - queue_top_begin_point_.frag_data_pos_,
frag->data + queue_top_begin_point_.frag_data_pos_)))) {
LOG_WARN("fail to do push back", K(ret));
} else if (OB_FAIL(task.source_frag_.push_back(frag))) {
LOG_WARN("fail to push back frag", K(ret));
}
// executed regardless of the push_back results above; only fully consumed frags
// (case2) add to data_size_
task.data_size_ += frag->orig_data_size;
}
}
// Applied on every iteration, including failed ones: either the new split point inside
// the head frag (case1) or a reset point for the next frag (case2).
queue_top_begin_point_ = new_top_begin_point;
}
task.row_count_ = batch_row_count;
LOG_DEBUG("next_insert_task", K(task));
return ret;
}
int ObDataFragMgr::free_unused_datafrag()
{
int ret = OB_SUCCESS;
for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids_.count(); ++i) {
ObTabletID tablet_id = tablet_ids_[i];
ObPartDataFragMgr *part_data_frag = NULL;
if (OB_FAIL(get_part_datafrag(tablet_id, part_data_frag))) {
LOG_WARN("fail to get part datafrag", K(ret), K(tablet_id));
} else if (OB_ISNULL(part_data_frag)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("part data frag is null", K(ret));
} else if (OB_FAIL(part_data_frag->free_frags())) {
LOG_WARN("fail to free frag", K(ret));
}
}
return ret;
}
// Teardown helper: drains the frag queue of every partition and then runs the
// destructor of the per-partition manager.
//
// This is cleanup code, so it is best-effort: a failure on one partition is
// logged and remembered, but the remaining partitions are still processed.
// Aborting on the first error would leave every later partition's frags
// unreleased. The first error encountered is returned; OB_SUCCESS otherwise.
// A manager whose clear() failed is left un-destructed.
int ObDataFragMgr::clear_all_datafrag()
{
  int ret = OB_SUCCESS;
  for (int64_t i = 0; i < tablet_ids_.count(); ++i) {
    int tmp_ret = OB_SUCCESS;
    ObTabletID tablet_id = tablet_ids_[i];
    ObPartDataFragMgr *part_data_frag = NULL;
    if (OB_SUCCESS != (tmp_ret = get_part_datafrag(tablet_id, part_data_frag))) {
      LOG_WARN("fail to get part datafrag", K(tmp_ret), K(tablet_id));
    } else if (OB_ISNULL(part_data_frag)) {
      tmp_ret = OB_ERR_UNEXPECTED;
      LOG_WARN("part data frag is null", K(tmp_ret), K(tablet_id));
    } else if (OB_SUCCESS != (tmp_ret = part_data_frag->clear())) {
      LOG_WARN("fail to free frag", K(tmp_ret), K(tablet_id));
    } else {
      // the object itself is not freed here, only its destructor is run
      part_data_frag->~ObPartDataFragMgr();
    }
    if (OB_SUCCESS == ret && OB_SUCCESS != tmp_ret) {
      ret = tmp_ret; // report the first failure, keep cleaning up
    }
  }
  return ret;
}
// Initializes the frag manager for `table_id`.
//
// Steps:
//   1. Look up the table schema through the schema guard of ctx's sql ctx and
//      collect all tablet ids of the table into tablet_ids_.
//   2. For every tablet create an ObPartDataFragMgr on ctx's allocator,
//      resolve its leader location, register it in part_datafrag_map_ and
//      mark index i in part_bitset_.
//   3. Set up the memory attributes used by create_datafrag() and reset the
//      alloc/free counters.
//
// Returns OB_INVALID_ARGUMENT when the sql ctx, schema guard or session is
// missing, OB_ERR_UNEXPECTED when the schema is NULL,
// OB_ALLOCATE_MEMORY_FAILED on allocation failure, or the error of the
// failing callee.
int ObDataFragMgr::init(ObExecContext &ctx, uint64_t table_id)
{
  int ret = OB_SUCCESS;
  ObSchemaGetterGuard *schema_guard = NULL;
  const ObTableSchema *table_schema = NULL;
  ObSEArray<ObObjectID, 4> part_ids;
  tablet_ids_.reset();
  if (OB_ISNULL(ctx.get_sql_ctx())
      || OB_ISNULL(schema_guard = ctx.get_sql_ctx()->schema_guard_)
      || OB_ISNULL(ctx.get_my_session())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("sql ctx is null", K(ret), KP(ctx.get_sql_ctx()));
  } else if (OB_FAIL(schema_guard->get_table_schema(
                     ctx.get_my_session()->get_effective_tenant_id(),
                     table_id, table_schema))) {
    // the failing call fetches the table schema, so say so in the log
    LOG_WARN("fail to get table schema", K(ret), K(table_id));
  } else if (OB_ISNULL(table_schema)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("table schema is NULL", K(ret));
  } else if (OB_FAIL(table_schema->get_all_tablet_and_object_ids(tablet_ids_, part_ids))) {
    LOG_WARN("failed to get partition ids", K(ret));
  } else {
    LOG_INFO("table partition ids", K(tablet_ids_));
    total_part_cnt_ = tablet_ids_.count();
  }
  for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids_.count(); ++i) {
    ObTabletID tablet_id = tablet_ids_[i];
    ObPartDataFragMgr *part_data_frag = NULL;
    // becomes true once part_datafrag_map_ holds the pointer; from then on
    // clear_all_datafrag() is responsible for running the destructor
    bool is_registered = false;
    if (OB_ISNULL(part_data_frag
                  = OB_NEWx(ObPartDataFragMgr,
                            (&ctx.get_allocator()),
                            *this,
                            ctx.get_my_session()->get_effective_tenant_id(),
                            tablet_id))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("allocate memory failed", K(ret));
    } else if (FALSE_IT(part_data_frag->tablet_id_ = tablet_id)) {
    } else if (OB_FAIL(part_data_frag->update_part_location(ctx))) {
      LOG_WARN("fail to update part locatition", K(ret));
    } else if (OB_FAIL(part_datafrag_map_.set_refactored(part_data_frag))) {
      LOG_WARN("fail to set hash map", K(ret));
    } else if (FALSE_IT(is_registered = true)) {
    } else if (OB_FAIL(part_bitset_.add_member(i))) {
      LOG_WARN("fail to add bitset", K(ret));
    }
    if (OB_SUCCESS != ret && NULL != part_data_frag && !is_registered) {
      // Not reachable through the map, so nobody else would ever run the
      // destructor of this object.
      part_data_frag->~ObPartDataFragMgr();
      part_data_frag = NULL;
    }
  }
  if (OB_SUCC(ret)) {
    attr_.tenant_id_ = ctx.get_my_session()->get_effective_tenant_id();
    attr_.label_ = common::ObModIds::OB_SQL_LOAD_DATA;
    //attr_.ctx_id_ = common::ObCtxIds::WORK_AREA;
    total_alloc_cnt_ = 0;
    total_free_cnt_ = 0;
  }
  return ret;
}
// Looks up the per-partition frag manager registered for `tablet_id`.
// The hash map's return code is forwarded unchanged.
int ObDataFragMgr::get_part_datafrag(ObTabletID tablet_id,
                                     ObPartDataFragMgr *&part_datafrag_mgr)
{
  const int lookup_ret = part_datafrag_map_.get_refactored(tablet_id, part_datafrag_mgr);
  return lookup_ret;
}
// Allocates a new ObDataFrag able to hold at least `min_len` payload bytes.
//
// Size policy (header + payload = required bytes):
//   - required <= DEFAULT_STRUCT_SIZE        -> DEFAULT_STRUCT_SIZE
//   - required >= OB_MALLOC_BIG_BLOCK_SIZE   -> exactly the required size
//   - otherwise                              -> rounded up to a multiple of DEFAULT_STRUCT_SIZE
// On success `frag` points to the new object and total_alloc_cnt_ is bumped
// atomically; on allocation failure `frag` stays NULL and
// OB_ALLOCATE_MEMORY_FAILED is returned.
int ObDataFragMgr::create_datafrag(ObDataFrag *&frag, int64_t min_len) {
  int ret = OB_SUCCESS;
  frag = NULL;
  const int64_t min_alloc_size = sizeof(ObDataFrag) + min_len;
  int64_t opt_alloc_size = ObDataFrag::DEFAULT_STRUCT_SIZE;
  if (min_alloc_size > ObDataFrag::DEFAULT_STRUCT_SIZE) {
    opt_alloc_size = (min_alloc_size >= OB_MALLOC_BIG_BLOCK_SIZE)
        ? min_alloc_size
        : ObDataFrag::DEFAULT_STRUCT_SIZE
              * ((min_alloc_size - 1) / ObDataFrag::DEFAULT_STRUCT_SIZE + 1);
  }
  void *raw_mem = ob_malloc(opt_alloc_size, attr_);
  if (OB_ISNULL(raw_mem)) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("fail to malloc", K(ret), KP(this));
  } else {
    frag = new(raw_mem) ObDataFrag(opt_alloc_size);
    ATOMIC_AAF(&total_alloc_cnt_, 1);
  }
  return ret;
}
// Destroys a frag obtained from create_datafrag(): runs its destructor, returns
// the memory with ob_free() and counts the release. A NULL frag is ignored.
void ObDataFragMgr::distory_datafrag(ObDataFrag *frag) {
  if (OB_NOT_NULL(frag)) {
    frag->~ObDataFrag();
    ob_free(frag);
    // create_datafrag() bumps total_alloc_cnt_ with ATOMIC_AAF; update the
    // matching free counter the same way so the pair stays comparable even if
    // frags are released from more than one thread.
    ATOMIC_AAF(&total_free_cnt_, 1);
  }
}
// Resolves the leader address of tablet_id_ through the DAS location router and stores it
// in leader_addr_.
//
// Retry policy, as implemented below:
//   - the first lookup uses expire_renew_time = 0;
//   - after the first "location service renew" error, force_renew is switched on and every
//     later lookup passes INT64_MAX as expire_renew_time;
//   - further failures sleep retry_us between attempts until the deadline
//     (min(now + 30s, query timeout timestamp)) is closer than one retry interval, at which
//     point force_renew is cleared so the loop ends with the last error.
// Returns OB_NOT_INIT when tablet_id_ is invalid, otherwise the result of the last lookup.
// NOTE(review): ctx.get_my_session() is dereferenced without a NULL check -- callers must
// guarantee a session.
int ObPartDataFragMgr::update_part_location(ObExecContext &ctx)
{
int ret = OB_SUCCESS;
// pause between retries: 200ms
const int64_t retry_us = 200 * 1000;
const int64_t retry_timeout =
std::min(ObTimeUtil::current_time() + 30 * USECS_PER_SEC, // the RTO is 30s
ctx.get_my_session()->get_query_timeout_ts());
if (OB_UNLIKELY(!tablet_id_.is_valid())) {
ret = OB_NOT_INIT;
LOG_WARN("invalid partition key", K(ret));
} else {
bool force_renew = false;
ObDASLocationRouter &loc_router = DAS_CTX(ctx).get_location_router();
do {
const int64_t expire_renew_time = force_renew ? INT64_MAX : 0;
if (OB_FAIL(loc_router.get_leader(tenant_id_, tablet_id_, leader_addr_, expire_renew_time))) {
if (is_location_service_renew_error(ret) && !force_renew) {
// retry one time
force_renew = true;
LOG_WARN("failed to get location and force renew", K(ret), K(tablet_id_));
} else {
// reached for a non-renew error, or for any error once force_renew is already on
LOG_WARN("failed to get location", K(ret), K(tablet_id_));
if (ObTimeUtil::current_time() + retry_us > retry_timeout) {
// out of time: clearing the flag makes the loop condition false
force_renew = false;
} else {
// note: this sleep also happens for non-renew errors, right before the loop exits
ob_usleep(retry_us);
}
}
} else {
LOG_DEBUG("get participants", K(tablet_id_), K(leader_addr_));
}
// keep looping only while the error is a renew error and a forced renew is in progress
} while (is_location_service_renew_error(ret) && force_renew);
}
return ret;
}
// (Re)allocates the stash buffer from `allocator`.
// Without an existing buffer, incomplate_data_buf_len_ bytes are allocated;
// with one, the size is doubled and the first incomplate_data_len_ bytes of
// the old buffer are copied over. The old buffer is not freed here.
int ObLoadFileDataTrimer::expand_buf(ObIAllocator &allocator)
{
  int ret = OB_SUCCESS;
  const bool has_old_buf = (NULL != incomplate_data_);
  const int64_t new_buf_len = has_old_buf ? incomplate_data_buf_len_ * 2
                                          : incomplate_data_buf_len_;
  char *new_buf = static_cast<char*>(allocator.alloc(new_buf_len));
  if (OB_ISNULL(new_buf)) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("no memory", K(ret));
  } else {
    if (has_old_buf) {
      MEMCPY(new_buf, incomplate_data_, incomplate_data_len_);
    }
    incomplate_data_buf_len_ = new_buf_len;
    incomplate_data_ = new_buf;
  }
  return ret;
}
// Stores a copy of the CSV formats and allocates the stash buffer through
// expand_buf(); the allocation result is returned.
int ObLoadFileDataTrimer::init(ObIAllocator &allocator, const ObCSVFormats &formats)
{
  formats_ = formats;
  const int alloc_ret = expand_buf(allocator);
  return alloc_ret;
}
int ObLoadDataSPImpl::ToolBox::release_resources()
{
int ret = OB_SUCCESS;
if (gid.is_valid()) {
ObLoadDataStat *job_status = nullptr;
if (OB_FAIL(ObGlobalLoadDataStatMap::getInstance()->unregister_job(gid, job_status))) {
LOG_ERROR("fail to unregister job", K(ret), K(gid));
} else if (OB_ISNULL(job_status)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("fail to unregister job", K(ret), K(gid));
} else {
int64_t log_print_cnt = 0;
int64_t ref_cnt = 0;
while ((ref_cnt = job_status->get_ref_cnt()) > 0) {
ob_usleep(WAIT_INTERVAL_US); //1s
if ((log_print_cnt++) % 10 == 0) {
LOG_WARN("LOAD DATA wait job handle release",
K(ret), "wait_seconds", log_print_cnt * 10, K(gid), K(ref_cnt));
}
}
job_status->~ObLoadDataStat();
}
}
//release sessions in shuffle task
for (int64_t i = 0; i < shuffle_resource.count(); ++i) {
ObShuffleTaskHandle *handle = NULL;
int tmp_ret = OB_SUCCESS;
if (OB_ISNULL(handle = shuffle_resource[i])) {
tmp_ret = OB_ERR_UNEXPECTED;
LOG_ERROR("shuffle task handle is null, can not release the memory", K(tmp_ret));
} else {
handle->~ObShuffleTaskHandle();
}
if (OB_SUCC(ret) && OB_SUCCESS != tmp_ret) {
ret = tmp_ret;
}
}
for (int64_t i = 0; i < insert_resource.count(); ++i) {
ObInsertTask *task = NULL;
int tmp_ret = OB_SUCCESS;
if (OB_ISNULL(task = insert_resource[i])) {
tmp_ret = OB_ERR_UNEXPECTED;
LOG_ERROR("insert task is null, can not release the memory", K(tmp_ret));
} else {
task->~ObInsertTask();
}
if (OB_SUCC(ret) && OB_SUCCESS != tmp_ret) {
ret = tmp_ret;
}
}
/*
for (int64_t i = 0; i < insert_resource.count(); ++i) {
ObAllocatorSwitch *allocator = NULL;
int tmp_ret = OB_SUCCESS;
if (OB_ISNULL(allocator = ctx_allocators[i])) {
tmp_ret = OB_ERR_UNEXPECTED;
LOG_ERROR("insert task is null, can not release the memory", K(tmp_ret));
} else {
allocator->~ObAllocatorSwitch();
}
if (OB_SUCC(ret) && OB_SUCCESS != tmp_ret) {
ret = tmp_ret;
}
}
*/
int tmp_ret = data_frag_mgr.clear_all_datafrag();
if (OB_SUCCESS != tmp_ret) {
LOG_WARN("fail to clear all data frag", K(tmp_ret));
if (OB_SUCC(ret)) {
ret = tmp_ret;
}
}
for (int64_t i = 0; i < server_infos.count(); ++i) {
if (OB_NOT_NULL(server_infos.at(i))) {
server_infos.at(i)->~ObLoadServerInfo();
}
}
if (OB_NOT_NULL(expr_buffer)) {
ob_free(expr_buffer);
}
//release file reader
if (OB_NOT_NULL(file_reader)) {
file_reader->~ObFileReader();
file_reader = NULL;
}
if (OB_NOT_NULL(temp_handle)) {
temp_handle->~ObShuffleTaskHandle();
}
return ret;
}
// Builds the expression used to compute the target tablet id of a row read from the file.
//
// Outline:
//   1. Compose an "INSERT ... VALUES('0','1',...)" statement over the load columns and
//      resolve it via make_parameterize_stmt() to obtain an ObInsertStmt.
//   2. Fetch the partition / sub-partition expressions of the single table item,
//      depending on load_args.part_level_.
//   3. Create one varchar column-ref expression per file column ("field exprs") and, per
//      insert column, either a copy of its SET expression or the matching field expr.
//   4. Build the calc-tablet-id raw expression, substitute the statement's value columns
//      with the insert exprs and the table columns with the column-convert exprs, then
//      formalize it.
//   5. Generate a temp expr against a row desc made of the field exprs and hand it back
//      through `calc_tablet_id_expr`.
//   6. Copy the statement's auto-increment params into the physical plan ctx.
//   7. If the partition key contains an auto-increment column, `calc_tablet_id_expr` is
//      reset to NULL.
int ObLoadDataSPImpl::ToolBox::build_calc_partid_expr(ObExecContext &ctx,
ObLoadDataStmt &load_stmt,
ObTempExpr *&calc_tablet_id_expr)
{
int ret = OB_SUCCESS;
ParamStore paramstore(ObWrapperAllocator(ctx.get_allocator()));
ObInsertStmt *insert_stmt = nullptr;
ObSqlString insert_sql;
ObSEArray<ObString, 16> column_names;
ObLoadArgument &load_args = load_stmt.get_load_arguments();
bool need_online_osg = false;
// step 1: assemble the INSERT statement text (OZ(...) calls are skipped once ret has failed)
for (int i = 0; OB_SUCC(ret) && i < insert_infos.count(); ++i) {
OZ (column_names.push_back(insert_infos.at(i).column_name_));
}
OZ (ObLoadDataUtils::check_need_opt_stat_gather(ctx, load_stmt, need_online_osg));
OZ (ObLoadDataUtils::build_insert_sql_string_head(load_args.dupl_action_,
load_args.combined_name_,
column_names,
insert_sql,
need_online_osg));
OZ (insert_sql.append(" VALUES("));
// one quoted placeholder per insert column: '0','1','2',...
for (int i = 0; OB_SUCC(ret) && i < insert_infos.count(); ++i) {
if (i != 0) {
OZ (insert_sql.append(","));
}
OZ (insert_sql.append_fmt("'%d'", i));
}
OZ (insert_sql.append(")"));
OZ (ObLoadDataBase::make_parameterize_stmt(ctx, insert_sql, paramstore, insert_stmt));
if (OB_SUCC(ret)) {
// NOTE(review): insert_stmt is dereferenced here without a NULL check; this relies on
// make_parameterize_stmt() always setting it on success -- confirm.
ObIArray<ObRawExpr*> &column_convert_exprs = insert_stmt->get_column_conv_exprs();
ObIArray<ObColumnRefRawExpr*> &column_exprs = insert_stmt->get_insert_table_info().column_exprs_;
ObRawExpr *part_expr = nullptr;
ObRawExpr *subpart_expr = nullptr;
ObRawExpr *calc_partid_expr = NULL;
ObTempExpr *temp_expr = nullptr;
TableItem *table_item = nullptr;
RowDesc row_desc;
ObSEArray<ObRawExpr *, 16> insert_columns;
ObSEArray<ObRawExpr *, 16> value_mock_columns;
ObSEArray<ObRawExpr *, 16> field_exprs;
ObSEArray<ObRawExpr *, 16> insert_exprs;
// step 2: exactly one table item is expected; pick up its partition expressions
if (insert_stmt->get_table_items().count() != 1
|| OB_ISNULL(table_item = insert_stmt->get_table_items().at(0))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected table items", K(ret));
} else {
if (schema::PARTITION_LEVEL_ZERO != load_args.part_level_) {
part_expr = insert_stmt->get_part_expr(table_item->table_id_, table_item->ref_id_);
if (schema::PARTITION_LEVEL_ONE != load_args.part_level_) {
subpart_expr = insert_stmt->get_subpart_expr(table_item->table_id_, table_item->ref_id_);
}
}
}
// step 3a: one varchar column-ref expr per file column, using the file's collation
for (int i = 0; OB_SUCC(ret) && i < num_of_file_column; i++) {
ObColumnRefRawExpr *field_expr = nullptr;
if (OB_FAIL(ctx.get_expr_factory()->create_raw_expr(T_REF_COLUMN, field_expr))) {
LOG_WARN("create column ref raw expr failed", K(ret));
} else if (OB_ISNULL(field_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN(("field_expr is null"));
} else {
field_expr->set_data_type(ObVarcharType);
field_expr->set_collation_type(load_args.file_cs_type_);
// NOTE(review): '0' + i only yields a digit character for i < 10; what
// get_const_str() returns for larger values is not visible here -- confirm.
field_expr->set_column_attr("__field", ObCharsetUtils::get_const_str(CS_TYPE_UTF8MB4_BIN, '0' + i));
if (OB_FAIL(field_expr->add_flag(IS_COLUMN))) {
LOG_WARN("failed to add flag IS_COLUMN", K(ret));
} else if (OB_FAIL(field_exprs.push_back(field_expr))) {
LOG_WARN("failed to push back", K(ret));
}
}
}
// step 3b: per insert column, either a rewritten copy of its value expression or the
// field expr it refers to by array_ref_idx_
if (OB_SUCC(ret)) {
ObRawExprCopier copier(*ctx.get_expr_factory());
ReplaceVariables replacer(ctx, load_stmt, field_exprs);
for (int i = 0; OB_SUCC(ret) && i < insert_infos.count(); ++i) {
ObRawExpr *insert_expr = nullptr;
ObLoadTableColumnDesc &desc = insert_infos.at(i);
if (OB_NOT_NULL(desc.expr_value_)) {
OZ (copier.copy_on_replace(desc.expr_value_, insert_expr, &replacer));
} else {
// no bounds check on array_ref_idx_ here
insert_expr = field_exprs.at(desc.array_ref_idx_);
}
OZ (insert_exprs.push_back(insert_expr));
LOG_DEBUG("push final insert expr", KPC(insert_expr));
}
}
// the row desc describes the input row of the generated expr: the field exprs in order
OZ (row_desc.init());
for (int i = 0; OB_SUCC(ret) && i < field_exprs.count(); i++) {
if (OB_FAIL(row_desc.add_column(field_exprs.at(i)))) {
LOG_WARN("fail to add column", K(ret));
}
}
// copy the statement's value-desc columns and table columns into ObRawExpr* arrays so
// they can be passed to the replace helpers below
for (int i = 0; OB_SUCC(ret) && i < insert_stmt->get_values_desc().count(); i++) {
OZ (value_mock_columns.push_back(insert_stmt->get_values_desc().at(i)));
}
for (int i = 0; OB_SUCC(ret) && i < column_exprs.count(); i++) {
OZ (insert_columns.push_back(column_exprs.at(i)));
}
// step 4: build the raw calc-tablet-id expr and rewrite it in two passes
if (OB_SUCC(ret)) {
if (OB_FAIL(ObRawExprUtils::build_calc_tablet_id_expr(*ctx.get_expr_factory(),
*ctx.get_my_session(),
load_args.table_id_,
load_args.part_level_,
part_expr,
subpart_expr,
calc_partid_expr))) {
LOG_WARN("fail to build table location expr", K(ret));
// pass 1: value-desc columns -> insert exprs, applied to the column-convert exprs
} else if (OB_FAIL(ObTransformUtils::replace_exprs(value_mock_columns,
insert_exprs,
column_convert_exprs))) {
LOG_WARN("fail to replace exprs", K(ret));
// pass 2: table columns -> column-convert exprs, applied to the calc-tablet-id expr
} else if (OB_FAIL(ObTransformUtils::replace_expr(insert_columns,
column_convert_exprs,
calc_partid_expr))) {
LOG_WARN("fail to replace exprs", K(ret));
} else if (OB_FAIL(calc_partid_expr->formalize(ctx.get_my_session()))) {
LOG_WARN("fail to formalize expr", K(ret));
}
}
// step 5: generate the temp expr and publish it through the out parameter
if (OB_SUCC(ret)) {
if (OB_ISNULL(ctx.get_sql_ctx())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("sql ctx is null", K(ret));
} else if (OB_FAIL(ObStaticEngineExprCG::gen_expr_with_row_desc(calc_partid_expr,
row_desc,
ctx.get_allocator(),
ctx.get_my_session(),
ctx.get_sql_ctx()->schema_guard_,
temp_expr))) {
LOG_WARN("fail to gen temp expr", K(ret));
} else {
calc_tablet_id_expr = temp_expr;
}
}
// step 6: hand the statement's auto-increment params to the physical plan ctx
if (OB_SUCC(ret)) {
if (OB_ISNULL(ctx.get_physical_plan_ctx())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("plan ctx is null", K(ret));
} else {
ctx.get_physical_plan_ctx()->set_autoinc_params(insert_stmt->get_autoinc_params());
}
}
// step 7: an auto-increment partition key disables the generated expr
if (OB_SUCC(ret)) {
bool part_key_has_autoinc = false;
OZ (insert_stmt->part_key_has_auto_inc(part_key_has_autoinc));
if (part_key_has_autoinc) {
calc_tablet_id_expr = NULL;
}
}
LOG_DEBUG("LOAD DATA check insert info",
K(column_convert_exprs), K(column_exprs), KPC(calc_partid_expr),
KPC(part_expr), KPC(subpart_expr),
K(insert_stmt->get_values_vector()),
K(insert_stmt->get_values_desc()));
}
return ret;
}
// Prepares every piece of state a LOAD DATA execution needs.
//
// In order:
//   - copy scalar settings from the load arguments and reset the counters;
//   - initialize the data trimmer, the column descriptions, the INSERT statement head
//     and the data frag manager;
//   - group the per-partition frag managers by leader address into server infos;
//   - derive the transaction timeout, open the input file and read its size
//     (file_size is -1 for non-seekable readers);
//   - record which insert columns are string/enum-set typed;
//   - allocate the expression buffer and initialize the statement generator;
//   - attach the plan to the session/exec ctx, optionally build the tablet-id
//     expression, and serialize the exec ctx for the shuffle handles;
//   - read the PARALLEL / BATCH_SIZE / BATCH_BUFFER_SIZE / QUERY_TIMEOUT hints;
//   - create the shuffle handles and insert tasks and fill the reserve queues;
//   - build the log file name and the textual load summary;
//   - register a job status object in the global stat map.
//
// Each stage runs only while ret is still OB_SUCCESS; the first error is returned.
// Objects created here are expected to be released by release_resources().
int ObLoadDataSPImpl::ToolBox::init(ObExecContext &ctx, ObLoadDataStmt &load_stmt)
{
  int ret = OB_SUCCESS;
  const ObLoadArgument &load_args = load_stmt.get_load_arguments();
  const ObDataInFileStruct &file_formats = load_stmt.get_data_struct_in_file();
  const ObLoadDataHint &hint = load_stmt.get_hints();
  ObIODOpt opt;
  ObIODOpts iod_opts;
  ObBackupIoAdapter util;
  bool need_online_osg = false;
  iod_opts.opts_ = &opt;
  iod_opts.opt_cnt_ = 0;
  formats.init(file_formats);
  self_addr = ctx.get_task_executor_ctx()->get_self_addr();
  //batch_row_count = DEFAULT_BUFFERRED_ROW_COUNT;
  data_frag_mem_usage_limit = 50; //50*2M = 100M
  is_oracle_mode = lib::is_oracle_mode();
  tenant_id = load_args.tenant_id_;
  wait_secs_for_mem_release = 0;
  affected_rows = 0;
  insert_rt_sum = 0;
  suffle_rt_sum = 0;
  insert_dispatch_rows = 0;
  insert_task_count = 0;
  handle_returned_insert_task_count = 0;
  insert_mode = load_args.dupl_action_;
  load_file_storage = load_args.load_file_storage_;
  ignore_rows = load_args.ignore_rows_;
  last_session_check_ts = 0;
  ObSQLSessionInfo *session = NULL;
  ObTempExpr *calc_tablet_id_expr = nullptr;
  if (OB_ISNULL(session = ctx.get_my_session()) ||
      OB_ISNULL(ctx.get_sql_ctx())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("session is null", K(ret));
  } else if (OB_FAIL(data_trimer.init(ctx.get_allocator(), formats))) {
    LOG_WARN("fail to init data_trimer", K(ret));
  } else if (OB_FAIL(gen_load_table_column_desc(ctx, load_stmt, insert_infos))) {
    LOG_WARN("fail to build load table column desc", K(ret));
  } else if (OB_FAIL(ObLoadDataUtils::check_need_opt_stat_gather(ctx, load_stmt, need_online_osg))) {
    LOG_WARN("fail to check need online stats gather", K(ret));
  } else if (OB_FAIL(gen_insert_columns_names_buff(ctx, load_args,
                                                   insert_infos,
                                                   insert_stmt_head_buff,
                                                   need_online_osg))) {
    LOG_WARN("fail to gen insert column names buff", K(ret));
  } else if (OB_FAIL(data_frag_mgr.init(ctx, load_args.table_id_))) {
    LOG_WARN("fail to init data frag mgr", K(ret));
  }
  //init server_info_map
  if (OB_SUCC(ret)) {
    if (OB_FAIL(server_info_map.init("serverinfomap", MAX_SERVER_COUNT))) {
      LOG_WARN("fail to init server info map", K(ret));
    }
  }
  // group every partition's frag manager under the server info of its leader address
  for (int64_t i = 0; OB_SUCC(ret) && i < data_frag_mgr.get_tablet_ids().count(); ++i) {
    ObTabletID tablet_id = data_frag_mgr.get_tablet_ids().at(i);
    ObPartDataFragMgr *part_frag_mgr = nullptr;
    if (OB_FAIL(data_frag_mgr.get_part_datafrag(tablet_id, part_frag_mgr))) {
      LOG_WARN("fail to get part data frag", K(ret), K(tablet_id));
    } else if (OB_UNLIKELY(!part_frag_mgr->get_leader_addr().is_valid())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("part leader addr is not valid", K(ret), K(tablet_id));
    } else {
      ObLoadServerInfo *server_info = nullptr;
      if (OB_SUCCESS != server_info_map.get(part_frag_mgr->get_leader_addr(), server_info)) {
        //no find, create one
        if (OB_ISNULL(server_info = OB_NEWx(ObLoadServerInfo, (&ctx.get_allocator())))) {
          ret = OB_ALLOCATE_MEMORY_FAILED;
          LOG_WARN("Failed to alloc", K(ret));
        } else if (OB_FAIL(server_info_map.insert(part_frag_mgr->get_leader_addr(), server_info))) {
          LOG_WARN("fail to insert hash map", K(ret));
        } else {
          server_info->addr = part_frag_mgr->get_leader_addr();
        }
      } else {
        if (OB_FAIL(server_info_map.get(part_frag_mgr->get_leader_addr(), server_info))) {
          LOG_WARN("fail to get server info", K(ret));
        }
      }
      //save part index to server info
      if (OB_SUCC(ret)) {
        if (OB_FAIL(server_info->part_datafrag_group.push_back(part_frag_mgr))) {
          LOG_WARN("fail to add member", K(ret));
        }
      }
    }
  }
  //init server_info
  if (OB_SUCC(ret)) {
    // OB_SUCC assigns the push_back result to the captured `ret`
    auto push_to_array = [&] (const ObAddr &key, ObLoadServerInfo *value) -> bool {
      UNUSED(key);
      return OB_SUCC(server_infos.push_back(value));
    };
    if (OB_FAIL(server_infos.reserve(server_info_map.size()))) {
      LOG_WARN("fail to pre allocate", K(ret));
    } else if (OB_FAIL(server_info_map.for_each(push_to_array))) {
      LOG_WARN("fail to for each", K(ret));
    }
  }
  if (OB_SUCC(ret)) {
    if (OB_FAIL(session->get_tx_timeout(txn_timeout))) {
      LOG_WARN("fail to get transaction timeout", K(ret));
    } else {
      // clamp into [RPC_BATCH_INSERT_TIMEOUT_US, 10 minutes]; the upper bound is applied last
      txn_timeout = std::max(txn_timeout, RPC_BATCH_INSERT_TIMEOUT_US);
      txn_timeout = std::min(txn_timeout, MIN_TO_USEC(10));
    }
  }
  if (OB_SUCC(ret)) {
    file_read_param.file_location_ = load_file_storage;
    file_read_param.filename_ = load_args.file_name_;
    file_read_param.access_info_ = load_args.access_info_;
    file_read_param.packet_handle_ = &ctx.get_my_session()->get_pl_query_sender()->get_packet_sender();
    file_read_param.session_ = ctx.get_my_session();
    file_read_param.timeout_ts_ = THIS_WORKER.get_timeout_ts();
    if (OB_FAIL(ObFileReader::open(file_read_param, ctx.get_allocator(), file_reader))) {
      LOG_WARN("failed to open file.", KR(ret), K(file_read_param), K(load_args.file_name_));
    } else if (!file_reader->seekable()) {
      // size is unknown for non-seekable readers
      file_size = -1;
    } else if (OB_FAIL(file_reader->get_file_size(file_size))) {
      LOG_WARN("fail to get io device file size", KR(ret), K(file_size));
    }
  }
  // remember which insert columns are string/enum-set typed and not produced by SET
  for (int64_t i = 0; OB_SUCC(ret) && i < insert_infos.count(); ++i) {
    const ObLoadTableColumnDesc &desc = insert_infos.at(i);
    if (!desc.is_set_values_ && (ob_is_string_tc(desc.column_type_) || ob_is_enumset_tc(desc.column_type_))) {
      if (OB_FAIL(string_type_column_bitset.add_member(i))) {
        LOG_WARN("fail to add bitset", K(ret));
      }
    }
  }
  if (OB_SUCC(ret)) {
    void *buf = NULL;
    num_of_file_column = load_stmt.get_field_or_var_list().count();
    num_of_table_column = insert_infos.count();
    if (OB_FAIL(insert_values.prepare_allocate(num_of_table_column))) {
      LOG_WARN("fail to reserve array", K(ret));
    } else if (OB_FAIL(field_values_in_file.prepare_allocate(num_of_file_column))) {
      LOG_WARN("fail to reserve array", K(ret));
    } else if (OB_ISNULL(buf = ob_malloc(ObLoadFileBuffer::MAX_BUFFER_SIZE,
                                         ObMemAttr(tenant_id, ObModIds::OB_SQL_LOAD_DATA)))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("allocate memory failed", K(ret));
    } else if (FALSE_IT(expr_buffer = new(buf) ObLoadFileBuffer(
                        ObLoadFileBuffer::MAX_BUFFER_SIZE - sizeof(ObLoadFileBuffer)))) {
      // the buffer header lives in the same allocation, hence the reduced capacity
    } else if (OB_FAIL(generator.init(*session, expr_buffer, ctx.get_sql_ctx()->schema_guard_))) {
      LOG_WARN("fail to init generator", K(ret));
    } else if (OB_FAIL(generator.set_params(insert_stmt_head_buff, load_args.file_cs_type_,
                                            session->get_sql_mode()))) {
      LOG_WARN("fail to set params", K(ret));
    } else if (OB_FAIL(copy_exprs_for_shuffle_task(ctx, load_stmt, insert_infos,
                                                   generator.get_field_exprs(),
                                                   generator.get_insert_exprs()))) {
      LOG_WARN("fail to copy exprs", K(ret));
    }
  }
  if (OB_SUCC(ret)) {
    plan.set_vars(ctx.get_stmt_factory()->get_query_ctx()->variables_);
    ctx.get_my_session()->set_cur_phy_plan(&plan);
    OX(ctx.reference_my_plan(&plan));
    OZ(ctx.init_phy_op(1));
    // the tablet-id expression is only built for partitioned tables
    if (OB_SUCC(ret) && load_args.part_level_ != PARTITION_LEVEL_ZERO) {
      if (OB_FAIL(build_calc_partid_expr(ctx, load_stmt, calc_tablet_id_expr))) {
        LOG_WARN("fail to build expr", K(ret));
      }
    }
    // serialize the exec ctx once; every shuffle handle deserializes its own copy below
    if (OB_SUCC(ret)) {
      char *buf = NULL;
      int64_t size = ctx.get_serialize_size();
      int64_t pos = 0;
      if (OB_ISNULL(buf = static_cast<char *>(ctx.get_allocator().alloc(size)))) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("fail to allocate memory", K(ret), K(size));
      } else if (OB_FAIL(ctx.serialize(buf, size, pos))) {
        LOG_WARN("fail to serialize ctx", K(ret), K(size), K(pos));
      } else {
        exec_ctx_serialized_data = ObString(size, buf);
      }
    }
  }
  if (OB_SUCC(ret)) {
    double min_cpu;
    double max_cpu;
    if (OB_ISNULL(GCTX.omt_)) {
      ret = OB_ERR_UNEXPECTED;
    } else if (OB_FAIL(GCTX.omt_->get_tenant_cpu(load_args.tenant_id_, min_cpu, max_cpu))) {
      LOG_WARN("fail to get tenant cpu", K(ret));
    } else {
      // note: max_cpus is derived from the tenant's *min* cpu, at least 1
      max_cpus = std::max(1L, lround(min_cpu));
    }
  }
  if (OB_SUCC(ret)) {
    int64_t hint_parallel = 0;
    if (OB_FAIL(hint.get_value(ObLoadDataHint::PARALLEL_THREADS, hint_parallel))) {
      LOG_WARN("fail to get value", K(ret));
    } else {
      LOG_DEBUG("parallel calc", K(hint_parallel), K(max_cpus));
      parallel = hint_parallel > 0 ? hint_parallel : DEFAULT_PARALLEL_THREAD_COUNT;
      //parallel = std::min(parallel, max_cpus);
    }
  }
  if (OB_SUCC(ret)) {
    int64_t hint_batch_size = 0;
    int64_t hint_max_batch_buffer_size = 0;
    ObString hint_batch_buffer_size_str;
    if (OB_FAIL(hint.get_value(ObLoadDataHint::BATCH_SIZE, hint_batch_size))) {
      LOG_WARN("fail to get value", K(ret));
    } else if (0 == hint_batch_size) {
      batch_row_count = DEFAULT_BUFFERRED_ROW_COUNT;
    } else {
      // clamp the hint into [1, DEFAULT_BUFFERRED_ROW_COUNT]
      batch_row_count = std::max(1L, std::min(DEFAULT_BUFFERRED_ROW_COUNT, hint_batch_size));
    }
    if (OB_SUCC(ret)) {
      if (OB_FAIL(hint.get_value(ObLoadDataHint::BATCH_BUFFER_SIZE, hint_batch_buffer_size_str))) {
        LOG_WARN("fail to get value", K(ret));
      } else {
        bool is_valid = false;
        hint_batch_buffer_size_str = hint_batch_buffer_size_str.trim();
        if (!hint_batch_buffer_size_str.empty()) {
          hint_max_batch_buffer_size = ObConfigCapacityParser::get(to_cstring(hint_batch_buffer_size_str), is_valid);
        }
        // an empty or unparsable hint falls back to 1G
        if (!is_valid) {
          hint_max_batch_buffer_size = 1L << 30; // 1G
        }
        batch_buffer_size = MAX(ObLoadFileBuffer::MAX_BUFFER_SIZE, hint_max_batch_buffer_size);
      }
    }
    LOG_DEBUG("batch size", K(hint_batch_size), K(batch_row_count), K(batch_buffer_size));
  }
  if (OB_SUCC(ret)) {
    int64_t query_timeout = 0;
    if (OB_FAIL(hint.get_value(ObLoadDataHint::QUERY_TIMEOUT, query_timeout))) {
      LOG_WARN("fail to get value", K(ret));
    } else if (0 == query_timeout) {
      // no hint: use the session's query timeout, but not less than the RPC batch insert timeout
      if (OB_FAIL(ctx.get_my_session()->get_query_timeout(query_timeout))) {
        LOG_WARN("fail to get query timeout", KR(ret));
      } else {
        query_timeout = MAX(query_timeout, RPC_BATCH_INSERT_TIMEOUT_US);
        THIS_WORKER.set_timeout_ts(ctx.get_my_session()->get_query_start_time() + query_timeout);
      }
    } else if (query_timeout > 0) {
      THIS_WORKER.set_timeout_ts(ctx.get_my_session()->get_query_start_time() + query_timeout);
    }
  }
  if (OB_SUCC(ret)) {
    if (OB_FAIL(parser.init(file_formats, num_of_file_column, load_args.file_cs_type_))) {
      LOG_WARN("fail to init parser", K(ret));
    }
  }
  if (OB_SUCC(ret)) {
    if (OB_FAIL(shuffle_task_controller.init(parallel))) {
      LOG_WARN("fail to init shuffle task controller", K(ret));
    } else if (OB_FAIL(shuffle_task_reserve_queue.init(parallel + 1))) {
      LOG_WARN("fail to init shuffle_task_reserve_queue", K(ret));
    } else if (OB_FAIL(insert_task_controller.init(parallel * server_infos.count()))) {
      LOG_WARN("fail to init insert task controller", K(ret));
    } else if (OB_FAIL(insert_task_reserve_queue.init(parallel * server_infos.count() + 1))) {
      LOG_WARN("fail to init insert_task_reserve_queue", K(ret));
    } else if (OB_FAIL(ctx_allocators.reserve(parallel))) {
      LOG_WARN("fail to pre alloc allocators", K(ret));
    }
    /*
    for (int i = 0; OB_SUCC(ret) && i <parallel; ++i) {
      ObAllocatorSwitch *allocator = NULL;
      if (OB_ISNULL(allocator = OB_NEWx(ObAllocatorSwitch, (&ctx.get_allocator())))) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("Failed to alloc", K(ret));
      } else if (OB_FAIL(ctx_allocators.push_back(allocator))) {
        allocator->~ObAllocatorSwitch();
        LOG_WARN("fail to push back", K(ret));
      }
    }
    */
    if (OB_SUCC(ret)) {
      if (OB_ISNULL(temp_handle = OB_NEWx(ObShuffleTaskHandle, (&ctx.get_allocator()),
                                          data_frag_mgr, string_type_column_bitset, tenant_id))) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("Failed to alloc", K(ret));
      } else if (OB_FAIL(temp_handle->expand_buf(batch_buffer_size))) {
        LOG_WARN("fail to expand buf", K(ret));
      }
    }
    // one shuffle handle per parallel slot, each with its own exec ctx copy, parser and generator
    for (int i = 0; OB_SUCC(ret) && i < shuffle_task_controller.get_max_parallelism(); ++i) {
      ObShuffleTaskHandle *handle = nullptr;
      int64_t pos = 0;
      if (OB_ISNULL(handle = OB_NEWx(ObShuffleTaskHandle, (&ctx.get_allocator()),
                                     data_frag_mgr, string_type_column_bitset, tenant_id))) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("Failed to alloc", K(ret));
      } else {
        if (OB_FAIL(handle->expand_buf(batch_buffer_size))) {
          LOG_WARN("fail to expand buf", K(ret));
        } else if (OB_FAIL(handle->exec_ctx.deserialize(exec_ctx_serialized_data.ptr(),
                                                        exec_ctx_serialized_data.length(), pos))) {
          LOG_WARN("fail to deserialize", K(ret));
        } else if (OB_FAIL(handle->parser.init(file_formats, num_of_file_column, load_args.file_cs_type_))) {
          LOG_WARN("fail to init parser", K(ret));
        } else if (OB_FAIL(handle->generator.set_params(insert_stmt_head_buff, load_args.file_cs_type_, session->get_sql_mode()))) {
          LOG_WARN("fail to set params", K(ret));
        } else if (OB_FAIL(copy_exprs_for_shuffle_task(ctx, load_stmt, insert_infos,
                                                       handle->generator.get_field_exprs(),
                                                       handle->generator.get_insert_exprs()))) {
          LOG_WARN("fail to copy exprs", K(ret));
        } else if (OB_FAIL(shuffle_task_reserve_queue.push_back(handle))) {
          LOG_WARN("fail to push back", K(ret));
        }
        if (OB_SUCC(ret)) {
          handle->calc_tablet_id_expr = calc_tablet_id_expr;
          ObObj *obj_array = nullptr;
          if (OB_ISNULL(obj_array = static_cast<ObObj*>(
                        handle->allocator.alloc(sizeof(ObObj) * num_of_file_column)))) {
            ret = OB_ALLOCATE_MEMORY_FAILED;
            LOG_WARN("fail to allocate memory", K(ret));
          } else {
            // every file column starts as a varchar object in the file's collation
            for (ObObj *ptr = obj_array; ptr < obj_array + num_of_file_column; ++ptr) {
              new(ptr)ObObj();
              ptr->set_type(ObVarcharType);
              ptr->set_collation_type(load_args.file_cs_type_);
            }
            handle->row_in_file.assign(obj_array, num_of_file_column);
          }
        }
        // a handle that never made it into shuffle_resource is destructed right here
        if (OB_FAIL(ret) || OB_FAIL(shuffle_resource.push_back(handle))) {
          handle->~ObShuffleTaskHandle();
          LOG_WARN("init shuffle handle failed", K(ret));
        }
      }
    }
    // insert tasks are spread round-robin over the servers
    for (int i = 0; OB_SUCC(ret) && i < insert_task_controller.get_max_parallelism(); ++i) {
      int64_t server_j = i % server_infos.count();
      ObInsertTask *insert_task = nullptr;
      if (OB_ISNULL(insert_task = OB_NEWx(ObInsertTask, (&ctx.get_allocator())))) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("Failed to alloc", K(ret));
      } else if (OB_FAIL(insert_task->timezone_.deep_copy(ctx.get_my_session()->get_tz_info_wrap()))) {
        // The task is not tracked by insert_resource yet, so nothing else would ever run
        // its destructor; do it here, like the push_back failure path below.
        insert_task->~ObInsertTask();
        LOG_WARN("fail to copy timezone", K(ret));
      } else {
        // All tasks insert the same column names, so they can share one buffer for serialization.
        insert_task->insert_stmt_head_ = insert_stmt_head_buff;
        insert_task->column_count_ = insert_infos.count();
        insert_task->row_count_ = batch_row_count;
        insert_task->tenant_id_ = ctx.get_my_session()->get_effective_tenant_id();
        insert_task->token_server_idx_ = server_j;
        insert_task->sql_mode_ = ctx.get_my_session()->get_sql_mode();
        if (OB_FAIL(insert_resource.push_back(insert_task))) {
          insert_task->~ObInsertTask();
          LOG_WARN("fail to push back", K(ret));
        } else if (OB_FAIL(insert_task_reserve_queue.push_back(insert_task))) {
          LOG_WARN("fail to push back", K(ret));
        }
      }
    }
    if (OB_SUCC(ret)) {
      if (OB_FAIL(server_last_available_ts.init(ObMemAttr(tenant_id, ObModIds::OB_SQL_LOAD_DATA), MAX_SERVER_COUNT))) {
        LOG_WARN("fail to create server map", K(ret));
      }
    }
  }
  // log file name: fixed prefix + 6 base-62 characters derived from a hash of the current time
  constexpr const char* dict = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  constexpr int word_base = 62; //length of dict
  const int64_t file_id_len = 6;
  int64_t cur_ts = ObTimeUtil::current_time();
  if (OB_SUCC(ret)) {
    char *buf = NULL;
    static const char* loadlog_str = "log/obloaddata.log.";
    int64_t pre_len = strlen(loadlog_str);
    int64_t buf_len = file_id_len + pre_len;
    int64_t pos = 0;
    if (OB_ISNULL(buf = static_cast<char*>(ctx.get_allocator().alloc(buf_len)))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("no memory", K(ret), K(buf_len));
    } else {
      MEMCPY(buf + pos, loadlog_str, pre_len);
      pos += pre_len;
      uint32_t hash_ts = ::murmurhash2(&cur_ts, sizeof(cur_ts), 0);
      for (int i = 0; i < file_id_len && pos < buf_len; ++i) {
        buf[pos++] = dict[hash_ts % word_base];
        hash_ts /= word_base;
      }
    }
    if (OB_SUCC(ret)) {
      // length-delimited string; the buffer is not NUL-terminated
      log_file_name = ObString(pos, buf);
    }
  }
  if (OB_SUCC(ret)) {
    // `2 << 30` is evaluated in 32-bit int and overflows (2^31 > INT_MAX), which turned the
    // intended 2G default into a negative size; shift a long instead (same style as `1L << 30`).
    const int64_t fake_file_size = (file_size > 0) ? file_size : (2L << 30); // use 2G as default in load local mode
    int64_t max_task_count = (fake_file_size / ObLoadFileBuffer::MAX_BUFFER_SIZE + 1) * 2;
    if (OB_FAIL(file_buf_row_num.reserve(max_task_count))) {
      LOG_WARN("fail to reserve", K(ret));
    }
  }
  // human-readable summary of this load, stored in load_info
  if (OB_SUCC(ret)) {
    char *buf = NULL;
    int64_t buf_len = DEFAULT_BUF_LENGTH;
    int64_t pos = 0;
    if (OB_ISNULL(buf = static_cast<char*>(ctx.get_allocator().alloc(buf_len)))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("no memory", K(ret), K(buf_len));
    } else {
      const ObString &cur_query_str = ctx.get_my_session()->get_current_query_string();
      OZ (databuff_printf(buf, buf_len, pos,
                          "Tenant name:\t%.*s\n"
                          "File name:\t%.*s\n"
                          "Into table:\t%.*s\n"
                          "Parallel:\t%ld\n"
                          "Batch size:\t%ld\n"
                          "SQL trace:\t%s\n",
                          session->get_tenant_name().length(), session->get_tenant_name().ptr(),
                          load_args.file_name_.length(), load_args.file_name_.ptr(),
                          load_args.combined_name_.length(), load_args.combined_name_.ptr(),
                          parallel,
                          batch_row_count,
                          ObCurTraceId::get_trace_id_str()
                          ));
      OZ (databuff_printf(buf, buf_len, pos, "Start time:\t"));
      OZ (ObTimeConverter::datetime_to_str(cur_ts,
                                           TZ_INFO(session),
                                           ObString(),
                                           MAX_SCALE_FOR_TEMPORAL,
                                           buf, buf_len, pos, true));
      OZ (databuff_printf(buf, buf_len, pos, "\n"));
      OZ (databuff_printf(buf, buf_len, pos, "Load query: \n%.*s\n",
                          cur_query_str.length(), cur_query_str.ptr()));
      OX (load_info.assign_ptr(buf, pos));
    }
  }
  // publish a job status object; gid only becomes valid once registration succeeded
  if (OB_SUCC(ret)) {
    job_status = nullptr;
    if (OB_ISNULL(job_status = OB_NEWx(ObLoadDataStat, (&ctx.get_allocator())))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("fail to allocate memory", K(ret));
    } else {
      ObLoadDataGID temp_gid;
      ObLoadDataGID::generate_new_id(temp_gid);
      job_status->tenant_id_ = tenant_id;
      job_status->job_id_ = temp_gid.id;
      job_status->allocator_.set_tenant_id(tenant_id);
      OZ(ob_write_string(job_status->allocator_,
                         load_args.combined_name_, job_status->table_name_));
      OZ(ob_write_string(job_status->allocator_,
                         load_args.file_name_, job_status->file_path_));
      job_status->file_column_ = num_of_file_column;
      job_status->table_column_ = num_of_table_column;
      job_status->batch_size_ = batch_row_count;
      job_status->parallel_ = parallel;
      job_status->load_mode_ = static_cast<int64_t>(insert_mode);
      job_status->start_time_ = common::ObTimeUtility::current_time();
      job_status->total_bytes_ = file_size;
      if (OB_FAIL(ObGlobalLoadDataStatMap::getInstance()->register_job(temp_gid, job_status))) {
        LOG_WARN("fail to register job", K(ret));
      } else {
        gid = temp_gid;
      }
    }
  }
  return ret;
}
} // sql
} // oceanbase