[FEAT MERGE] load local files

This commit is contained in:
hnwyllmm
2023-12-14 04:13:25 +00:00
committed by ob-robot
parent 9187748933
commit 4d4732b7c7
45 changed files with 1671 additions and 503 deletions

View File

@ -183,6 +183,7 @@ ob_set_subtarget(ob_sql engine_cmd
engine/cmd/ob_kill_executor.cpp
engine/cmd/ob_kill_session_arg.cpp
engine/cmd/ob_load_data_direct_impl.cpp
engine/cmd/ob_load_data_file_reader.cpp
engine/cmd/ob_load_data_executor.cpp
engine/cmd/ob_load_data_impl.cpp
engine/cmd/ob_load_data_parser.cpp

View File

@ -9,7 +9,6 @@
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_ENG
#include "sql/engine/cmd/ob_load_data_direct_impl.h"
@ -20,6 +19,7 @@
#include "observer/table_load/ob_table_load_table_ctx.h"
#include "observer/table_load/ob_table_load_task.h"
#include "observer/table_load/ob_table_load_task_scheduler.h"
#include "observer/mysql/ob_query_driver.h"
#include "share/schema/ob_schema_getter_guard.h"
#include "share/ob_device_manager.h"
#include "share/backup/ob_backup_io_adapter.h"
@ -226,208 +226,6 @@ int ObLoadDataDirectImpl::Logger::log_error_line(const ObString &file_name, int6
return ret;
}
/**
* RandomFileReader
*/
// Creates the reader in the "not opened" state (is_inited_ = false).
ObLoadDataDirectImpl::RandomFileReader::RandomFileReader() : is_inited_(false)
{
}
// The destructor body is empty; no explicit cleanup is performed here.
ObLoadDataDirectImpl::RandomFileReader::~RandomFileReader()
{
}
int ObLoadDataDirectImpl::RandomFileReader::open(const DataAccessParam &data_access_param, const ObString &filename)
{
int ret = OB_SUCCESS;
UNUSED(data_access_param);
if (IS_INIT) {
ret = OB_INIT_TWICE;
LOG_WARN("RandomFileReader init twice", KR(ret), KP(this));
} else if (OB_FAIL(file_reader_.open(filename.ptr(), false))) {
LOG_WARN("fail to open file", KR(ret), K(filename));
} else {
filename_ = filename;
is_inited_ = true;
}
return ret;
}
int ObLoadDataDirectImpl::RandomFileReader::pread(char *buf, int64_t count, int64_t offset, int64_t &read_size)
{
int ret = OB_SUCCESS;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("RandomFileReader not init", KR(ret), KP(this));
} else if (OB_FAIL(file_reader_.pread(buf, count, offset, read_size))) {
LOG_WARN("fail to pread file buf", KR(ret), K(count), K(offset), K(read_size));
}
return ret;
}
// Stores the result of the global ::get_file_size() for the remembered
// filename into `file_size`. Returns OB_NOT_INIT when the reader is not opened.
int ObLoadDataDirectImpl::RandomFileReader::get_file_size(int64_t &file_size)
{
  int ret = OB_SUCCESS;
  if (IS_INIT) {
    file_size = ::get_file_size(filename_.ptr());
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("RandomFileReader not init", KR(ret), KP(this));
  }
  return ret;
}
/**
* RandomOSSReader
*/
// Creates the reader with no device handle and in the "not opened" state.
ObLoadDataDirectImpl::RandomOSSReader::RandomOSSReader() : device_handle_(nullptr), is_inited_(false)
{
}
// Tears the reader down in two steps: first close the file descriptor (if it
// is valid), then hand the device back to the device manager (if one is held).
ObLoadDataDirectImpl::RandomOSSReader::~RandomOSSReader()
{
if (fd_.is_valid()) {
// NOTE(review): assumes a valid fd_ implies a non-null device_handle_ — confirm.
device_handle_->close(fd_);
fd_.reset();
}
if (nullptr != device_handle_) {
common::ObDeviceManager::get_instance().release_device(device_handle_);
device_handle_ = nullptr;
}
}
int ObLoadDataDirectImpl::RandomOSSReader::open(const DataAccessParam &data_access_param,
const ObString &filename)
{
int ret = OB_SUCCESS;
ObIODOpt opt;
ObIODOpts iod_opts;
ObBackupIoAdapter util;
iod_opts.opts_ = &opt;
iod_opts.opt_cnt_ = 0;
if (IS_INIT) {
ret = OB_INIT_TWICE;
LOG_WARN("RandomOSSReader init twice", KR(ret), KP(this));
} else if (OB_FAIL(
util.get_and_init_device(device_handle_, &data_access_param.access_info_, filename))) {
LOG_WARN("fail to get device manager", KR(ret), K(filename));
} else if (OB_FAIL(util.set_access_type(&iod_opts, false, 1))) {
LOG_WARN("fail to set access type", KR(ret));
} else if (OB_FAIL(device_handle_->open(to_cstring(filename), -1, 0, fd_, &iod_opts))) {
LOG_WARN("fail to open oss file", KR(ret), K(filename));
} else {
is_inited_ = true;
}
return ret;
}
int ObLoadDataDirectImpl::RandomOSSReader::pread(char *buf, int64_t count, int64_t offset,
int64_t &read_size)
{
int ret = OB_SUCCESS;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("RandomOSSReader not init", KR(ret), KP(this));
} else if (OB_FAIL(device_handle_->pread(fd_, offset, count, buf, read_size))) {
LOG_WARN("fail to pread oss buf", KR(ret), K(offset), K(count), K(read_size));
}
return ret;
}
// Queries the size of the opened object via ObBackupIoAdapter::get_file_size().
// Returns OB_NOT_INIT when the reader is not opened.
int ObLoadDataDirectImpl::RandomOSSReader::get_file_size(int64_t &file_size)
{
  int ret = OB_SUCCESS;
  ObBackupIoAdapter util;
  if (IS_INIT) {
    if (OB_FAIL(util.get_file_size(device_handle_, fd_, file_size))) {
      LOG_WARN("fail to get oss file size", KR(ret), K(file_size));
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("RandomOSSReader not init", KR(ret), KP(this));
  }
  return ret;
}
/**
* SequentialDataAccessor
*/
// Starts with no backing device selected, a read cursor at offset 0 and the
// accessor marked as not initialized.
ObLoadDataDirectImpl::SequentialDataAccessor::SequentialDataAccessor()
: random_io_device_(nullptr), offset_(0), is_inited_(false)
{
}
// The destructor body is empty; no explicit cleanup is performed here.
ObLoadDataDirectImpl::SequentialDataAccessor::~SequentialDataAccessor()
{
}
// Picks the backing random-access reader from data_access_param.file_location_
// (server disk or OSS), opens it on `filename` and marks the accessor ready.
// Returns OB_INIT_TWICE, OB_INVALID_ARGUMENT, OB_NOT_SUPPORTED (unknown
// location, also forwarded as a user error message) or the reader's open error.
int ObLoadDataDirectImpl::SequentialDataAccessor::init(const DataAccessParam &data_access_param,
                                                       const ObString &filename)
{
  int ret = OB_SUCCESS;
  if (IS_INIT) {
    ret = OB_INIT_TWICE;
    LOG_WARN("ObLoadDataDirectImpl::SequentialDataAccessor init twice", KR(ret), KP(this));
  } else if (OB_UNLIKELY(!data_access_param.is_valid() || filename.empty())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid args", KR(ret), K(data_access_param), K(filename));
  } else {
    const bool on_server_disk = (data_access_param.file_location_ == ObLoadFileLocation::SERVER_DISK);
    const bool on_oss = !on_server_disk
                        && (data_access_param.file_location_ == ObLoadFileLocation::OSS);
    if (on_server_disk) {
      if (OB_SUCC(random_file_reader_.open(data_access_param, filename))) {
        random_io_device_ = &random_file_reader_;
      } else {
        LOG_WARN("fail to open random file reader", KR(ret), K(filename));
      }
    } else if (on_oss) {
      if (OB_SUCC(random_oss_reader_.open(data_access_param, filename))) {
        random_io_device_ = &random_oss_reader_;
      } else {
        LOG_WARN("fail to open random oss reader", KR(ret), K(filename));
      }
    } else {
      ret = OB_NOT_SUPPORTED;
      LOG_WARN("not supported load file location", KR(ret), K(data_access_param.file_location_));
      FORWARD_USER_ERROR_MSG(ret, "not supported load file location");
    }
    if (OB_SUCC(ret)) {
      is_inited_ = true;
    }
  }
  return ret;
}
int ObLoadDataDirectImpl::SequentialDataAccessor::read(char *buf, int64_t count, int64_t &read_size)
{
int ret = OB_SUCCESS;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ObLoadDataDirectImpl::SequentialDataAccessor not init", KR(ret), KP(this));
} else if (OB_UNLIKELY(nullptr == buf || count <= 0)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid args", KR(ret), KP(buf), K(count));
} else {
if (OB_FAIL(random_io_device_->pread(buf, count, offset_, read_size))) {
LOG_WARN("fail to do pread", KR(ret), K(offset_));
} else {
offset_ += read_size;
}
}
return ret;
}
// Delegates the size query to the selected backing device.
// Returns OB_NOT_INIT when init() has not succeeded.
int ObLoadDataDirectImpl::SequentialDataAccessor::get_file_size(int64_t &file_size)
{
  int ret = OB_SUCCESS;
  if (IS_INIT) {
    if (OB_FAIL(random_io_device_->get_file_size(file_size))) {
      LOG_WARN("fail to get random io device file size", KR(ret), K(file_size));
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("ObLoadDataDirectImpl::SequentialDataAccessor not init", KR(ret), KP(this));
  }
  return ret;
}
/**
* DataDescIterator
*/
@ -637,10 +435,23 @@ void ObLoadDataDirectImpl::DataBuffer::swap(DataBuffer &other)
*/
ObLoadDataDirectImpl::DataReader::DataReader()
: execute_ctx_(nullptr), end_offset_(0), read_raw_(false), is_iter_end_(false), is_inited_(false)
: execute_ctx_(nullptr),
file_reader_(nullptr),
end_offset_(0),
read_raw_(false),
is_iter_end_(false),
is_inited_(false)
{
}
// Runs the file reader's destructor explicitly (the object was created through
// an allocator rather than plain new) and clears the pointer. The memory
// itself is not freed here.
ObLoadDataDirectImpl::DataReader::~DataReader()
{
  if (nullptr != file_reader_) {
    file_reader_->~ObFileReader();
  }
  file_reader_ = nullptr;
}
int ObLoadDataDirectImpl::DataReader::init(const DataAccessParam &data_access_param,
LoadExecuteContext &execute_ctx,
const DataDesc &data_desc, bool read_raw)
@ -665,13 +476,28 @@ int ObLoadDataDirectImpl::DataReader::init(const DataAccessParam &data_access_pa
}
if (OB_SUCC(ret)) {
end_offset_ = data_desc.end_;
if (OB_FAIL(io_accessor_.init(data_access_param, data_desc.filename_))) {
LOG_WARN("fail to init io device", KR(ret), K(data_desc));
} else if (end_offset_ == -1 && OB_FAIL(io_accessor_.get_file_size(end_offset_))) {
LOG_WARN("fail to get file size", KR(ret), K(data_desc));
} else {
io_accessor_.seek(data_desc.start_);
ATOMIC_AAF(&execute_ctx_->job_stat_->total_bytes_, (end_offset_ - data_desc.start_));
ObFileReadParam file_read_param;
file_read_param.file_location_ = data_access_param.file_location_;
file_read_param.filename_ = data_desc.filename_;
file_read_param.access_info_ = data_access_param.access_info_;
file_read_param.packet_handle_ = &execute_ctx.exec_ctx_.get_session_info()->get_pl_query_sender()->get_packet_sender();
file_read_param.session_ = execute_ctx.exec_ctx_.get_session_info();
file_read_param.timeout_ts_ = THIS_WORKER.get_timeout_ts();
if (OB_FAIL(ObFileReader::open(file_read_param, *execute_ctx_->allocator_, file_reader_))) {
LOG_WARN("failed to open file", KR(ret), K(data_desc));
} else if (file_reader_->seekable()) {
if (end_offset_ == -1 && OB_FAIL(file_reader_->get_file_size(end_offset_))) {
LOG_WARN("fail to get file size", KR(ret), K(data_desc));
} else {
file_reader_->seek(data_desc.start_);
ATOMIC_AAF(&execute_ctx_->job_stat_->total_bytes_, (end_offset_ - data_desc.start_));
}
} else if (data_desc.start_ != 0) {
ret = OB_NOT_SUPPORTED; // should not happen
LOG_WARN("file reader asked to seek while not supported by unseekable file", KR(ret), K(file_read_param));
}
}
if (OB_SUCC(ret)) {
@ -681,6 +507,32 @@ int ObLoadDataDirectImpl::DataReader::init(const DataAccessParam &data_access_pa
return ret;
}
// Fills the free space of `file_buffer` with data from the file reader.
// When end_offset_ is positive the request is capped so that reading never
// goes past it. Successful reads advance the buffer position and the job's
// read_bytes_ counter.
// Returns OB_BUF_NOT_ENOUGH when the buffer has no free space, otherwise
// OB_SUCCESS or the reader's error. At end of file this is a no-op.
int ObLoadDataDirectImpl::DataReader::read_buffer(ObLoadFileBuffer &file_buffer)
{
  int ret = OB_SUCCESS;
  int64_t read_count = file_buffer.get_remain_len();
  if (0 == read_count) {
    ret = OB_BUF_NOT_ENOUGH;
    LOG_WARN("cannot read more data as buffer is full", KR(ret));
  } else if (is_end_file()) {
    // nothing left to read
  } else {
    int64_t read_size = 0;
    if (end_offset_ > 0) {
      const int64_t bytes_left = end_offset_ - file_reader_->get_offset();
      if (read_count > bytes_left) {
        read_count = bytes_left;
      }
    }
    if (OB_FAIL(file_reader_->readn(file_buffer.current_ptr(), read_count, read_size))) {
      LOG_WARN("fail to read file", KR(ret));
    } else if (0 != read_size) {
      file_buffer.update_pos(read_size); // update the length of the data held in the buffer
      LOG_TRACE("read file sucess", K(read_size));
      ATOMIC_AAF(&execute_ctx_->job_stat_->read_bytes_, read_size);
    } else {
      LOG_TRACE("read nothing", K(is_end_file()));
    }
  }
  return ret;
}
int ObLoadDataDirectImpl::DataReader::get_next_buffer(ObLoadFileBuffer &file_buffer,
int64_t &line_count, int64_t limit)
{
@ -704,25 +556,14 @@ int ObLoadDataDirectImpl::DataReader::get_next_buffer(ObLoadFileBuffer &file_buf
if (OB_FAIL(data_trimer_.recover_incomplate_data(file_buffer))) {
LOG_WARN("fail to recover incomplate data", KR(ret));
}
// 2. 从文件里读取后续的数据
else if (!is_end_file()) {
int64_t read_count = 0;
int64_t read_size = 0;
if (FALSE_IT(read_count =
MIN(file_buffer.get_remain_len(), end_offset_ - io_accessor_.get_offset()))) {
} else if (OB_FAIL(io_accessor_.read(file_buffer.current_ptr(), read_count, read_size))) {
LOG_WARN("fail to read file", KR(ret));
} else if (OB_UNLIKELY(read_count != read_size)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected read size", KR(ret), K(read_count), K(read_size), K(end_offset_));
} else {
file_buffer.update_pos(read_size); // 更新buffer中数据长度
ATOMIC_AAF(&execute_ctx_->job_stat_->read_bytes_, read_size);
}
}
// 3. 从buffer中找出完整的行,剩下的数据缓存到data_trimer
// 2. 读取数据,然后从buffer中找出完整的行,剩下的数据缓存到data_trimer
if (OB_SUCC(ret)) {
if (!file_buffer.is_valid()) {
int64_t complete_cnt = limit;
int64_t complete_len = 0;
if (OB_FAIL(read_buffer(file_buffer))) {
LOG_WARN("failed to read buffer as there is not enough data to parse", KR(ret));
} else if (!file_buffer.is_valid()) {
is_iter_end_ = true;
ret = OB_ITER_END;
} else {
@ -760,19 +601,37 @@ int ObLoadDataDirectImpl::DataReader::get_next_raw_buffer(DataBuffer &data_buffe
} else if (is_end_file()) {
ret = OB_ITER_END;
} else if (data_buffer.get_remain_length() > 0) {
const int64_t read_count =
MIN(data_buffer.get_remain_length(), end_offset_ - io_accessor_.get_offset());
int64_t read_count = data_buffer.get_remain_length();
if (file_reader_->seekable() && read_count > end_offset_ - file_reader_->get_offset()) {
read_count = end_offset_ - file_reader_->get_offset();
}
int64_t read_size = 0;
if (OB_FAIL(io_accessor_.read(data_buffer.data() + data_buffer.get_data_length(), read_count,
if (OB_FAIL(file_reader_->readn(data_buffer.data() + data_buffer.get_data_length(), read_count,
read_size))) {
LOG_WARN("fail to read file", KR(ret));
} else if (OB_UNLIKELY(read_count != read_size)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected read size", KR(ret), K(read_count), K(read_size), K(end_offset_));
} else {
} else if (read_size > 0) {
data_buffer.update_data_length(read_size);
ATOMIC_AAF(&execute_ctx_->job_stat_->read_bytes_, read_size);
}
} else {
// read_size == 0
if (is_end_file()) {
ret = OB_ITER_END;
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("read buffer got unexpected bytes.", K(read_size), K(read_count), K(ret));
}
}
}
return ret;
}
// True when the reader reports EOF, or — when a positive end_offset_ is set —
// when the reader's offset has reached that bound.
bool ObLoadDataDirectImpl::DataReader::is_end_file() const
{
  if (file_reader_->eof()) {
    return true;
  }
  return end_offset_ > 0 && file_reader_->get_offset() >= end_offset_;
}
@ -931,16 +790,32 @@ int ObLoadDataDirectImpl::SimpleDataSplitUtils::split(const DataAccessParam &dat
data_access_param.file_cs_type_))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected data format", KR(ret), K(data_access_param));
} else if (1 == count) {
} else if (1 == count || (ObLoadFileLocation::CLIENT_DISK == data_access_param.file_location_)) {
if (OB_FAIL(data_desc_iter.add_data_desc(data_desc))) {
LOG_WARN("fail to push back", KR(ret));
}
} else {
ObArenaAllocator allocator;
allocator.set_tenant_id(MTL_ID());
int64_t end_offset = data_desc.end_;
SequentialDataAccessor io_device;
if (OB_FAIL(io_device.init(data_access_param, data_desc.filename_))) {
LOG_WARN("fail to init io device", KR(ret), K(data_desc.filename_));
} else if (-1 == end_offset && OB_FAIL(io_device.get_file_size(end_offset))) {
ObFileReadParam file_read_param;
file_read_param.file_location_ = data_access_param.file_location_;
file_read_param.filename_ = data_desc.filename_;
file_read_param.access_info_ = data_access_param.access_info_;
file_read_param.packet_handle_ = NULL;
file_read_param.session_ = NULL;
file_read_param.timeout_ts_ = THIS_WORKER.get_timeout_ts();
ObFileReader *file_reader = NULL;
if (OB_FAIL(ObFileReader::open(file_read_param, allocator, file_reader))) {
LOG_WARN("failed to open file.", KR(ret), K(data_desc));
} else if (!file_reader->seekable()) {
if (OB_FAIL(data_desc_iter.add_data_desc(data_desc))) {
LOG_WARN("fail to push back", KR(ret));
}
} else if (-1 == end_offset && OB_FAIL(file_reader->get_file_size(end_offset))) {
LOG_WARN("fail to get io device file size", KR(ret), K(end_offset));
} else {
const int64_t file_size = end_offset - data_desc.start_;
@ -953,26 +828,24 @@ int ObLoadDataDirectImpl::SimpleDataSplitUtils::split(const DataAccessParam &dat
const char line_term_char = data_access_param.file_format_.line_term_str_.ptr()[0];
const int64_t buf_size = (128LL << 10) + 1;
const int64_t split_size = file_size / count;
ObArenaAllocator allocator;
char *buf = nullptr;
int64_t read_size = 0;
DataDesc data_desc_ret;
data_desc_ret.file_idx_ = data_desc.file_idx_;
data_desc_ret.filename_ = data_desc.filename_;
data_desc_ret.start_ = data_desc.start_;
allocator.set_tenant_id(MTL_ID());
if (OB_ISNULL(buf = static_cast<char *>(allocator.alloc(buf_size)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc memory", KR(ret));
}
for (int64_t i = 0; OB_SUCC(ret) && i < count - 1; ++i) {
int64_t read_offset = data_desc.start_ + split_size * (i + 1);
io_device.seek(read_offset);
file_reader->seek(read_offset);
char *found = nullptr;
while (OB_SUCC(ret) && end_offset > io_device.get_offset() && nullptr == found) {
read_offset = io_device.get_offset();
while (OB_SUCC(ret) && end_offset > file_reader->get_offset() && nullptr == found) {
read_offset = file_reader->get_offset();
const int64_t read_count = MIN(end_offset - read_offset, buf_size - 1);
if (OB_FAIL(io_device.read(buf, read_count, read_size))) {
if (OB_FAIL(file_reader->readn(buf, read_count, read_size))) {
LOG_WARN("fail to do read", KR(ret), K(read_offset), K(read_count));
} else if (OB_UNLIKELY(read_count != read_size)) {
ret = OB_ERR_UNEXPECTED;
@ -1004,6 +877,11 @@ int ObLoadDataDirectImpl::SimpleDataSplitUtils::split(const DataAccessParam &dat
}
}
}
if (OB_NOT_NULL(file_reader)) {
file_reader->~ObFileReader();
allocator.free(file_reader);
}
}
return ret;
}
@ -1139,6 +1017,7 @@ int ObLoadDataDirectImpl::FileLoadExecutor::execute()
LOG_WARN("fail to prepare execute", KR(ret));
}
LOG_TRACE("file load executor prepare execute done", K(ret));
while (OB_SUCC(ret) && OB_SUCC(execute_ctx_->exec_ctx_.check_status())) {
TaskHandle *handle = nullptr;
if (OB_FAIL(get_next_task_handle(handle))) {
@ -1177,8 +1056,9 @@ int ObLoadDataDirectImpl::FileLoadExecutor::execute()
LOG_WARN("fail to handle all task result", KR(ret));
}
}
}
LOG_TRACE("large file load executor init done", K(ret));
return ret;
}

View File

@ -9,7 +9,6 @@
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#pragma once
#include "lib/allocator/page_arena.h"
@ -20,6 +19,7 @@
#include "share/table/ob_table_load_define.h"
#include "sql/engine/cmd/ob_load_data_impl.h"
#include "sql/engine/cmd/ob_load_data_parser.h"
#include "sql/engine/cmd/ob_load_data_file_reader.h"
#include "common/storage/ob_io_device.h"
#include "observer/table_load/ob_table_load_exec_ctx.h"
#include "observer/table_load/ob_table_load_instance.h"
@ -170,61 +170,6 @@ private:
int64_t pos_;
};
// Abstract interface for a data source that supports positional reads.
class IRandomIODevice
{
public:
virtual ~IRandomIODevice() = default;
// Opens `filename`; implementations may use `data_access_param` for access details.
virtual int open(const DataAccessParam &data_access_param, const ObString &filename) = 0;
// Reads up to `count` bytes at `offset` into `buf`; `read_size` receives the bytes read.
virtual int pread(char *buf, int64_t count, int64_t offset, int64_t &read_size) = 0;
// Reports the size of the opened source through `file_size`.
virtual int get_file_size(int64_t &file_size) = 0;
};
// IRandomIODevice implementation backed by an ObFileReader member.
class RandomFileReader : public IRandomIODevice
{
public:
RandomFileReader();
virtual ~RandomFileReader();
int open(const DataAccessParam &data_access_param, const ObString &filename) override;
int pread(char *buf, int64_t count, int64_t offset, int64_t &read_size) override;
int get_file_size(int64_t &file_size) override;
private:
ObString filename_;        // name passed to open(), reused for size queries
ObFileReader file_reader_; // underlying file handle wrapper
bool is_inited_;           // set once open() succeeds
};
// IRandomIODevice implementation that reads through an ObIODevice handle.
class RandomOSSReader : public IRandomIODevice
{
public:
RandomOSSReader();
virtual ~RandomOSSReader();
int open(const DataAccessParam &data_access_param, const ObString &filename) override;
int pread(char *buf, int64_t count, int64_t offset, int64_t &read_size) override;
int get_file_size(int64_t &file_size) override;
private:
ObIODevice *device_handle_; // device obtained in open(), released in the destructor
ObIOFd fd_;                 // descriptor of the opened object on that device
bool is_inited_;            // set once open() succeeds
};
// Adds a sequential read cursor on top of an IRandomIODevice. Both concrete
// readers are held as members; random_io_device_ points at the one selected
// by init().
class SequentialDataAccessor
{
public:
SequentialDataAccessor();
~SequentialDataAccessor();
int init(const DataAccessParam &data_access_param, const ObString &filename);
// Reads at the current cursor and advances it by the bytes read.
int read(char *buf, int64_t count, int64_t &read_size);
int get_file_size(int64_t &file_size);
// Moves the cursor; no validation is performed.
void seek(int64_t offset) { offset_ = offset; }
int64_t get_offset() const { return offset_; }
private:
RandomFileReader random_file_reader_;
RandomOSSReader random_oss_reader_;
IRandomIODevice *random_io_device_; // selected backing reader, or nullptr before init()
int64_t offset_;                    // current read cursor
bool is_inited_;
};
struct DataBuffer
{
public:
@ -256,20 +201,25 @@ private:
{
public:
DataReader();
~DataReader();
int init(const DataAccessParam &data_access_param, LoadExecuteContext &execute_ctx,
const DataDesc &data_desc, bool read_raw = false);
int get_next_buffer(ObLoadFileBuffer &file_buffer, int64_t &line_count,
int64_t limit = INT64_MAX);
int get_next_raw_buffer(DataBuffer &data_buffer);
bool has_incomplate_data() const { return data_trimer_.has_incomplate_data(); }
bool is_end_file() const { return io_accessor_.get_offset() >= end_offset_; }
bool is_end_file() const;
ObCSVGeneralParser &get_csv_parser() { return csv_parser_; }
private:
int read_buffer(ObLoadFileBuffer &file_buffer);
private:
LoadExecuteContext *execute_ctx_;
ObCSVGeneralParser csv_parser_; // 用来计算完整行
ObLoadFileDataTrimer data_trimer_; // 缓存不完整行的数据
SequentialDataAccessor io_accessor_;
int64_t end_offset_;
ObFileReader *file_reader_;
int64_t end_offset_; // use -1 in stream file such as load data local
bool read_raw_;
bool is_iter_end_;
bool is_inited_;

View File

@ -38,6 +38,7 @@ int ObLoadDataExecutor::check_is_direct_load(ObTableDirectInsertCtx &ctx, const
} else {
ctx.set_is_direct(false);
}
LOG_INFO("check load data is direct done.", K(ctx.get_is_direct()));
return ret;
}
@ -70,6 +71,8 @@ int ObLoadDataExecutor::execute(ObExecContext &ctx, ObLoadDataStmt &stmt)
if (OB_SUCC(ret)) {
if (OB_FAIL(load_impl->execute(ctx, stmt))) {
LOG_WARN("failed to execute load data stmt", K(ret));
} else {
LOG_TRACE("load data success");
}
load_impl->~ObLoadDataBase();
}

View File

@ -0,0 +1,458 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_ENG
#include "sql/engine/cmd/ob_load_data_file_reader.h"
#include "share/ob_device_manager.h"
#include "share/backup/ob_backup_io_adapter.h"
#include "rpc/obmysql/ob_i_cs_mem_pool.h"
#include "rpc/obmysql/ob_mysql_packet.h"
#include "rpc/obmysql/packet/ompk_local_infile.h"
namespace oceanbase
{
namespace sql
{
/**
* ObFileReadParam
*/
// Default parameters: no packet handle, no session and timeout_ts_ = -1.
// NOTE(review): the other members callers assign (file_location_, filename_,
// access_info_) are not listed here and rely on their own default
// initialization — confirm file_location_ has a defined default.
ObFileReadParam::ObFileReadParam()
: packet_handle_(NULL),
session_(NULL),
timeout_ts_(-1)
{
}
/**
* ObFileReader
*/
int ObFileReader::open(const ObFileReadParam &param, ObIAllocator &allocator, ObFileReader *& file_reader)
{
int ret = OB_SUCCESS;
if (param.file_location_ == ObLoadFileLocation::SERVER_DISK) {
ObRandomFileReader *tmp_reader = OB_NEWx(ObRandomFileReader, &allocator, allocator);
if (OB_ISNULL(tmp_reader)) {
LOG_WARN("failed to create ObRandomFileReader", K(ret));
} else if (OB_FAIL(tmp_reader->open(param.filename_))) {
LOG_WARN("fail to open random file reader", KR(ret), K(param.filename_));
tmp_reader->~ObRandomFileReader();
allocator.free(tmp_reader);
} else {
file_reader = tmp_reader;
}
} else if (param.file_location_ == ObLoadFileLocation::OSS) {
ObRandomOSSReader *tmp_reader = OB_NEWx(ObRandomOSSReader, &allocator, allocator);
if (OB_ISNULL(tmp_reader)) {
LOG_WARN("failed to create RandomOSSReader", K(ret));
} else if (OB_FAIL(tmp_reader->open(param.access_info_, param.filename_))) {
LOG_WARN("fail to open random oss reader", KR(ret), K(param.filename_));
tmp_reader->~ObRandomOSSReader();
allocator.free(tmp_reader);
} else {
file_reader = tmp_reader;
}
} else if (param.file_location_ == ObLoadFileLocation::CLIENT_DISK) {
if (OB_ISNULL(param.packet_handle_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("cannot create packet stream file reader while the packet handle is null", K(ret));
} else {
ObPacketStreamFileReader *tmp_reader = OB_NEWx(ObPacketStreamFileReader, &allocator, allocator);
if (OB_ISNULL(tmp_reader)) {
LOG_WARN("failed to create ObPacketStreamFileReader", K(ret));
} else if (OB_FAIL(tmp_reader->open(param.filename_, *param.packet_handle_, param.session_, param.timeout_ts_))) {
LOG_WARN("failed to open packet stream file reader", KR(ret), K(param.filename_));
tmp_reader->~ObPacketStreamFileReader();
allocator.free(tmp_reader);
} else {
file_reader = tmp_reader;
}
}
} else {
ret = OB_NOT_SUPPORTED;
LOG_WARN("not supported load file location", KR(ret), K(param.file_location_));
}
return ret;
}
int ObFileReader::readn(char *buffer, int64_t count, int64_t &read_size)
{
int ret = OB_SUCCESS;
read_size = 0;
while (OB_SUCC(ret) && !eof() && read_size < count) {
int64_t this_read_size = 0;
ret = this->read(buffer + read_size, count - read_size, this_read_size);
if (OB_SUCC(ret)) {
read_size += this_read_size;
}
}
return ret;
}
/**
* ObRandomFileReader
*/
// Creates an unopened reader: cursor at offset 0, EOF flag cleared, not initialized.
ObRandomFileReader::ObRandomFileReader(ObIAllocator &allocator)
: ObFileReader(allocator),
offset_(0),
eof_(false),
is_inited_(false)
{
}
// The destructor body is empty; no explicit cleanup is performed here.
ObRandomFileReader::~ObRandomFileReader()
{
}
int ObRandomFileReader::open(const ObString &filename)
{
int ret = OB_SUCCESS;
if (is_inited_) {
ret = OB_INIT_TWICE;
LOG_WARN("ObRandomFileReader init twice", KR(ret), KP(this));
} else if (OB_FAIL(file_reader_.open(filename.ptr(), false))) {
LOG_WARN("fail to open file", KR(ret), K(filename));
} else {
filename_ = filename;
offset_ = 0;
eof_ = false;
is_inited_ = true;
}
return ret;
}
// Reads up to `count` bytes at the current cursor. A successful read of zero
// bytes marks EOF; otherwise the cursor advances by the bytes read.
// Returns OB_NOT_INIT when the reader is not opened, or the pread error.
int ObRandomFileReader::read(char *buf, int64_t count, int64_t &read_size)
{
  int ret = OB_SUCCESS;
  if (is_inited_) {
    if (OB_FAIL(file_reader_.pread(buf, count, offset_, read_size))) {
      LOG_WARN("fail to pread file buf", KR(ret), K(count), K_(offset), K(read_size));
    } else if (0 != read_size) {
      offset_ += read_size;
    } else {
      eof_ = true;
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("ObRandomFileReader not init", KR(ret), KP(this));
  }
  return ret;
}
// Moves the read cursor to `offset` (no bounds validation) and clears the EOF
// flag. Without the reset, a reader that had hit EOF would keep reporting it
// after being repositioned inside the file, and readn() would return no data.
// Always returns OB_SUCCESS.
int ObRandomFileReader::seek(int64_t offset)
{
  offset_ = offset;
  eof_ = false;
  return OB_SUCCESS;
}
// Stores the result of the global ::get_file_size() for the opened filename
// into `file_size`. Returns OB_NOT_INIT when the reader is not opened.
int ObRandomFileReader::get_file_size(int64_t &file_size)
{
  int ret = OB_SUCCESS;
  if (is_inited_) {
    file_size = ::get_file_size(filename_.ptr());
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("ObRandomFileReader not init", KR(ret), KP(this));
  }
  return ret;
}
/**
* ObRandomOSSReader
*/
// Creates an unopened reader: no device handle, cursor at offset 0, EOF flag
// cleared, not initialized.
ObRandomOSSReader::ObRandomOSSReader(ObIAllocator &allocator)
: ObFileReader(allocator),
device_handle_(nullptr),
offset_(0),
eof_(false),
is_inited_(false)
{
}
// Tears the reader down in order: close the descriptor (if valid), return the
// device to the device manager (if one is held), then clear the init flag.
ObRandomOSSReader::~ObRandomOSSReader()
{
if (fd_.is_valid()) {
// NOTE(review): assumes a valid fd_ implies a non-null device_handle_ — confirm.
device_handle_->close(fd_);
fd_.reset();
}
if (nullptr != device_handle_) {
common::ObDeviceManager::get_instance().release_device(device_handle_);
device_handle_ = nullptr;
}
is_inited_ = false;
}
// Acquires a device for `filename` with the given storage info, sets the
// access type on the IO options and opens the object. On success the cursor
// and EOF flag are reset and the reader is marked initialized.
// Returns OB_INIT_TWICE when already opened, otherwise the first failing step's code.
int ObRandomOSSReader::open(const share::ObBackupStorageInfo &storage_info, const ObString &filename)
{
  int ret = OB_SUCCESS;
  ObIODOpt opt;
  ObIODOpts iod_opts;
  ObBackupIoAdapter util;
  iod_opts.opts_ = &opt;
  iod_opts.opt_cnt_ = 0;
  if (IS_INIT) {
    ret = OB_INIT_TWICE;
    LOG_WARN("ObRandomOSSReader init twice", KR(ret), KP(this));
  } else {
    if (OB_FAIL(util.get_and_init_device(device_handle_, &storage_info, filename))) {
      LOG_WARN("fail to get device manager", KR(ret), K(filename));
    }
    if (OB_SUCC(ret) && OB_FAIL(util.set_access_type(&iod_opts, false, 1))) {
      LOG_WARN("fail to set access type", KR(ret));
    }
    if (OB_SUCC(ret) && OB_FAIL(device_handle_->open(to_cstring(filename), -1, 0, fd_, &iod_opts))) {
      LOG_WARN("fail to open oss file", KR(ret), K(filename));
    }
    if (OB_SUCC(ret)) {
      offset_ = 0;
      eof_ = false;
      is_inited_ = true;
    }
  }
  return ret;
}
int ObRandomOSSReader::read(char *buf, int64_t count, int64_t &read_size)
{
int ret = OB_SUCCESS;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ObRandomOSSReader not init", KR(ret), KP(this));
} else if (OB_FAIL(device_handle_->pread(fd_, offset_, count, buf, read_size))) {
LOG_WARN("fail to pread oss buf", KR(ret), K_(offset), K(count), K(read_size));
} else if (0 == read_size) {
eof_ = true;
} else {
offset_ += read_size;
}
return ret;
}
// Moves the read cursor to `offset` (no bounds validation) and clears the EOF
// flag. Without the reset, a reader that had hit EOF would keep reporting it
// after being repositioned inside the object, and readn() would return no data.
// Always returns OB_SUCCESS.
int ObRandomOSSReader::seek(int64_t offset)
{
  offset_ = offset;
  eof_ = false;
  return OB_SUCCESS;
}
// Queries the size of the opened object via ObBackupIoAdapter::get_file_size().
// Returns OB_NOT_INIT when the reader is not opened.
int ObRandomOSSReader::get_file_size(int64_t &file_size)
{
  int ret = OB_SUCCESS;
  ObBackupIoAdapter util;
  if (IS_INIT) {
    if (OB_FAIL(util.get_file_size(device_handle_, fd_, file_size))) {
      LOG_WARN("fail to get oss file size", KR(ret), K(file_size));
    }
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("ObRandomOSSReader not init", KR(ret), KP(this));
  }
  return ret;
}
/**
* ObPacketStreamFileReader
*/
// Adapts an ObIAllocator to the obmysql::ObICSMemPool interface so that the
// packet reading code can allocate from it. The allocator is borrowed, not owned.
class CSMemPoolAdaptor : public obmysql::ObICSMemPool
{
public:
  explicit CSMemPoolAdaptor(ObIAllocator *allocator) : allocator_(allocator) {}
  virtual ~CSMemPoolAdaptor() = default;

  // Forwards the request to the wrapped allocator.
  void *alloc(int64_t size) override
  {
    void *mem = allocator_->alloc(size);
    return mem;
  }

private:
  ObIAllocator *allocator_;
};
// Creates an unopened stream reader: no packet handle, no session, no cached
// packet, zeroed byte counters and EOF flag cleared. arena_allocator_ is
// constructed from `allocator`.
ObPacketStreamFileReader::ObPacketStreamFileReader(ObIAllocator &allocator)
: ObStreamFileReader(allocator),
packet_handle_(NULL),
session_(NULL),
timeout_ts_(INT64_MAX),
arena_allocator_(allocator),
cached_packet_(NULL),
received_size_(0),
read_size_(0),
eof_(false)
{
}
// Drains the remaining file-content packets before the reader goes away.
//
// Even when loading stops early because of an error, the client still sends
// the whole file, and the normal SQL processor cannot handle those packets.
// So every packet up to the terminating empty one is consumed here. The
// timeout is disabled (-1) for the drain.
ObPacketStreamFileReader::~ObPacketStreamFileReader()
{
  int ret = OB_SUCCESS;
  timeout_ts_ = -1;
  // receive_packet() dereferences packet_handle_, so skip the drain entirely
  // when the reader was never bound to a connection by open().
  while (OB_NOT_NULL(packet_handle_) && !eof_ && OB_SUCC(ret)) {
    ret = receive_packet();
  }
  arena_allocator_.reset();
}
int ObPacketStreamFileReader::open(const ObString &filename,
observer::ObIMPPacketSender &packet_handle,
ObSQLSessionInfo *session,
int64_t timeout_ts)
{
int ret = OB_SUCCESS;
if (OB_NOT_NULL(packet_handle_)) {
ret = OB_INIT_TWICE;
} else {
// in `load data local` request, we should send the filename to client
obmysql::OMPKLocalInfile filename_packet;
filename_packet.set_filename(filename);
if (OB_FAIL(packet_handle.response_packet(filename_packet, session))) {
LOG_INFO("failed to send local infile packet to client", K(ret), K(filename));
} else if (OB_FAIL(packet_handle.flush_buffer(false/*is_last*/))) {
LOG_INFO("failed to flush socket buffer while send local infile packet", K(ret), K(filename));
} else {
LOG_TRACE("send filename to client success", K(filename));
observer::ObSMConnection *sm_connection = session->get_sm_connection();
if (OB_NOT_NULL(sm_connection) &&
sm_connection->pkt_rec_wrapper_.enable_proto_dia()) {
sm_connection->pkt_rec_wrapper_.record_send_mysql_pkt(filename_packet,
filename_packet.get_serialize_size() + OB_MYSQL_HEADER_LENGTH);
}
}
packet_handle_ = &packet_handle;
session_ = session;
timeout_ts_ = timeout_ts;
received_size_ = 0;
read_size_ = 0;
eof_ = false;
}
return ret;
}
/**
 * As described in the MySQL/MariaDB documentation, the client sends the file
 * content as a sequence of packets and signals the end of the file with an
 * empty packet. Every non-empty packet has the format:
 * -------------------
 * MySQL Packet Header
 * string<EOF>
 * -------------------
 * The notation "string<EOF>" means a string whose length is the remaining
 * length of the packet.
 *
 * Copies up to `count` bytes of the current packet's payload into `buf`,
 * fetching the next packet first when the cached one is exhausted.
 *
 * @param buf        destination buffer
 * @param count      maximum number of bytes to copy
 * @param read_size  receives the number of bytes copied (0 at EOF)
 * @return OB_SUCCESS, the receive error, OB_TIMEOUT, OB_ERR_QUERY_INTERRUPTED,
 *         OB_SESSION_KILLED, or OB_IO_ERROR when nothing could be read although
 *         the stream is not at EOF.
 */
int ObPacketStreamFileReader::read(char *buf, int64_t count, int64_t &read_size)
{
  int ret = OB_SUCCESS;
  // Fetch a new packet only while the stream is still open. Once EOF has been
  // seen, the next packet on the connection belongs to the client's next
  // command and must not be consumed here.
  if (!eof_ && (OB_ISNULL(cached_packet_) || read_size_ == received_size_)) {
    ret = receive_packet();
  }
  const int64_t remain_in_packet = received_size_ - read_size_;
  if (OB_SUCC(ret) && OB_NOT_NULL(cached_packet_) && (!eof_ || remain_in_packet > 0)) {
    read_size = MIN(count, remain_in_packet);
    // a MySQL packet contains a header and payload. The payload is the file content here.
    // In the mysql_packet code, it use the first byte as MySQL command, but there is no
    // MySQL command in the file content packet, so we backward 1 byte.
    const int64_t packet_offset = cached_packet_->get_pkt_len() - remain_in_packet;
    MEMCPY(buf, cached_packet_->get_cdata() - 1 + packet_offset, read_size);
    read_size_ += read_size;
  } else {
    read_size = 0;
  }
  // Timeout and kill conditions take precedence over whatever was read.
  if (is_timeout()) {
    ret = OB_TIMEOUT;
    LOG_WARN("load data reader file timeout", KR(ret));
  } else if (session_ != NULL && session_->is_query_killed()) {
    ret = OB_ERR_QUERY_INTERRUPTED;
    LOG_WARN("load data reader terminated as the query is killed", KR(ret));
  } else if (session_ != NULL && session_->is_zombie()) {
    ret = OB_SESSION_KILLED;
    LOG_WARN("load data reader terminated as the session is killed", KR(ret));
  } else if (!eof_ && read_size == 0) {
    ret = OB_IO_ERROR;
    LOG_WARN("[should not happen] cannot read data but eof is false", KR(ret));
  }
  return ret;
}
// Releases the cached packet and waits for the next one from the client.
// Polls read_packet() with a 100 ms pause between attempts until a packet
// arrives, an error occurs, the timeout expires or the session is killed.
// An empty packet marks EOF; any error also ends the stream (eof_ = true).
int ObPacketStreamFileReader::receive_packet()
{
  int ret = release_packet();
  if (OB_SUCC(ret)) {
    arena_allocator_.reuse();
    CSMemPoolAdaptor mem_pool(&arena_allocator_);
    obmysql::ObMySQLPacket *pkt = NULL;
    // We read packet until we got one or timeout or error occurs
    bool keep_waiting = true;
    while (keep_waiting) {
      ret = packet_handle_->read_packet(mem_pool, pkt);
      cached_packet_ = static_cast<obmysql::ObMySQLRawPacket *>(pkt);
      keep_waiting = OB_SUCC(ret) && OB_ISNULL(cached_packet_) && !is_timeout() && !is_killed();
      if (keep_waiting) {
        // sleep can reduce cpu usage while the network is not so good.
        // We need not worry about the speed while the speed of load data core is lower than
        // file receiver's.
        usleep(100 * 1000); // 100 ms
      }
    }
    if (OB_SUCC(ret) && OB_NOT_NULL(cached_packet_)) {
      const int pkt_len = cached_packet_->get_pkt_len();
      if (0 != pkt_len) {
        received_size_ += pkt_len;
        LOG_TRACE("got a packet", K(pkt_len));
      } else { // empty packet
        eof_ = true;
        (void)release_packet();
      }
    }
  }
  // If anything wrong, we end the reading
  if (OB_FAIL(ret)) {
    eof_ = true;
  }
  return ret;
}
int ObPacketStreamFileReader::release_packet()
{
int ret = OB_SUCCESS;
if (OB_NOT_NULL(cached_packet_)) {
ret = packet_handle_->release_packet(cached_packet_);
cached_packet_ = NULL;
}
return ret;
}
/**
 * Whether the job deadline has been reached.
 * A `timeout_ts_` of -1 means "no deadline" and never times out.
 */
bool ObPacketStreamFileReader::is_timeout() const
{
  bool timed_out = false;
  if (-1 != timeout_ts_) {
    timed_out = (ObTimeUtility::current_time() >= timeout_ts_);
  }
  return timed_out;
}
/**
 * Whether the owning query or session has been killed.
 * Always false when no session is attached.
 */
bool ObPacketStreamFileReader::is_killed() const
{
  bool killed = false;
  if (NULL != session_) {
    killed = session_->is_query_killed() || session_->is_zombie();
  }
  return killed;
}
} // namespace sql
} // namespace oceanbase

View File

@ -0,0 +1,205 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_SQL_LOAD_DATA_FILE_READER_H_
#define OCEANBASE_SQL_LOAD_DATA_FILE_READER_H_
#include "lib/oblog/ob_log_module.h"
#include "lib/string/ob_string.h"
#include "lib/allocator/ob_allocator.h"
#include "lib/file/ob_file.h"
#include "sql/resolver/cmd/ob_load_data_stmt.h"
#include "share/backup/ob_backup_struct.h"
#include "observer/mysql/obmp_packet_sender.h"
namespace oceanbase
{
namespace sql
{
class ObSQLSessionInfo;
/**
 * Parameter bundle passed to the ObFileReader::open factory.
 *
 * Only file_location_, filename_ and timeout_ts_ are included in the
 * string representation generated by TO_STRING_KV.
 */
struct ObFileReadParam
{
public:
ObFileReadParam();
TO_STRING_KV(K_(file_location), K_(filename), K_(timeout_ts));
public:
// Where the file lives (e.g. SERVER_DISK, OSS, CLIENT_DISK)
ObLoadFileLocation file_location_;
// Name of the file to read
ObString filename_;
// Storage access information; presumably only consulted for OSS files -- TODO confirm
share::ObBackupStorageInfo access_info_;
// Packet sender of the session; presumably only consulted for CLIENT_DISK files -- TODO confirm
observer::ObIMPPacketSender *packet_handle_;
// Session that issued the statement
ObSQLSessionInfo *session_;
int64_t timeout_ts_; // A job always has a deadline and file reading may cost a long time
};
/**
 * Abstract reader over the input file of a LOAD DATA statement.
 *
 * Concrete readers are created through the static `open` factory. A reader may
 * be seekable (random access) or a pure stream; use `seekable` to find out.
 */
class ObFileReader
{
public:
  // explicit: an allocator must never be converted silently into a reader
  explicit ObFileReader(ObIAllocator &allocator) : allocator_(allocator) {}
  virtual ~ObFileReader() {}

  /**
   * read data from file into the buffer
   *
   * @param buf        destination buffer
   * @param count      maximum number of bytes to read
   * @param read_size  [out] number of bytes actually read
   *
   * @note read_size equals to 0 does not mean end of file.
   *       You should call `eof` to decide whether end of file.
   *       This is not the same with the system call `read`.
   */
  virtual int read(char *buf, int64_t count, int64_t &read_size) = 0;

  /**
   * get the file size
   *
   * Stream files may not support this feature.
   */
  virtual int get_file_size(int64_t &file_size) = 0;

  /**
   * seek to the specific position and the `read` subsequently fetch data from the position
   *
   * You can use `seekable` to check whether this file can read at random position.
   */
  virtual int seek(int64_t offset) = 0;

  /// Whether `seek` is supported; true unless a subclass overrides it.
  virtual bool seekable() const { return true; }

  /// Current read position as reported by the concrete reader.
  virtual int64_t get_offset() const = 0;

  /// Whether the end of the file has been reached.
  virtual bool eof() const = 0;

  /**
   * read data until we got `count` bytes data or exception occurs
   *
   * This routine calls `read` repeatedly until we got `count` bytes
   * data.
   * As usual, the normal `read` try to read data once and return.
   */
  int readn(char *buffer, int64_t count, int64_t &read_size);

  /**
   * A file reader factory
   *
   * @param param        describes the file to open
   * @param allocator    allocator handed to the factory
   * @param file_reader  [out] the opened reader
   */
  static int open(const ObFileReadParam &param, ObIAllocator &allocator, ObFileReader *& file_reader);

protected:
  ObIAllocator &allocator_;
};
/**
 * Stream file that can read sequential only
 *
 * `get_file_size` and `seek` always return OB_NOT_SUPPORTED and `seekable`
 * reports false. `read`, `get_offset` and `eof` are left to subclasses.
 */
class ObStreamFileReader : public ObFileReader
{
public:
  explicit ObStreamFileReader(ObIAllocator &allocator): ObFileReader(allocator) {}
  virtual ~ObStreamFileReader() {}

  // Parameters are intentionally unnamed: they are never used by a stream reader.
  int get_file_size(int64_t & /*file_size*/) override { return OB_NOT_SUPPORTED; }
  int seek(int64_t /*offset*/) override { return OB_NOT_SUPPORTED; }
  bool seekable() const override { return false; }
};
/**
 * Reader backed by common::ObFileReader that supports `seek` and `get_file_size`.
 * Call `open` before reading.
 */
class ObRandomFileReader : public ObFileReader
{
public:
  explicit ObRandomFileReader(ObIAllocator &allocator);
  virtual ~ObRandomFileReader();

  int read(char *buf, int64_t count, int64_t &read_size) override;
  int seek(int64_t offset) override;
  int get_file_size(int64_t &file_size) override;
  int64_t get_offset() const override { return offset_; }
  bool eof() const override { return eof_; }

  /// Open the file named `filename` for reading.
  int open(const ObString &filename);

private:
  ObString filename_;
  common::ObFileReader file_reader_;  // underlying file accessor
  int64_t offset_;                    // position reported by get_offset()
  bool eof_;                          // value reported by eof()
  bool is_inited_;
};
/**
 * Reader that accesses the file through an ObIODevice handle and supports
 * `seek` and `get_file_size`. Call `open` before reading.
 */
class ObRandomOSSReader : public ObFileReader
{
public:
  explicit ObRandomOSSReader(ObIAllocator &allocator);
  virtual ~ObRandomOSSReader();

  /// Open `filename` using the given storage access information.
  int open(const share::ObBackupStorageInfo &storage_info, const ObString &filename);

  int read(char *buf, int64_t count, int64_t &read_size) override;
  int seek(int64_t offset) override;
  int get_file_size(int64_t &file_size) override;
  int64_t get_offset() const override { return offset_; }
  bool eof() const override { return eof_; }

private:
  ObIODevice *device_handle_;  // IO device used to access the file
  ObIOFd fd_;                  // descriptor of the opened file on the device
  int64_t offset_;             // position reported by get_offset()
  bool eof_;                   // value reported by eof()
  bool is_inited_;
};
/**
 * A stream file reader whose data source is mysql packets
 * Refer to LOAD DATA LOCAL INFILE for more detail.
 * Read data flow:
 * client send file content through mysql packets
 * (@see ObPacketStreamFileReader::read) and end with an
 * empty mysql packet.
 */
class ObPacketStreamFileReader : public ObStreamFileReader
{
public:
  explicit ObPacketStreamFileReader(ObIAllocator &allocator);
  virtual ~ObPacketStreamFileReader();

  /**
   * Prepare the reader.
   *
   * @param filename       name of the file requested from the client
   * @param packet_handle  handle used to read packets from the client
   * @param session        session used for the killed checks, may be NULL
   * @param timeout_ts     deadline of the job, -1 means no deadline
   */
  int open(const ObString &filename,
           observer::ObIMPPacketSender &packet_handle,
           ObSQLSessionInfo *session,
           int64_t timeout_ts);

  int read(char *buf, int64_t count, int64_t &read_size) override;

  /// The offset of a packet stream is the number of bytes handed out so far.
  int64_t get_offset() const override { return read_size_; }
  bool eof() const override { return eof_; }

private:
  int receive_packet();

  /// The packet read from NIO is cached, so we must release it explicitly
  /// and then we can reuse the resource
  int release_packet();

  bool is_timeout() const;
  bool is_killed() const;

private:
  observer::ObIMPPacketSender *packet_handle_; // We use this handle to read packet from client
  ObSQLSessionInfo *session_;
  int64_t timeout_ts_; // The deadline of job

  // As we read a packet from client, the NIO store the data into the NIO buffer
  // and allocate an ObPacket by an allocator(arena_allocator_). The ObPacket(cached_packet_)
  // is cached in the memory of allocator.
  ObArenaAllocator arena_allocator_;
  obmysql::ObMySQLRawPacket *cached_packet_;

  int64_t received_size_; // All data received in bytes
  int64_t read_size_;     // All data has been read in bytes
  bool eof_;
};
} // namespace sql
} // namespace oceanbase
#endif // OCEANBASE_SQL_LOAD_DATA_FILE_READER_H_

View File

@ -1388,23 +1388,12 @@ int ObLoadDataSPImpl::next_file_buffer(ObExecContext &ctx,
//recover the data left over from the previous read out of data_trimer
OZ (box.data_trimer.recover_incomplate_data(*handle->data_buffer));
if (ObLoadFileLocation::SERVER_DISK == box.load_file_storage) {
OZ (box.file_reader.pread(handle->data_buffer->current_ptr(),
handle->data_buffer->get_remain_len(),
box.read_cursor.file_offset_,
box.read_cursor.read_size_));
} else {
OZ (box.device_handle_->pread(box.fd_, box.read_cursor.file_offset_,
handle->data_buffer->get_remain_len(),
handle->data_buffer->current_ptr(),
box.read_cursor.read_size_));
}
OZ (box.file_reader->readn(handle->data_buffer->current_ptr(),
handle->data_buffer->get_remain_len(),
box.read_cursor.read_size_));
if (OB_SUCC(ret)) {
if (OB_UNLIKELY(0 == box.read_cursor.read_size_)) {
box.read_cursor.is_end_file_ = true;
LOG_DEBUG("LOAD DATA reach file end", K(box.read_cursor));
} else {
if (OB_LIKELY(box.read_cursor.read_size_ > 0)) {
handle->data_buffer->update_pos(box.read_cursor.read_size_); //更新buffer中数据长度
int64_t last_proccessed_GBs = box.read_cursor.get_total_read_GBs();
box.read_cursor.commit_read();
@ -1414,6 +1403,9 @@ int ObLoadDataSPImpl::next_file_buffer(ObExecContext &ctx,
}
box.job_status->read_bytes_ += box.read_cursor.read_size_;
} else if (box.file_reader->eof()) {
box.read_cursor.is_end_file_ = true;
LOG_DEBUG("LOAD DATA reach file end", K(box.read_cursor));
}
}
@ -2465,12 +2457,10 @@ int ObLoadDataSPImpl::ToolBox::release_resources()
ob_free(expr_buffer);
}
//release fd and device
if (NULL != device_handle_) {
if (fd_.is_valid()) {
device_handle_->close(fd_);
}
common::ObDeviceManager::get_instance().release_device(device_handle_);
//release file reader
if (OB_NOT_NULL(file_reader)) {
file_reader->~ObFileReader();
file_reader = NULL;
}
if (OB_NOT_NULL(temp_handle)) {
@ -2708,12 +2698,6 @@ int ObLoadDataSPImpl::ToolBox::init(ObExecContext &ctx, ObLoadDataStmt &load_stm
LOG_WARN("fail to gen insert column names buff", K(ret));
} else if (OB_FAIL(data_frag_mgr.init(ctx, load_args.table_id_))) {
LOG_WARN("fail to init data frag mgr", K(ret));
} else if (ObLoadFileLocation::SERVER_DISK != load_file_storage) {
if (OB_FAIL(util.get_and_init_device(device_handle_, &load_args.access_info_, load_args.file_name_))) {
LOG_WARN("fail to get device manager", K(ret), K(load_args.access_info_), K(load_args.file_name_));
} else if (OB_FAIL(util.set_access_type(&iod_opts, false, 1))) {
LOG_WARN("fail to set access type", K(ret));
}
}
//init server_info_map
@ -2779,14 +2763,20 @@ int ObLoadDataSPImpl::ToolBox::init(ObExecContext &ctx, ObLoadDataStmt &load_stm
}
if (OB_SUCC(ret)) {
if (ObLoadFileLocation::SERVER_DISK == load_file_storage) {
OZ (file_reader.open(load_args.file_name_, false));
OX (file_size = get_file_size(load_args.file_name_.ptr()));
} else {
int64_t file_length = -1;
OZ (device_handle_->open(load_args.file_name_.ptr(), -1, 0, fd_, &iod_opts));
OZ (util.get_file_size(device_handle_, fd_, file_length));
OX (file_size = file_length);
file_read_param.file_location_ = load_file_storage;
file_read_param.filename_ = load_args.file_name_;
file_read_param.access_info_ = load_args.access_info_;
file_read_param.packet_handle_ = &ctx.get_my_session()->get_pl_query_sender()->get_packet_sender();
file_read_param.session_ = ctx.get_my_session();
file_read_param.timeout_ts_ = THIS_WORKER.get_timeout_ts();
if (OB_FAIL(ObFileReader::open(file_read_param, ctx.get_allocator(), file_reader))) {
LOG_WARN("failed to open file.", KR(ret), K(file_read_param), K(load_args.file_name_));
} else if (!file_reader->seekable()) {
file_size = -1;
} else if (OB_FAIL(file_reader->get_file_size(file_size))) {
LOG_WARN("fail to get io device file size", KR(ret), K(file_size));
}
}
@ -3075,7 +3065,8 @@ int ObLoadDataSPImpl::ToolBox::init(ObExecContext &ctx, ObLoadDataStmt &load_stm
}
if (OB_SUCC(ret)) {
int64_t max_task_count = (file_size / ObLoadFileBuffer::MAX_BUFFER_SIZE + 1) * 2;
const int64_t fake_file_size = (file_size > 0) ? file_size : (2 << 30); // use 2G as default in load local mode
int64_t max_task_count = (fake_file_size / ObLoadFileBuffer::MAX_BUFFER_SIZE + 1) * 2;
if (OB_FAIL(file_buf_row_num.reserve(max_task_count))) {
LOG_WARN("fail to reserve", K(ret));
}

View File

@ -31,6 +31,7 @@
#include "sql/engine/cmd/ob_load_data_rpc.h"
#include "sql/engine/ob_des_exec_context.h"
#include "sql/engine/cmd/ob_load_data_parser.h"
#include "sql/engine/cmd/ob_load_data_file_reader.h"
#include "common/storage/ob_io_device.h"
namespace oceanbase
@ -684,7 +685,7 @@ public:
InsertTask,
};
struct ToolBox {
ToolBox() : device_handle_(NULL), fd_(), expr_buffer(nullptr) {}
ToolBox() : expr_buffer(nullptr) {}
int init(ObExecContext &ctx, ObLoadDataStmt &load_stmt);
int build_calc_partid_expr(ObExecContext &ctx,
ObLoadDataStmt &load_stmt,
@ -692,14 +693,13 @@ public:
int release_resources();
//modules
ObFileReader file_reader;
ObIODevice* device_handle_;
ObIOFd fd_;
ObFileReader * file_reader;
ObFileAppender file_appender;
ObFileReadCursor read_cursor;
ObLoadFileDataTrimer data_trimer;
ObInsertValueGenerator generator;
ObDataFragMgr data_frag_mgr;
ObFileReadParam file_read_param;
//running control
ObParallelTaskController shuffle_task_controller;

View File

@ -92,92 +92,31 @@ int ObLoadDataResolver::resolve(const ParseNode &parse_tree)
case T_REMOTE_OSS:
load_args.load_file_storage_ = ObLoadFileLocation::OSS;
break;
case T_LOCAL:
//load_args.load_file_storage_ = ObLoadFileLocation::CLIENT_DISK;
//break;
//not support local
case T_LOCAL: {
bool enabled = false;
if (OB_FAIL(local_infile_enabled(enabled))) {
LOG_WARN("failed to check local_infile_enabled", K(ret));
} else if (!enabled) {
ret = OB_ERR_CLIENT_LOCAL_FILES_DISABLED;
LOG_USER_ERROR(OB_ERR_CLIENT_LOCAL_FILES_DISABLED);
} else {
load_args.load_file_storage_ = ObLoadFileLocation::CLIENT_DISK;
}
}
break;
default:
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "load data local");
LOG_USER_ERROR(OB_NOT_SUPPORTED, "unknown location");
}
} else {
load_args.load_file_storage_ = ObLoadFileLocation::SERVER_DISK;
}
LOG_DEBUG("load data location", K(load_args.load_file_storage_));
}
if (OB_SUCC(ret)) {
/* 1. file name */
ObLoadArgument &load_args = load_stmt->get_load_arguments();
ParseNode *file_name_node = node->children_[ENUM_FILE_NAME];
if (OB_ISNULL(file_name_node)
|| OB_UNLIKELY(T_VARCHAR != file_name_node->type_ && T_CHAR != file_name_node->type_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid node", "child", file_name_node);
} else {
ObString file_name(file_name_node->str_len_, file_name_node->str_value_);
if (ObLoadFileLocation::OSS != load_args.load_file_storage_) {
load_args.file_name_ = file_name;
const char *p = nullptr;
ObString sub_file_name;
ObString cstyle_file_name; // ends with '\0'
char *full_path_buf = nullptr;
char *actual_path = nullptr;
if (OB_ISNULL(full_path_buf = static_cast<char *>(allocator_->alloc(MAX_PATH_SIZE)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to allocate memory", K(ret));
}
while (OB_SUCC(ret) && !file_name.empty()) {
p = file_name.find(',');
if (nullptr == p) {
sub_file_name = file_name;
cstyle_file_name = sub_file_name;
file_name.reset();
} else {
sub_file_name = file_name.split_on(p);
cstyle_file_name.reset();
}
if (!sub_file_name.empty()) {
if (cstyle_file_name.empty() &&
OB_FAIL(ob_write_string(*allocator_, sub_file_name, cstyle_file_name, true))) {
LOG_WARN("fail to write string", KR(ret));
} else if (OB_ISNULL(actual_path = realpath(cstyle_file_name.ptr(), full_path_buf))) {
ret = OB_FILE_NOT_EXIST;
LOG_WARN("file not exist", K(ret), K(cstyle_file_name));
}
if (OB_SUCC(ret)) {
ObString secure_file_priv;
if (OB_FAIL(session_info_->get_secure_file_priv(secure_file_priv))) {
LOG_WARN("failed to get secure file priv", K(ret));
} else if (OB_FAIL(
ObResolverUtils::check_secure_path(secure_file_priv, actual_path))) {
LOG_WARN("failed to check secure path", K(ret), K(secure_file_priv),
K(actual_path));
}
}
if (OB_SUCC(ret)) {
if (OB_FAIL(load_args.file_iter_.add_files(&cstyle_file_name))) {
LOG_WARN("fail to add files", KR(ret));
}
}
}
}
} else {
ObString temp_file_name = file_name.split_on('?');
ObString storage_info;
if (OB_FAIL(ob_write_string(*allocator_, temp_file_name, load_args.file_name_, true))) {
LOG_WARN("fail to copy string", K(ret));
} else if (OB_FAIL(ob_write_string(*allocator_, file_name, storage_info, true))) {
LOG_WARN("fail to copy string", K(ret));
} else if (temp_file_name.length() <= 0 || storage_info.length() <= 0) {
ret = OB_INVALID_ARGUMENT;
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "file name or access key");
} else if (OB_FAIL(load_args.access_info_.set(load_args.file_name_.ptr(), storage_info.ptr()))) {
LOG_WARN("failed to set access info", K(ret));
} else if (OB_FAIL(load_args.file_iter_.add_files(&load_args.file_name_))) {
LOG_WARN("fail to add files", KR(ret));
}
}
}
ret = resolve_filename(load_stmt, node);
}
if (OB_SUCC(ret)) {
@ -185,7 +124,10 @@ int ObLoadDataResolver::resolve(const ParseNode &parse_tree)
ObLoadArgument &load_args = load_stmt->get_load_arguments();
ObLoadDupActionType dupl_action = ObLoadDupActionType::LOAD_STOP_ON_DUP;
if (NULL == node->children_[ENUM_DUPLICATE_ACTION]) {
if (ObLoadFileLocation::CLIENT_DISK == load_args.load_file_storage_) {
if (ObLoadFileLocation::CLIENT_DISK == load_args.load_file_storage_ &&
lib::is_mysql_mode()) {
// https://dev.mysql.com/doc/refman/8.0/en/load-data.html
// In MySQL, LOCAL modifier has the same effect as the IGNORE modifier.
dupl_action = ObLoadDupActionType::LOAD_IGNORE;
}
} else if (T_IGNORE == node->children_[ENUM_DUPLICATE_ACTION]->type_) {
@ -561,6 +503,92 @@ int ObLoadDataResolver::resolve_hints(const ParseNode &node)
return ret;
}
/**
 * Resolve the file name clause of a LOAD DATA statement into the load arguments.
 *
 * For non-OSS locations the name is split on ',' and every non-empty part is
 * added to the file iterator. Files on the server disk must exist (checked with
 * realpath) and, in MySQL mode, must pass the secure_file_priv check. Files on
 * the client disk are limited to a single file.
 * For OSS the text before the first '?' is the file name and the remainder is
 * the storage access information.
 *
 * @param load_stmt  statement whose load arguments are filled in
 * @param node       LOAD DATA parse node; child ENUM_FILE_NAME holds the file name
 * @return OB_SUCCESS, or OB_ERR_UNEXPECTED for a null/invalid input, or the
 *         error of the failing step
 */
int ObLoadDataResolver::resolve_filename(ObLoadDataStmt *load_stmt, ParseNode *node)
{
  int ret = OB_SUCCESS;
  ParseNode *file_name_node = nullptr;
  if (OB_ISNULL(load_stmt) || OB_ISNULL(node) || OB_ISNULL(allocator_)) {
    // guard the pointers that are dereferenced unconditionally below
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("load stmt, parse node or allocator is null", K(ret));
  } else if (OB_ISNULL(file_name_node = node->children_[ENUM_FILE_NAME])
             || OB_UNLIKELY(T_VARCHAR != file_name_node->type_ && T_CHAR != file_name_node->type_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("invalid node", "child", file_name_node);
  } else {
    ObLoadArgument &load_args = load_stmt->get_load_arguments();
    ObString file_name(file_name_node->str_len_, file_name_node->str_value_);
    if (ObLoadFileLocation::OSS != load_args.load_file_storage_) {
      load_args.file_name_ = file_name;
      const char *p = nullptr;
      ObString sub_file_name;
      ObString cstyle_file_name; // ends with '\0'
      char *full_path_buf = nullptr;
      char *actual_path = nullptr;
      // NOTE(review): realpath() expects a buffer of at least PATH_MAX bytes;
      // confirm that MAX_PATH_SIZE satisfies this.
      if (OB_ISNULL(full_path_buf = static_cast<char *>(allocator_->alloc(MAX_PATH_SIZE)))) {
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_WARN("fail to allocate memory", K(ret));
      }
      while (OB_SUCC(ret) && !file_name.empty()) {
        p = file_name.find(',');
        if (nullptr == p) {
          // last (or only) name: reuse the remaining text as-is
          sub_file_name = file_name;
          cstyle_file_name = sub_file_name;
          file_name.reset();
        } else {
          // a name followed by more names: it is copied below to get a '\0' terminated string
          sub_file_name = file_name.split_on(p);
          cstyle_file_name.reset();
        }
        if (!sub_file_name.empty()) {
          if (cstyle_file_name.empty() &&
              OB_FAIL(ob_write_string(*allocator_, sub_file_name, cstyle_file_name, true))) {
            LOG_WARN("fail to write string", KR(ret));
          } else if (ObLoadFileLocation::SERVER_DISK == load_args.load_file_storage_ &&
                     OB_ISNULL(actual_path = realpath(cstyle_file_name.ptr(), full_path_buf))) {
            ret = OB_FILE_NOT_EXIST;
            LOG_WARN("file not exist", K(ret), K(cstyle_file_name));
          }

          //security check for mysql mode
          if (OB_SUCC(ret) && lib::is_mysql_mode() && ObLoadFileLocation::SERVER_DISK == load_args.load_file_storage_) {
            ObString secure_file_priv;
            if (OB_ISNULL(session_info_)) {
              ret = OB_ERR_UNEXPECTED;
              LOG_WARN("session info is null", K(ret));
            } else if (OB_FAIL(session_info_->get_secure_file_priv(secure_file_priv))) {
              LOG_WARN("failed to get secure file priv", K(ret));
            } else if (OB_FAIL(
                ObResolverUtils::check_secure_path(secure_file_priv, actual_path))) {
              LOG_WARN("failed to check secure path", K(ret), K(secure_file_priv),
                       K(actual_path));
            }
          }

          if (OB_SUCC(ret)) {
            if (ObLoadFileLocation::CLIENT_DISK == load_args.load_file_storage_ && load_args.file_iter_.count() != 0) {
              ret = OB_NOT_SUPPORTED;
              // only the feature name is passed here, matching the other
              // LOG_USER_ERROR(OB_NOT_SUPPORTED, ...) calls of this resolver
              LOG_USER_ERROR(OB_NOT_SUPPORTED, "load multi files");
            } else if (OB_FAIL(load_args.file_iter_.add_files(&cstyle_file_name))) {
              LOG_WARN("fail to add files", KR(ret));
            }
          }
        }
      }
    } else {
      // OSS: "<file name>?<storage info>"
      ObString temp_file_name = file_name.split_on('?');
      ObString storage_info;
      if (OB_FAIL(ob_write_string(*allocator_, temp_file_name, load_args.file_name_, true))) {
        LOG_WARN("fail to copy string", K(ret));
      } else if (OB_FAIL(ob_write_string(*allocator_, file_name, storage_info, true))) {
        LOG_WARN("fail to copy string", K(ret));
      } else if (temp_file_name.length() <= 0 || storage_info.length() <= 0) {
        ret = OB_INVALID_ARGUMENT;
        LOG_USER_ERROR(OB_INVALID_ARGUMENT, "file name or access key");
      } else if (OB_FAIL(load_args.access_info_.set(load_args.file_name_.ptr(), storage_info.ptr()))) {
        LOG_WARN("failed to set access info", K(ret));
      } else if (OB_FAIL(load_args.file_iter_.add_files(&load_args.file_name_))) {
        LOG_WARN("fail to add files", KR(ret));
      }
    }
  }
  return ret;
}
//validation for loaddata statement obeys the following rules:
//0. in loaddata Ver1, only ascii charset are supported.
//1. according to the defined charset, escaped and enclosed valid char length should <= 1.
@ -1349,6 +1377,42 @@ int ObLoadDataResolver::resolve_char_node(const ParseNode &node, int32_t &single
return ret;
}
int ObLoadDataResolver::local_infile_enabled(bool &enabled) const
{
int ret = OB_SUCCESS;
// 1. let's check the system variable and the capability flag in the mysql handshake
enabled = false;
int64_t local_infile_sys_var = 0;
if (OB_ISNULL(session_info_)) {
} else if (OB_FAIL(session_info_->get_sys_variable(share::SYS_VAR_LOCAL_INFILE, local_infile_sys_var))) {
LOG_WARN("failed to get SYS_VAR_LOCAL_INFILE system variable.", KR(ret));
} else {
const int64_t local_infile_capability_flag = session_info_->get_capability().cap_flags_.OB_CLIENT_LOCAL_FILES;
enabled = (local_infile_sys_var != 0) && (local_infile_capability_flag != 0);
LOG_DEBUG("LOCAL_INFILE enabled by system variable and client capability",
K(enabled), K(local_infile_capability_flag), K(local_infile_sys_var));
}
// 2. let's check the client type.
// The obproxy set the capability flag but it does not support load local
if (OB_SUCC(ret) && enabled) {
if (session_info_->get_client_mode() > common::OB_MIN_CLIENT_MODE &&
session_info_->get_client_mode() < OB_MAX_CLIENT_MODE) {
// this is an ob client, such as obclient 2.x, objdbc, obproxy, obclient 1.x is not included
// check the proxy capability flags
obmysql::ObProxyCapabilityFlags proxy_cap = session_info_->get_proxy_cap_flags();
LOG_DEBUG("load local infile: get proxy capability flag",
K(proxy_cap.capability_), K(proxy_cap.is_load_local_support()));
if (!proxy_cap.is_load_local_support()) {
enabled = false;
LOG_INFO("load data local infile is disabled by client: the obclient proxy capability flag is not set");
}
}
}
return ret;
}
int ObLoadDataResolver::check_trigger_constraint(const ObTableSchema *table_schema)
{
int ret = OB_SUCCESS;

View File

@ -57,6 +57,10 @@ public:
const common::ObString &table_name, bool cte_table_fisrt, uint64_t& table_id);
int validate_stmt(ObLoadDataStmt* stmt);
int resolve_hints(const ParseNode &node);
int resolve_filename(ObLoadDataStmt *load_stmt, ParseNode *node);
int local_infile_enabled(bool &enabled) const;
int check_trigger_constraint(const ObTableSchema *table_schema);
private:
enum ParameterEnum {

View File

@ -1031,6 +1031,7 @@ public:
// client mode related
void set_client_mode(const common::ObClientMode mode) { client_mode_ = mode; }
common::ObClientMode get_client_mode() const { return client_mode_; }
bool is_java_client_mode() const { return common::OB_JAVA_CLIENT_MODE == client_mode_; }
bool is_obproxy_mode() const { return common::OB_PROXY_CLIENT_MODE == client_mode_; }