[FEAT MERGE] load local files
This commit is contained in:
		@ -183,6 +183,7 @@ ob_set_subtarget(ob_sql engine_cmd
 | 
			
		||||
  engine/cmd/ob_kill_executor.cpp
 | 
			
		||||
  engine/cmd/ob_kill_session_arg.cpp
 | 
			
		||||
  engine/cmd/ob_load_data_direct_impl.cpp
 | 
			
		||||
  engine/cmd/ob_load_data_file_reader.cpp
 | 
			
		||||
  engine/cmd/ob_load_data_executor.cpp
 | 
			
		||||
  engine/cmd/ob_load_data_impl.cpp
 | 
			
		||||
  engine/cmd/ob_load_data_parser.cpp
 | 
			
		||||
 | 
			
		||||
@ -9,7 +9,6 @@
 | 
			
		||||
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
			
		||||
 * See the Mulan PubL v2 for more details.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#define USING_LOG_PREFIX SQL_ENG
 | 
			
		||||
 | 
			
		||||
#include "sql/engine/cmd/ob_load_data_direct_impl.h"
 | 
			
		||||
@ -20,6 +19,7 @@
 | 
			
		||||
#include "observer/table_load/ob_table_load_table_ctx.h"
 | 
			
		||||
#include "observer/table_load/ob_table_load_task.h"
 | 
			
		||||
#include "observer/table_load/ob_table_load_task_scheduler.h"
 | 
			
		||||
#include "observer/mysql/ob_query_driver.h"
 | 
			
		||||
#include "share/schema/ob_schema_getter_guard.h"
 | 
			
		||||
#include "share/ob_device_manager.h"
 | 
			
		||||
#include "share/backup/ob_backup_io_adapter.h"
 | 
			
		||||
@ -226,208 +226,6 @@ int ObLoadDataDirectImpl::Logger::log_error_line(const ObString &file_name, int6
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * RandomFileReader
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// Starts in the "not opened" state; open() has to succeed before pread()/get_file_size().
ObLoadDataDirectImpl::RandomFileReader::RandomFileReader()
  : is_inited_(false)
{}
 | 
			
		||||
 | 
			
		||||
// Nothing to release explicitly here; members clean up through their own destructors.
ObLoadDataDirectImpl::RandomFileReader::~RandomFileReader() {}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataDirectImpl::RandomFileReader::open(const DataAccessParam &data_access_param, const ObString &filename)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  UNUSED(data_access_param);
 | 
			
		||||
  if (IS_INIT) {
 | 
			
		||||
    ret = OB_INIT_TWICE;
 | 
			
		||||
    LOG_WARN("RandomFileReader init twice", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_FAIL(file_reader_.open(filename.ptr(), false))) {
 | 
			
		||||
    LOG_WARN("fail to open file", KR(ret), K(filename));
 | 
			
		||||
  } else {
 | 
			
		||||
    filename_ = filename;
 | 
			
		||||
    is_inited_ = true;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataDirectImpl::RandomFileReader::pread(char *buf, int64_t count, int64_t offset, int64_t &read_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (IS_NOT_INIT) {
 | 
			
		||||
    ret = OB_NOT_INIT;
 | 
			
		||||
    LOG_WARN("RandomFileReader not init", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_FAIL(file_reader_.pread(buf, count, offset, read_size))) {
 | 
			
		||||
    LOG_WARN("fail to pread file buf", KR(ret), K(count), K(offset), K(read_size));
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Reports the size of the file that was opened.
 *
 * @param file_size  out: whatever ::get_file_size() returns for the stored filename.
 *                   NOTE(review): the helper's result is not validated here -- confirm
 *                   how it signals failure.
 * @return OB_SUCCESS, or OB_NOT_INIT when open() has not succeeded.
 */
int ObLoadDataDirectImpl::RandomFileReader::get_file_size(int64_t &file_size)
{
  int ret = OB_SUCCESS;
  if (IS_INIT) {
    file_size = ::get_file_size(filename_.ptr());
  } else {
    ret = OB_NOT_INIT;
    LOG_WARN("RandomFileReader not init", KR(ret), KP(this));
  }
  return ret;
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * RandomOSSReader
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// No device is held and nothing is opened until open() succeeds.
ObLoadDataDirectImpl::RandomOSSReader::RandomOSSReader()
  : device_handle_(nullptr),
    is_inited_(false)
{}
 | 
			
		||||
 | 
			
		||||
/**
 * Closes the fd if one is open, then returns the device handle to ObDeviceManager.
 *
 * The close is nested under the null check on device_handle_ so that the handle is
 * never dereferenced when it is nullptr.
 */
ObLoadDataDirectImpl::RandomOSSReader::~RandomOSSReader()
{
  if (nullptr != device_handle_) {
    if (fd_.is_valid()) {
      // The result of close() is not inspected; a destructor has no way to report it.
      device_handle_->close(fd_);
      fd_.reset();
    }
    common::ObDeviceManager::get_instance().release_device(device_handle_);
    device_handle_ = nullptr;
  }
}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataDirectImpl::RandomOSSReader::open(const DataAccessParam &data_access_param,
 | 
			
		||||
                                                const ObString &filename)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  ObIODOpt opt;
 | 
			
		||||
  ObIODOpts iod_opts;
 | 
			
		||||
  ObBackupIoAdapter util;
 | 
			
		||||
  iod_opts.opts_ = &opt;
 | 
			
		||||
  iod_opts.opt_cnt_ = 0;
 | 
			
		||||
  if (IS_INIT) {
 | 
			
		||||
    ret = OB_INIT_TWICE;
 | 
			
		||||
    LOG_WARN("RandomOSSReader init twice", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_FAIL(
 | 
			
		||||
        util.get_and_init_device(device_handle_, &data_access_param.access_info_, filename))) {
 | 
			
		||||
    LOG_WARN("fail to get device manager", KR(ret), K(filename));
 | 
			
		||||
  } else if (OB_FAIL(util.set_access_type(&iod_opts, false, 1))) {
 | 
			
		||||
    LOG_WARN("fail to set access type", KR(ret));
 | 
			
		||||
  } else if (OB_FAIL(device_handle_->open(to_cstring(filename), -1, 0, fd_, &iod_opts))) {
 | 
			
		||||
    LOG_WARN("fail to open oss file", KR(ret), K(filename));
 | 
			
		||||
  } else {
 | 
			
		||||
    is_inited_ = true;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataDirectImpl::RandomOSSReader::pread(char *buf, int64_t count, int64_t offset,
 | 
			
		||||
                                                 int64_t &read_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (IS_NOT_INIT) {
 | 
			
		||||
    ret = OB_NOT_INIT;
 | 
			
		||||
    LOG_WARN("RandomOSSReader not init", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_FAIL(device_handle_->pread(fd_, offset, count, buf, read_size))) {
 | 
			
		||||
    LOG_WARN("fail to pread oss buf", KR(ret), K(offset), K(count), K(read_size));
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Asks ObBackupIoAdapter for the size of the opened object.
 *
 * @param file_size  out: filled in by ObBackupIoAdapter::get_file_size().
 * @return OB_SUCCESS, OB_NOT_INIT when open() has not succeeded, or the adapter error.
 */
int ObLoadDataDirectImpl::RandomOSSReader::get_file_size(int64_t &file_size)
{
  int ret = OB_SUCCESS;
  ObBackupIoAdapter io_adapter;
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    LOG_WARN("RandomOSSReader not init", KR(ret), KP(this));
  } else {
    if (OB_FAIL(io_adapter.get_file_size(device_handle_, fd_, file_size))) {
      LOG_WARN("fail to get oss file size", KR(ret), K(file_size));
    }
  }
  return ret;
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * SequentialDataAccessor
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// No backend is selected and the read cursor is at 0 until init() succeeds.
ObLoadDataDirectImpl::SequentialDataAccessor::SequentialDataAccessor()
  : random_io_device_(nullptr),
    offset_(0),
    is_inited_(false)
{}
 | 
			
		||||
 | 
			
		||||
// The embedded readers release their own resources in their destructors.
ObLoadDataDirectImpl::SequentialDataAccessor::~SequentialDataAccessor() {}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataDirectImpl::SequentialDataAccessor::init(const DataAccessParam &data_access_param,
 | 
			
		||||
                                                       const ObString &filename)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (IS_INIT) {
 | 
			
		||||
    ret = OB_INIT_TWICE;
 | 
			
		||||
    LOG_WARN("ObLoadDataDirectImpl::SequentialDataAccessor init twice", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_UNLIKELY(!data_access_param.is_valid() || filename.empty())) {
 | 
			
		||||
    ret = OB_INVALID_ARGUMENT;
 | 
			
		||||
    LOG_WARN("invalid args", KR(ret), K(data_access_param), K(filename));
 | 
			
		||||
  } else {
 | 
			
		||||
    if (data_access_param.file_location_ == ObLoadFileLocation::SERVER_DISK) {
 | 
			
		||||
      if (OB_FAIL(random_file_reader_.open(data_access_param, filename))) {
 | 
			
		||||
        LOG_WARN("fail to open random file reader", KR(ret), K(filename));
 | 
			
		||||
      } else {
 | 
			
		||||
        random_io_device_ = &random_file_reader_;
 | 
			
		||||
      }
 | 
			
		||||
    } else if (data_access_param.file_location_ == ObLoadFileLocation::OSS) {
 | 
			
		||||
      if (OB_FAIL(random_oss_reader_.open(data_access_param, filename))) {
 | 
			
		||||
        LOG_WARN("fail to open random oss reader", KR(ret), K(filename));
 | 
			
		||||
      } else {
 | 
			
		||||
        random_io_device_ = &random_oss_reader_;
 | 
			
		||||
      }
 | 
			
		||||
    } else {
 | 
			
		||||
      ret = OB_NOT_SUPPORTED;
 | 
			
		||||
      LOG_WARN("not supported load file location", KR(ret), K(data_access_param.file_location_));
 | 
			
		||||
      FORWARD_USER_ERROR_MSG(ret, "not supported load file location");
 | 
			
		||||
    }
 | 
			
		||||
    if (OB_SUCC(ret)) {
 | 
			
		||||
      is_inited_ = true;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataDirectImpl::SequentialDataAccessor::read(char *buf, int64_t count, int64_t &read_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (IS_NOT_INIT) {
 | 
			
		||||
    ret = OB_NOT_INIT;
 | 
			
		||||
    LOG_WARN("ObLoadDataDirectImpl::SequentialDataAccessor not init", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_UNLIKELY(nullptr == buf || count <= 0)) {
 | 
			
		||||
    ret = OB_INVALID_ARGUMENT;
 | 
			
		||||
    LOG_WARN("invalid args", KR(ret), KP(buf), K(count));
 | 
			
		||||
  } else {
 | 
			
		||||
    if (OB_FAIL(random_io_device_->pread(buf, count, offset_, read_size))) {
 | 
			
		||||
      LOG_WARN("fail to do pread", KR(ret), K(offset_));
 | 
			
		||||
    } else {
 | 
			
		||||
      offset_ += read_size;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Forwards the size query to the backend selected by init().
 *
 * @param file_size  out: filled in by the backend.
 * @return OB_SUCCESS, OB_NOT_INIT when init() has not succeeded, or the backend error.
 */
int ObLoadDataDirectImpl::SequentialDataAccessor::get_file_size(int64_t &file_size)
{
  int ret = OB_SUCCESS;
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    LOG_WARN("ObLoadDataDirectImpl::SequentialDataAccessor not init", KR(ret), KP(this));
  } else {
    if (OB_FAIL(random_io_device_->get_file_size(file_size))) {
      LOG_WARN("fail to get random io device file size", KR(ret), K(file_size));
    }
  }
  return ret;
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * DataDescIterator
 | 
			
		||||
 */
 | 
			
		||||
@ -637,10 +435,23 @@ void ObLoadDataDirectImpl::DataBuffer::swap(DataBuffer &other)
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
ObLoadDataDirectImpl::DataReader::DataReader()
 | 
			
		||||
  : execute_ctx_(nullptr), end_offset_(0), read_raw_(false), is_iter_end_(false), is_inited_(false)
 | 
			
		||||
    : execute_ctx_(nullptr),
 | 
			
		||||
      file_reader_(nullptr),
 | 
			
		||||
      end_offset_(0),
 | 
			
		||||
      read_raw_(false),
 | 
			
		||||
      is_iter_end_(false),
 | 
			
		||||
      is_inited_(false)
 | 
			
		||||
{
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Runs the file reader's destructor in place, if a reader was created.
 *
 * Only the destructor is invoked; this function does not free the reader's storage.
 */
ObLoadDataDirectImpl::DataReader::~DataReader()
{
  if (nullptr != file_reader_) {
    file_reader_->~ObFileReader();
    file_reader_ = nullptr;
  }
}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataDirectImpl::DataReader::init(const DataAccessParam &data_access_param,
 | 
			
		||||
                                           LoadExecuteContext &execute_ctx,
 | 
			
		||||
                                           const DataDesc &data_desc, bool read_raw)
 | 
			
		||||
@ -665,13 +476,28 @@ int ObLoadDataDirectImpl::DataReader::init(const DataAccessParam &data_access_pa
 | 
			
		||||
    }
 | 
			
		||||
    if (OB_SUCC(ret)) {
 | 
			
		||||
      end_offset_ = data_desc.end_;
 | 
			
		||||
      if (OB_FAIL(io_accessor_.init(data_access_param, data_desc.filename_))) {
 | 
			
		||||
        LOG_WARN("fail to init io device", KR(ret), K(data_desc));
 | 
			
		||||
      } else if (end_offset_ == -1 && OB_FAIL(io_accessor_.get_file_size(end_offset_))) {
 | 
			
		||||
        LOG_WARN("fail to get file size", KR(ret), K(data_desc));
 | 
			
		||||
      } else {
 | 
			
		||||
        io_accessor_.seek(data_desc.start_);
 | 
			
		||||
        ATOMIC_AAF(&execute_ctx_->job_stat_->total_bytes_, (end_offset_ - data_desc.start_));
 | 
			
		||||
 | 
			
		||||
      ObFileReadParam file_read_param;
 | 
			
		||||
      file_read_param.file_location_ = data_access_param.file_location_;
 | 
			
		||||
      file_read_param.filename_      = data_desc.filename_;
 | 
			
		||||
      file_read_param.access_info_   = data_access_param.access_info_;
 | 
			
		||||
      file_read_param.packet_handle_ = &execute_ctx.exec_ctx_.get_session_info()->get_pl_query_sender()->get_packet_sender();
 | 
			
		||||
      file_read_param.session_       = execute_ctx.exec_ctx_.get_session_info();
 | 
			
		||||
      file_read_param.timeout_ts_    = THIS_WORKER.get_timeout_ts();
 | 
			
		||||
 | 
			
		||||
      if (OB_FAIL(ObFileReader::open(file_read_param, *execute_ctx_->allocator_, file_reader_))) {
 | 
			
		||||
        LOG_WARN("failed to open file", KR(ret), K(data_desc));
 | 
			
		||||
      } else if (file_reader_->seekable()) {
 | 
			
		||||
 | 
			
		||||
        if (end_offset_ == -1 && OB_FAIL(file_reader_->get_file_size(end_offset_))) {
 | 
			
		||||
          LOG_WARN("fail to get file size", KR(ret), K(data_desc));
 | 
			
		||||
        } else {
 | 
			
		||||
          file_reader_->seek(data_desc.start_);
 | 
			
		||||
          ATOMIC_AAF(&execute_ctx_->job_stat_->total_bytes_, (end_offset_ - data_desc.start_));
 | 
			
		||||
        }
 | 
			
		||||
      } else if (data_desc.start_ != 0) {
 | 
			
		||||
        ret = OB_NOT_SUPPORTED; // should not happen
 | 
			
		||||
        LOG_WARN("file reader asked to seek while not supported by unseekable file", KR(ret), K(file_read_param));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    if (OB_SUCC(ret)) {
 | 
			
		||||
@ -681,6 +507,32 @@ int ObLoadDataDirectImpl::DataReader::init(const DataAccessParam &data_access_pa
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Pulls more bytes from file_reader_ into the free tail of `file_buffer`.
 *
 * The request size is the buffer's remaining length; when end_offset_ > 0 it is
 * clamped so the read does not go past end_offset_. Nothing is read when
 * is_end_file() is already true.
 *
 * @param file_buffer  buffer to append to; its position is advanced by the bytes read.
 * @return OB_SUCCESS (including the "read nothing" and end-of-file cases),
 *         OB_BUF_NOT_ENOUGH when the buffer has no free space, or the readn() error.
 */
int ObLoadDataDirectImpl::DataReader::read_buffer(ObLoadFileBuffer &file_buffer)
{
  int ret = OB_SUCCESS;
  int64_t wanted = file_buffer.get_remain_len();
  if (0 == wanted) {
    ret = OB_BUF_NOT_ENOUGH;
    LOG_WARN("cannot read more data as buffer is full", KR(ret));
  } else if (is_end_file()) {
    // already at the end of the range/stream: leave the buffer untouched
  } else {
    int64_t read_size = 0;
    // A positive end_offset_ bounds the readable range; never request bytes beyond it.
    if (end_offset_ > 0 && wanted > (end_offset_ - file_reader_->get_offset())) {
      wanted = end_offset_ - file_reader_->get_offset();
    }

    if (OB_FAIL(file_reader_->readn(file_buffer.current_ptr(), wanted, read_size))) {
      LOG_WARN("fail to read file", KR(ret));
    } else if (read_size != 0) {
      file_buffer.update_pos(read_size); // advance the length of valid data in the buffer
      LOG_TRACE("read file sucess", K(read_size));
      ATOMIC_AAF(&execute_ctx_->job_stat_->read_bytes_, read_size);
    } else {
      LOG_TRACE("read nothing", K(is_end_file()));
    }
  }
  return ret;
}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataDirectImpl::DataReader::get_next_buffer(ObLoadFileBuffer &file_buffer,
 | 
			
		||||
                                                      int64_t &line_count, int64_t limit)
 | 
			
		||||
{
 | 
			
		||||
@ -704,25 +556,14 @@ int ObLoadDataDirectImpl::DataReader::get_next_buffer(ObLoadFileBuffer &file_buf
 | 
			
		||||
    if (OB_FAIL(data_trimer_.recover_incomplate_data(file_buffer))) {
 | 
			
		||||
      LOG_WARN("fail to recover incomplate data", KR(ret));
 | 
			
		||||
    }
 | 
			
		||||
    // 2. 从文件里读取后续的数据
 | 
			
		||||
    else if (!is_end_file()) {
 | 
			
		||||
      int64_t read_count = 0;
 | 
			
		||||
      int64_t read_size = 0;
 | 
			
		||||
      if (FALSE_IT(read_count =
 | 
			
		||||
                     MIN(file_buffer.get_remain_len(), end_offset_ - io_accessor_.get_offset()))) {
 | 
			
		||||
      } else if (OB_FAIL(io_accessor_.read(file_buffer.current_ptr(), read_count, read_size))) {
 | 
			
		||||
        LOG_WARN("fail to read file", KR(ret));
 | 
			
		||||
      } else if (OB_UNLIKELY(read_count != read_size)) {
 | 
			
		||||
        ret = OB_ERR_UNEXPECTED;
 | 
			
		||||
        LOG_WARN("unexpected read size", KR(ret), K(read_count), K(read_size), K(end_offset_));
 | 
			
		||||
      } else {
 | 
			
		||||
        file_buffer.update_pos(read_size); // 更新buffer中数据长度
 | 
			
		||||
        ATOMIC_AAF(&execute_ctx_->job_stat_->read_bytes_, read_size);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    // 3. 从buffer中找出完整的行,剩下的数据缓存到data_trimer
 | 
			
		||||
    // 2. 读取数据,然后从buffer中找出完整的行,剩下的数据缓存到data_trimer
 | 
			
		||||
    if (OB_SUCC(ret)) {
 | 
			
		||||
      if (!file_buffer.is_valid()) {
 | 
			
		||||
      int64_t complete_cnt = limit;
 | 
			
		||||
      int64_t complete_len = 0;
 | 
			
		||||
 | 
			
		||||
      if (OB_FAIL(read_buffer(file_buffer))) {
 | 
			
		||||
        LOG_WARN("failed to read buffer as there is not enough data to parse", KR(ret));
 | 
			
		||||
      } else if (!file_buffer.is_valid()) {
 | 
			
		||||
        is_iter_end_ = true;
 | 
			
		||||
        ret = OB_ITER_END;
 | 
			
		||||
      } else {
 | 
			
		||||
@ -760,19 +601,37 @@ int ObLoadDataDirectImpl::DataReader::get_next_raw_buffer(DataBuffer &data_buffe
 | 
			
		||||
  } else if (is_end_file()) {
 | 
			
		||||
    ret = OB_ITER_END;
 | 
			
		||||
  } else if (data_buffer.get_remain_length() > 0) {
 | 
			
		||||
    const int64_t read_count =
 | 
			
		||||
      MIN(data_buffer.get_remain_length(), end_offset_ - io_accessor_.get_offset());
 | 
			
		||||
    int64_t read_count = data_buffer.get_remain_length();
 | 
			
		||||
    if (file_reader_->seekable() && read_count > end_offset_ - file_reader_->get_offset()) {
 | 
			
		||||
      read_count = end_offset_ - file_reader_->get_offset();
 | 
			
		||||
    }
 | 
			
		||||
    int64_t read_size = 0;
 | 
			
		||||
    if (OB_FAIL(io_accessor_.read(data_buffer.data() + data_buffer.get_data_length(), read_count,
 | 
			
		||||
    if (OB_FAIL(file_reader_->readn(data_buffer.data() + data_buffer.get_data_length(), read_count,
 | 
			
		||||
                                  read_size))) {
 | 
			
		||||
      LOG_WARN("fail to read file", KR(ret));
 | 
			
		||||
    } else if (OB_UNLIKELY(read_count != read_size)) {
 | 
			
		||||
      ret = OB_ERR_UNEXPECTED;
 | 
			
		||||
      LOG_WARN("unexpected read size", KR(ret), K(read_count), K(read_size), K(end_offset_));
 | 
			
		||||
    } else {
 | 
			
		||||
    } else if (read_size > 0) {
 | 
			
		||||
      data_buffer.update_data_length(read_size);
 | 
			
		||||
      ATOMIC_AAF(&execute_ctx_->job_stat_->read_bytes_, read_size);
 | 
			
		||||
    }
 | 
			
		||||
    } else {
 | 
			
		||||
      // read_size == 0
 | 
			
		||||
      if (is_end_file()) {
 | 
			
		||||
        ret = OB_ITER_END;
 | 
			
		||||
      } else {
 | 
			
		||||
        ret = OB_ERR_UNEXPECTED;
 | 
			
		||||
        LOG_WARN("read buffer got unexpected bytes.", K(read_size), K(read_count), K(ret));
 | 
			
		||||
      }
 | 
			
		||||
     }
 | 
			
		||||
   }
 | 
			
		||||
   return ret;
 | 
			
		||||
 }
 | 
			
		||||
 | 
			
		||||
bool ObLoadDataDirectImpl::DataReader::is_end_file() const
 | 
			
		||||
{
 | 
			
		||||
  bool ret = false;
 | 
			
		||||
  if (file_reader_->eof()) {
 | 
			
		||||
    ret = true;
 | 
			
		||||
  } else if (end_offset_ > 0) {
 | 
			
		||||
    ret = file_reader_->get_offset() >= end_offset_;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
@ -931,16 +790,32 @@ int ObLoadDataDirectImpl::SimpleDataSplitUtils::split(const DataAccessParam &dat
 | 
			
		||||
                                           data_access_param.file_cs_type_))) {
 | 
			
		||||
    ret = OB_ERR_UNEXPECTED;
 | 
			
		||||
    LOG_WARN("unexpected data format", KR(ret), K(data_access_param));
 | 
			
		||||
  } else if (1 == count) {
 | 
			
		||||
  } else if (1 == count || (ObLoadFileLocation::CLIENT_DISK == data_access_param.file_location_)) {
 | 
			
		||||
    if (OB_FAIL(data_desc_iter.add_data_desc(data_desc))) {
 | 
			
		||||
      LOG_WARN("fail to push back", KR(ret));
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    ObArenaAllocator allocator;
 | 
			
		||||
    allocator.set_tenant_id(MTL_ID());
 | 
			
		||||
 | 
			
		||||
    int64_t end_offset = data_desc.end_;
 | 
			
		||||
    SequentialDataAccessor io_device;
 | 
			
		||||
    if (OB_FAIL(io_device.init(data_access_param, data_desc.filename_))) {
 | 
			
		||||
      LOG_WARN("fail to init io device", KR(ret), K(data_desc.filename_));
 | 
			
		||||
    } else if (-1 == end_offset && OB_FAIL(io_device.get_file_size(end_offset))) {
 | 
			
		||||
 | 
			
		||||
    ObFileReadParam file_read_param;
 | 
			
		||||
    file_read_param.file_location_ = data_access_param.file_location_;
 | 
			
		||||
    file_read_param.filename_      = data_desc.filename_;
 | 
			
		||||
    file_read_param.access_info_   = data_access_param.access_info_;
 | 
			
		||||
    file_read_param.packet_handle_ = NULL;
 | 
			
		||||
    file_read_param.session_       = NULL;
 | 
			
		||||
    file_read_param.timeout_ts_    = THIS_WORKER.get_timeout_ts();
 | 
			
		||||
 | 
			
		||||
    ObFileReader *file_reader = NULL;
 | 
			
		||||
    if (OB_FAIL(ObFileReader::open(file_read_param, allocator, file_reader))) {
 | 
			
		||||
      LOG_WARN("failed to open file.", KR(ret), K(data_desc));
 | 
			
		||||
    } else if (!file_reader->seekable()) {
 | 
			
		||||
      if (OB_FAIL(data_desc_iter.add_data_desc(data_desc))) {
 | 
			
		||||
        LOG_WARN("fail to push back", KR(ret));
 | 
			
		||||
      }
 | 
			
		||||
    } else if (-1 == end_offset && OB_FAIL(file_reader->get_file_size(end_offset))) {
 | 
			
		||||
      LOG_WARN("fail to get io device file size", KR(ret), K(end_offset));
 | 
			
		||||
    } else {
 | 
			
		||||
      const int64_t file_size = end_offset - data_desc.start_;
 | 
			
		||||
@ -953,26 +828,24 @@ int ObLoadDataDirectImpl::SimpleDataSplitUtils::split(const DataAccessParam &dat
 | 
			
		||||
        const char line_term_char = data_access_param.file_format_.line_term_str_.ptr()[0];
 | 
			
		||||
        const int64_t buf_size = (128LL << 10) + 1;
 | 
			
		||||
        const int64_t split_size = file_size / count;
 | 
			
		||||
        ObArenaAllocator allocator;
 | 
			
		||||
        char *buf = nullptr;
 | 
			
		||||
        int64_t read_size = 0;
 | 
			
		||||
        DataDesc data_desc_ret;
 | 
			
		||||
        data_desc_ret.file_idx_ = data_desc.file_idx_;
 | 
			
		||||
        data_desc_ret.filename_ = data_desc.filename_;
 | 
			
		||||
        data_desc_ret.start_ = data_desc.start_;
 | 
			
		||||
        allocator.set_tenant_id(MTL_ID());
 | 
			
		||||
        if (OB_ISNULL(buf = static_cast<char *>(allocator.alloc(buf_size)))) {
 | 
			
		||||
          ret = OB_ALLOCATE_MEMORY_FAILED;
 | 
			
		||||
          LOG_WARN("fail to alloc memory", KR(ret));
 | 
			
		||||
        }
 | 
			
		||||
        for (int64_t i = 0; OB_SUCC(ret) && i < count - 1; ++i) {
 | 
			
		||||
          int64_t read_offset = data_desc.start_ + split_size * (i + 1);
 | 
			
		||||
          io_device.seek(read_offset);
 | 
			
		||||
				  file_reader->seek(read_offset);
 | 
			
		||||
          char *found = nullptr;
 | 
			
		||||
          while (OB_SUCC(ret) && end_offset > io_device.get_offset() && nullptr == found) {
 | 
			
		||||
            read_offset = io_device.get_offset();
 | 
			
		||||
          while (OB_SUCC(ret) && end_offset > file_reader->get_offset() && nullptr == found) {
 | 
			
		||||
            read_offset = file_reader->get_offset();
 | 
			
		||||
            const int64_t read_count = MIN(end_offset - read_offset, buf_size - 1);
 | 
			
		||||
            if (OB_FAIL(io_device.read(buf, read_count, read_size))) {
 | 
			
		||||
            if (OB_FAIL(file_reader->readn(buf, read_count, read_size))) {
 | 
			
		||||
              LOG_WARN("fail to do read", KR(ret), K(read_offset), K(read_count));
 | 
			
		||||
            } else if (OB_UNLIKELY(read_count != read_size)) {
 | 
			
		||||
              ret = OB_ERR_UNEXPECTED;
 | 
			
		||||
@ -1004,6 +877,11 @@ int ObLoadDataDirectImpl::SimpleDataSplitUtils::split(const DataAccessParam &dat
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (OB_NOT_NULL(file_reader)) {
 | 
			
		||||
      file_reader->~ObFileReader();
 | 
			
		||||
      allocator.free(file_reader);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
@ -1139,6 +1017,7 @@ int ObLoadDataDirectImpl::FileLoadExecutor::execute()
 | 
			
		||||
      LOG_WARN("fail to prepare execute", KR(ret));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    LOG_TRACE("file load executor prepare execute done", K(ret));
 | 
			
		||||
    while (OB_SUCC(ret) && OB_SUCC(execute_ctx_->exec_ctx_.check_status())) {
 | 
			
		||||
      TaskHandle *handle = nullptr;
 | 
			
		||||
      if (OB_FAIL(get_next_task_handle(handle))) {
 | 
			
		||||
@ -1177,8 +1056,9 @@ int ObLoadDataDirectImpl::FileLoadExecutor::execute()
 | 
			
		||||
        LOG_WARN("fail to handle all task result", KR(ret));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  LOG_TRACE("large file load executor init done", K(ret));
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -9,7 +9,6 @@
 | 
			
		||||
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
			
		||||
 * See the Mulan PubL v2 for more details.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include "lib/allocator/page_arena.h"
 | 
			
		||||
@ -20,6 +19,7 @@
 | 
			
		||||
#include "share/table/ob_table_load_define.h"
 | 
			
		||||
#include "sql/engine/cmd/ob_load_data_impl.h"
 | 
			
		||||
#include "sql/engine/cmd/ob_load_data_parser.h"
 | 
			
		||||
#include "sql/engine/cmd/ob_load_data_file_reader.h"
 | 
			
		||||
#include "common/storage/ob_io_device.h"
 | 
			
		||||
#include "observer/table_load/ob_table_load_exec_ctx.h"
 | 
			
		||||
#include "observer/table_load/ob_table_load_instance.h"
 | 
			
		||||
@ -170,61 +170,6 @@ private:
 | 
			
		||||
    int64_t pos_;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  class IRandomIODevice
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    virtual ~IRandomIODevice() = default;
 | 
			
		||||
    virtual int open(const DataAccessParam &data_access_param, const ObString &filename) = 0;
 | 
			
		||||
    virtual int pread(char *buf, int64_t count, int64_t offset, int64_t &read_size) = 0;
 | 
			
		||||
    virtual int get_file_size(int64_t &file_size) = 0;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  class RandomFileReader : public IRandomIODevice
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    RandomFileReader();
 | 
			
		||||
    virtual ~RandomFileReader();
 | 
			
		||||
    int open(const DataAccessParam &data_access_param, const ObString &filename) override;
 | 
			
		||||
    int pread(char *buf, int64_t count, int64_t offset, int64_t &read_size) override;
 | 
			
		||||
    int get_file_size(int64_t &file_size) override;
 | 
			
		||||
  private:
 | 
			
		||||
    ObString filename_;
 | 
			
		||||
    ObFileReader file_reader_;
 | 
			
		||||
    bool is_inited_;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  class RandomOSSReader : public IRandomIODevice
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    RandomOSSReader();
 | 
			
		||||
    virtual ~RandomOSSReader();
 | 
			
		||||
    int open(const DataAccessParam &data_access_param, const ObString &filename) override;
 | 
			
		||||
    int pread(char *buf, int64_t count, int64_t offset, int64_t &read_size) override;
 | 
			
		||||
    int get_file_size(int64_t &file_size) override;
 | 
			
		||||
  private:
 | 
			
		||||
    ObIODevice *device_handle_;
 | 
			
		||||
    ObIOFd fd_;
 | 
			
		||||
    bool is_inited_;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  class SequentialDataAccessor
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    SequentialDataAccessor();
 | 
			
		||||
    ~SequentialDataAccessor();
 | 
			
		||||
    int init(const DataAccessParam &data_access_param, const ObString &filename);
 | 
			
		||||
    int read(char *buf, int64_t count, int64_t &read_size);
 | 
			
		||||
    int get_file_size(int64_t &file_size);
 | 
			
		||||
    void seek(int64_t offset) { offset_ = offset; }
 | 
			
		||||
    int64_t get_offset() const { return offset_; }
 | 
			
		||||
  private:
 | 
			
		||||
    RandomFileReader random_file_reader_;
 | 
			
		||||
    RandomOSSReader random_oss_reader_;
 | 
			
		||||
    IRandomIODevice *random_io_device_;
 | 
			
		||||
    int64_t offset_;
 | 
			
		||||
    bool is_inited_;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  struct DataBuffer
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
@ -256,20 +201,25 @@ private:
 | 
			
		||||
  {
 | 
			
		||||
  public:
 | 
			
		||||
    DataReader();
 | 
			
		||||
    ~DataReader();
 | 
			
		||||
    int init(const DataAccessParam &data_access_param, LoadExecuteContext &execute_ctx,
 | 
			
		||||
             const DataDesc &data_desc, bool read_raw = false);
 | 
			
		||||
    int get_next_buffer(ObLoadFileBuffer &file_buffer, int64_t &line_count,
 | 
			
		||||
                        int64_t limit = INT64_MAX);
 | 
			
		||||
    int get_next_raw_buffer(DataBuffer &data_buffer);
 | 
			
		||||
    bool has_incomplate_data() const { return data_trimer_.has_incomplate_data(); }
 | 
			
		||||
    bool is_end_file() const { return io_accessor_.get_offset() >= end_offset_; }
 | 
			
		||||
    bool is_end_file() const;
 | 
			
		||||
    ObCSVGeneralParser &get_csv_parser() { return csv_parser_; }
 | 
			
		||||
 | 
			
		||||
  private:
 | 
			
		||||
    int read_buffer(ObLoadFileBuffer &file_buffer);
 | 
			
		||||
 | 
			
		||||
  private:
 | 
			
		||||
    LoadExecuteContext *execute_ctx_;
 | 
			
		||||
    ObCSVGeneralParser csv_parser_; // 用来计算完整行
 | 
			
		||||
    ObLoadFileDataTrimer data_trimer_; // 缓存不完整行的数据
 | 
			
		||||
    SequentialDataAccessor io_accessor_;
 | 
			
		||||
    int64_t end_offset_;
 | 
			
		||||
    ObFileReader *file_reader_;
 | 
			
		||||
    int64_t end_offset_; // use -1 in stream file such as load data local
 | 
			
		||||
    bool read_raw_;
 | 
			
		||||
    bool is_iter_end_;
 | 
			
		||||
    bool is_inited_;
 | 
			
		||||
 | 
			
		||||
@ -38,6 +38,7 @@ int ObLoadDataExecutor::check_is_direct_load(ObTableDirectInsertCtx &ctx, const
 | 
			
		||||
  } else {
 | 
			
		||||
    ctx.set_is_direct(false);
 | 
			
		||||
  }
 | 
			
		||||
  LOG_INFO("check load data is direct done.", K(ctx.get_is_direct()));
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -70,6 +71,8 @@ int ObLoadDataExecutor::execute(ObExecContext &ctx, ObLoadDataStmt &stmt)
 | 
			
		||||
  if (OB_SUCC(ret)) {
 | 
			
		||||
    if (OB_FAIL(load_impl->execute(ctx, stmt))) {
 | 
			
		||||
      LOG_WARN("failed to execute load data stmt", K(ret));
 | 
			
		||||
    } else {
 | 
			
		||||
      LOG_TRACE("load data success");
 | 
			
		||||
    }
 | 
			
		||||
    load_impl->~ObLoadDataBase();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										458
									
								
								src/sql/engine/cmd/ob_load_data_file_reader.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										458
									
								
								src/sql/engine/cmd/ob_load_data_file_reader.cpp
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,458 @@
 | 
			
		||||
/**
 | 
			
		||||
 * Copyright (c) 2021 OceanBase
 | 
			
		||||
 * OceanBase CE is licensed under Mulan PubL v2.
 | 
			
		||||
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 | 
			
		||||
 * You may obtain a copy of Mulan PubL v2 at:
 | 
			
		||||
 *          http://license.coscl.org.cn/MulanPubL-2.0
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
			
		||||
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
			
		||||
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
			
		||||
 * See the Mulan PubL v2 for more details.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#define USING_LOG_PREFIX  SQL_ENG
 | 
			
		||||
 | 
			
		||||
#include "sql/engine/cmd/ob_load_data_file_reader.h"
 | 
			
		||||
#include "share/ob_device_manager.h"
 | 
			
		||||
#include "share/backup/ob_backup_io_adapter.h"
 | 
			
		||||
#include "rpc/obmysql/ob_i_cs_mem_pool.h"
 | 
			
		||||
#include "rpc/obmysql/ob_mysql_packet.h"
 | 
			
		||||
#include "rpc/obmysql/packet/ompk_local_infile.h"
 | 
			
		||||
 | 
			
		||||
namespace oceanbase
 | 
			
		||||
{
 | 
			
		||||
namespace sql
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * ObFileReadParam
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// Builds a parameter set with no packet handle and no session attached.
// timeout_ts_ starts at -1, the value ObPacketStreamFileReader::is_timeout
// treats as "no deadline". file_location_ is not assigned here.
ObFileReadParam::ObFileReadParam()
    : packet_handle_(nullptr), session_(nullptr), timeout_ts_(-1)
{}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * ObFileReader
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
int ObFileReader::open(const ObFileReadParam ¶m, ObIAllocator &allocator, ObFileReader *& file_reader)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (param.file_location_ == ObLoadFileLocation::SERVER_DISK) {
 | 
			
		||||
    ObRandomFileReader *tmp_reader = OB_NEWx(ObRandomFileReader, &allocator, allocator);
 | 
			
		||||
    if (OB_ISNULL(tmp_reader)) {
 | 
			
		||||
      LOG_WARN("failed to create ObRandomFileReader", K(ret));
 | 
			
		||||
    } else if (OB_FAIL(tmp_reader->open(param.filename_))) {
 | 
			
		||||
      LOG_WARN("fail to open random file reader", KR(ret), K(param.filename_));
 | 
			
		||||
      tmp_reader->~ObRandomFileReader();
 | 
			
		||||
      allocator.free(tmp_reader);
 | 
			
		||||
    } else {
 | 
			
		||||
      file_reader = tmp_reader;
 | 
			
		||||
    }
 | 
			
		||||
  } else if (param.file_location_ == ObLoadFileLocation::OSS) {
 | 
			
		||||
    ObRandomOSSReader *tmp_reader = OB_NEWx(ObRandomOSSReader, &allocator, allocator);
 | 
			
		||||
    if (OB_ISNULL(tmp_reader)) {
 | 
			
		||||
      LOG_WARN("failed to create RandomOSSReader", K(ret));
 | 
			
		||||
    } else if (OB_FAIL(tmp_reader->open(param.access_info_, param.filename_))) {
 | 
			
		||||
      LOG_WARN("fail to open random oss reader", KR(ret), K(param.filename_));
 | 
			
		||||
      tmp_reader->~ObRandomOSSReader();
 | 
			
		||||
      allocator.free(tmp_reader);
 | 
			
		||||
    } else {
 | 
			
		||||
      file_reader = tmp_reader;
 | 
			
		||||
    }
 | 
			
		||||
  } else if (param.file_location_ == ObLoadFileLocation::CLIENT_DISK) {
 | 
			
		||||
    if (OB_ISNULL(param.packet_handle_)) {
 | 
			
		||||
      ret = OB_ERR_UNEXPECTED;
 | 
			
		||||
      LOG_WARN("cannot create packet stream file reader while the packet handle is null", K(ret));
 | 
			
		||||
    } else {
 | 
			
		||||
      ObPacketStreamFileReader *tmp_reader = OB_NEWx(ObPacketStreamFileReader, &allocator, allocator);
 | 
			
		||||
      if (OB_ISNULL(tmp_reader)) {
 | 
			
		||||
        LOG_WARN("failed to create ObPacketStreamFileReader", K(ret));
 | 
			
		||||
      } else if (OB_FAIL(tmp_reader->open(param.filename_, *param.packet_handle_, param.session_, param.timeout_ts_))) {
 | 
			
		||||
        LOG_WARN("failed to open packet stream file reader", KR(ret), K(param.filename_));
 | 
			
		||||
        tmp_reader->~ObPacketStreamFileReader();
 | 
			
		||||
        allocator.free(tmp_reader);
 | 
			
		||||
      } else {
 | 
			
		||||
        file_reader = tmp_reader;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    ret = OB_NOT_SUPPORTED;
 | 
			
		||||
    LOG_WARN("not supported load file location", KR(ret), K(param.file_location_));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObFileReader::readn(char *buffer, int64_t count, int64_t &read_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  read_size = 0;
 | 
			
		||||
  while (OB_SUCC(ret) && !eof() && read_size < count) {
 | 
			
		||||
    int64_t this_read_size = 0;
 | 
			
		||||
    ret = this->read(buffer + read_size, count - read_size, this_read_size);
 | 
			
		||||
    if (OB_SUCC(ret)) {
 | 
			
		||||
      read_size += this_read_size;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * ObRandomFileReader
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// Starts un-opened: offset 0, not at end-of-file, not initialized.
// open() must succeed before read()/get_file_size() can be used.
ObRandomFileReader::ObRandomFileReader(ObIAllocator &allocator)
    : ObFileReader(allocator), offset_(0), eof_(false), is_inited_(false)
{}

// Nothing to release explicitly here; members clean up after themselves.
ObRandomFileReader::~ObRandomFileReader() = default;
 | 
			
		||||
 | 
			
		||||
int ObRandomFileReader::open(const ObString &filename)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (is_inited_) {
 | 
			
		||||
    ret = OB_INIT_TWICE;
 | 
			
		||||
    LOG_WARN("ObRandomFileReader init twice", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_FAIL(file_reader_.open(filename.ptr(), false))) {
 | 
			
		||||
    LOG_WARN("fail to open file", KR(ret), K(filename));
 | 
			
		||||
  } else {
 | 
			
		||||
    filename_ = filename;
 | 
			
		||||
    offset_ = 0;
 | 
			
		||||
    eof_ = false;
 | 
			
		||||
    is_inited_ = true;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObRandomFileReader::read(char *buf, int64_t count, int64_t &read_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (!is_inited_) {
 | 
			
		||||
    ret = OB_NOT_INIT;
 | 
			
		||||
    LOG_WARN("ObRandomFileReader not init", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_FAIL(file_reader_.pread(buf, count, offset_, read_size))) {
 | 
			
		||||
    LOG_WARN("fail to pread file buf", KR(ret), K(count), K_(offset), K(read_size));
 | 
			
		||||
  } else if (0 == read_size) {
 | 
			
		||||
    eof_ = true;
 | 
			
		||||
  } else {
 | 
			
		||||
    offset_ += read_size;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObRandomFileReader::seek(int64_t offset)
 | 
			
		||||
{
 | 
			
		||||
  offset_ = offset;
 | 
			
		||||
  return OB_SUCCESS;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObRandomFileReader::get_file_size(int64_t &file_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (!is_inited_) {
 | 
			
		||||
    ret = OB_NOT_INIT;
 | 
			
		||||
    LOG_WARN("ObRandomFileReader not init", KR(ret), KP(this));
 | 
			
		||||
  } else {
 | 
			
		||||
    file_size = ::get_file_size(filename_.ptr());
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * ObRandomOSSReader
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
// Starts without a device handle; open() acquires the device and the fd.
ObRandomOSSReader::ObRandomOSSReader(ObIAllocator &allocator)
    : ObFileReader(allocator), device_handle_(nullptr), offset_(0), eof_(false), is_inited_(false)
{}
 | 
			
		||||
 | 
			
		||||
// Tear-down order matters: the fd is closed through the device first, and
// only afterwards is the device handed back to the device manager.
ObRandomOSSReader::~ObRandomOSSReader()
{
  const bool fd_is_open = fd_.is_valid();
  if (fd_is_open) {
    device_handle_->close(fd_);
    fd_.reset();
  }

  if (nullptr != device_handle_) {
    common::ObDeviceManager::get_instance().release_device(device_handle_);
    device_handle_ = nullptr;
  }

  is_inited_ = false;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Acquire the storage device for `filename`, configure the access type and
 * open the object for reading from offset 0.
 *
 * @param storage_info  access information passed to the backup IO adapter
 * @param filename      object path
 * @return OB_INIT_TWICE if already opened, the first failing step's error,
 *         or OB_SUCCESS
 */
int ObRandomOSSReader::open(const share::ObBackupStorageInfo &storage_info, const ObString &filename)
{
  int ret = OB_SUCCESS;
  ObIODOpt single_opt;
  ObIODOpts iod_opts;
  ObBackupIoAdapter io_adapter;
  // the option list is backed by one local slot and starts out empty
  iod_opts.opts_ = &single_opt;
  iod_opts.opt_cnt_ = 0;

  if (IS_INIT) {
    ret = OB_INIT_TWICE;
    LOG_WARN("ObRandomOSSReader init twice", KR(ret), KP(this));
  }
  if (OB_SUCC(ret)) {
    if (OB_FAIL(io_adapter.get_and_init_device(device_handle_, &storage_info, filename))) {
      LOG_WARN("fail to get device manager", KR(ret), K(filename));
    } else if (OB_FAIL(io_adapter.set_access_type(&iod_opts, false, 1))) {
      LOG_WARN("fail to set access type", KR(ret));
    } else if (OB_FAIL(device_handle_->open(to_cstring(filename), -1, 0, fd_, &iod_opts))) {
      LOG_WARN("fail to open oss file", KR(ret), K(filename));
    } else {
      offset_ = 0;
      eof_ = false;
      is_inited_ = true;
    }
  }
  return ret;
}
 | 
			
		||||
 | 
			
		||||
int ObRandomOSSReader::read(char *buf, int64_t count, int64_t &read_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (IS_NOT_INIT) {
 | 
			
		||||
    ret = OB_NOT_INIT;
 | 
			
		||||
    LOG_WARN("ObRandomOSSReader not init", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_FAIL(device_handle_->pread(fd_, offset_, count, buf, read_size))) {
 | 
			
		||||
    LOG_WARN("fail to pread oss buf", KR(ret), K_(offset), K(count), K(read_size));
 | 
			
		||||
  } else if (0 == read_size) {
 | 
			
		||||
    eof_ = true;
 | 
			
		||||
  } else {
 | 
			
		||||
    offset_ += read_size;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObRandomOSSReader::seek(int64_t offset)
 | 
			
		||||
{
 | 
			
		||||
  offset_ = offset;
 | 
			
		||||
  return OB_SUCCESS;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObRandomOSSReader::get_file_size(int64_t &file_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  ObBackupIoAdapter util;
 | 
			
		||||
  if (IS_NOT_INIT) {
 | 
			
		||||
    ret = OB_NOT_INIT;
 | 
			
		||||
    LOG_WARN("ObRandomOSSReader not init", KR(ret), KP(this));
 | 
			
		||||
  } else if (OB_FAIL(util.get_file_size(device_handle_, fd_, file_size))) {
 | 
			
		||||
    LOG_WARN("fail to get oss file size", KR(ret), K(file_size));
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * ObPacketStreamFileReader
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
class CSMemPoolAdaptor : public obmysql::ObICSMemPool
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  explicit CSMemPoolAdaptor(ObIAllocator *allocator)
 | 
			
		||||
      : allocator_(allocator)
 | 
			
		||||
  {}
 | 
			
		||||
 | 
			
		||||
  virtual ~CSMemPoolAdaptor() {}
 | 
			
		||||
 | 
			
		||||
  void *alloc(int64_t size) override
 | 
			
		||||
  {
 | 
			
		||||
    return allocator_->alloc(size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  ObIAllocator *allocator_;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Starts detached: no packet handle, no session, no cached packet and
// nothing received or read yet. The arena used for incoming packets is
// built on top of `allocator`.
ObPacketStreamFileReader::ObPacketStreamFileReader(ObIAllocator &allocator)
    : ObStreamFileReader(allocator),
      packet_handle_(nullptr),
      session_(nullptr),
      timeout_ts_(INT64_MAX),
      arena_allocator_(allocator),
      cached_packet_(nullptr),
      received_size_(0),
      read_size_(0),
      eof_(false)
{}
 | 
			
		||||
 | 
			
		||||
/**
 * Drain whatever the client still has to send, then drop the packet arena.
 *
 * Processing may stop early on an error, but the client still sends the
 * whole file and the normal SQL processor cannot handle those packets, so
 * they are consumed here until the terminating empty packet or a failure.
 */
ObPacketStreamFileReader::~ObPacketStreamFileReader()
{
  int ret = OB_SUCCESS;

  // -1 disables the deadline check (see is_timeout) while draining.
  timeout_ts_ = -1;
  // receive_packet() dereferences packet_handle_, so a reader that was never
  // opened must not enter the drain loop.
  // NOTE(review): when the session is killed, receive_packet() returns
  // without sleeping, so this loop polls until a packet or an error arrives.
  while (OB_NOT_NULL(packet_handle_) && !eof_ && OB_SUCC(ret)) {
    ret = receive_packet();
  }
  arena_allocator_.reset();
}
 | 
			
		||||
 | 
			
		||||
int ObPacketStreamFileReader::open(const ObString &filename,
 | 
			
		||||
                                   observer::ObIMPPacketSender &packet_handle,
 | 
			
		||||
                                   ObSQLSessionInfo *session,
 | 
			
		||||
                                   int64_t timeout_ts)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (OB_NOT_NULL(packet_handle_)) {
 | 
			
		||||
    ret = OB_INIT_TWICE;
 | 
			
		||||
  } else {
 | 
			
		||||
 | 
			
		||||
    // in `load data local` request, we should send the filename to client
 | 
			
		||||
    obmysql::OMPKLocalInfile filename_packet;
 | 
			
		||||
    filename_packet.set_filename(filename);
 | 
			
		||||
    if (OB_FAIL(packet_handle.response_packet(filename_packet, session))) {
 | 
			
		||||
      LOG_INFO("failed to send local infile packet to client", K(ret), K(filename));
 | 
			
		||||
    } else if (OB_FAIL(packet_handle.flush_buffer(false/*is_last*/))) {
 | 
			
		||||
      LOG_INFO("failed to flush socket buffer while send local infile packet", K(ret), K(filename));
 | 
			
		||||
    } else {
 | 
			
		||||
      LOG_TRACE("send filename to client success", K(filename));
 | 
			
		||||
 | 
			
		||||
      observer::ObSMConnection *sm_connection = session->get_sm_connection();
 | 
			
		||||
      if (OB_NOT_NULL(sm_connection) &&
 | 
			
		||||
          sm_connection->pkt_rec_wrapper_.enable_proto_dia()) {
 | 
			
		||||
        sm_connection->pkt_rec_wrapper_.record_send_mysql_pkt(filename_packet,
 | 
			
		||||
                        filename_packet.get_serialize_size() + OB_MYSQL_HEADER_LENGTH);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    packet_handle_ = &packet_handle;
 | 
			
		||||
    session_       = session;
 | 
			
		||||
    timeout_ts_    = timeout_ts;
 | 
			
		||||
    received_size_ = 0;
 | 
			
		||||
    read_size_     = 0;
 | 
			
		||||
    eof_           = false;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * As described in the MySQL/MariaDB documentation, the client sends the file content with
 | 
			
		||||
 * continuous packets and signals `eof` with an empty packet. Every non-empty packet
 | 
			
		||||
 * has the format:
 | 
			
		||||
 * -------------------
 | 
			
		||||
 * MySQL Packet Header
 | 
			
		||||
 * string<EOF>
 | 
			
		||||
 * -------------------
 | 
			
		||||
 * The notation is "string<EOF>" Strings whose length will be calculated by
 | 
			
		||||
 * the packet remaining length.
 | 
			
		||||
 */
 | 
			
		||||
int ObPacketStreamFileReader::read(char *buf, int64_t count, int64_t &read_size)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (OB_ISNULL(cached_packet_) || read_size_ == received_size_) {
 | 
			
		||||
    ret = receive_packet();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const int64_t remain_in_packet = received_size_ - read_size_;
 | 
			
		||||
  if (OB_SUCC(ret) && OB_NOT_NULL(cached_packet_) && (!eof_ || remain_in_packet > 0)) {
 | 
			
		||||
    read_size = MIN(count, remain_in_packet);
 | 
			
		||||
    // a MySQL packet contains a header and payload. The payload is the file content here.
 | 
			
		||||
    // In the mysql_packet code, it use the first byte as MySQL command, but there is no
 | 
			
		||||
    // MySQL command in the file content packet, so we backward 1 byte.
 | 
			
		||||
    const int64_t packet_offset = cached_packet_->get_pkt_len() - remain_in_packet;
 | 
			
		||||
    MEMCPY(buf, cached_packet_->get_cdata() - 1 + packet_offset, read_size);
 | 
			
		||||
    read_size_ += read_size;
 | 
			
		||||
  } else {
 | 
			
		||||
    read_size = 0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (is_timeout()) {
 | 
			
		||||
    ret = OB_TIMEOUT;
 | 
			
		||||
    LOG_WARN("load data reader file timeout", KR(ret));
 | 
			
		||||
  } else if (session_ != NULL && session_->is_query_killed()) {
 | 
			
		||||
    ret = OB_ERR_QUERY_INTERRUPTED;
 | 
			
		||||
    LOG_WARN("load data reader terminated as the query is killed", KR(ret));
 | 
			
		||||
  } else if (session_ != NULL && session_->is_zombie()) {
 | 
			
		||||
    ret = OB_SESSION_KILLED;
 | 
			
		||||
    LOG_WARN("load data reader terminated as the session is killed", KR(ret));
 | 
			
		||||
  } else if (!eof_ && read_size == 0) {
 | 
			
		||||
    ret = OB_IO_ERROR;
 | 
			
		||||
    LOG_WARN("[should not happen] cannot read data but eof is false", KR(ret));
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObPacketStreamFileReader::receive_packet()
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  ret = release_packet();
 | 
			
		||||
 | 
			
		||||
  if (OB_SUCC(ret)) {
 | 
			
		||||
    arena_allocator_.reuse();
 | 
			
		||||
    CSMemPoolAdaptor mem_pool(&arena_allocator_);
 | 
			
		||||
 | 
			
		||||
    // We read packet until we got one or timeout or error occurs
 | 
			
		||||
    obmysql::ObMySQLPacket *pkt = NULL;
 | 
			
		||||
    ret = packet_handle_->read_packet(mem_pool, pkt);
 | 
			
		||||
    cached_packet_ = static_cast<obmysql::ObMySQLRawPacket *>(pkt);
 | 
			
		||||
 | 
			
		||||
    while (OB_SUCC(ret) && OB_ISNULL(cached_packet_) && !is_timeout() && !is_killed()) {
 | 
			
		||||
      // sleep can reduce cpu usage while the network is not so good.
 | 
			
		||||
      // We need not worry about the speed while the speed of load data core is lower than
 | 
			
		||||
      // file receiver's.
 | 
			
		||||
      usleep(100 * 1000); // 100 ms
 | 
			
		||||
      ret = packet_handle_->read_packet(mem_pool, pkt);
 | 
			
		||||
      cached_packet_ = static_cast<obmysql::ObMySQLRawPacket *>(pkt);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (OB_SUCC(ret) && OB_NOT_NULL(cached_packet_)) {
 | 
			
		||||
      const int pkt_len = cached_packet_->get_pkt_len();
 | 
			
		||||
      if (0 == pkt_len) { // empty packet
 | 
			
		||||
        eof_ = true;
 | 
			
		||||
        (void)release_packet();
 | 
			
		||||
      } else {
 | 
			
		||||
        received_size_ += pkt_len;
 | 
			
		||||
        LOG_TRACE("got a packet", K(pkt_len));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // If anything wrong, we end the reading
 | 
			
		||||
  if (OB_FAIL(ret)) {
 | 
			
		||||
    eof_ = true;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObPacketStreamFileReader::release_packet()
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
  if (OB_NOT_NULL(cached_packet_)) {
 | 
			
		||||
    ret = packet_handle_->release_packet(cached_packet_);
 | 
			
		||||
    cached_packet_ = NULL;
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool ObPacketStreamFileReader::is_timeout() const
 | 
			
		||||
{
 | 
			
		||||
  return timeout_ts_ != -1 && ObTimeUtility::current_time() >= timeout_ts_;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool ObPacketStreamFileReader::is_killed() const
 | 
			
		||||
{
 | 
			
		||||
  return NULL != session_ && (session_->is_query_killed() || session_->is_zombie());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace sql
 | 
			
		||||
} // namespace oceanbase
 | 
			
		||||
							
								
								
									
										205
									
								
								src/sql/engine/cmd/ob_load_data_file_reader.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										205
									
								
								src/sql/engine/cmd/ob_load_data_file_reader.h
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,205 @@
 | 
			
		||||
/**
 | 
			
		||||
 * Copyright (c) 2021 OceanBase
 | 
			
		||||
 * OceanBase CE is licensed under Mulan PubL v2.
 | 
			
		||||
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 | 
			
		||||
 * You may obtain a copy of Mulan PubL v2 at:
 | 
			
		||||
 *          http://license.coscl.org.cn/MulanPubL-2.0
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
			
		||||
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
			
		||||
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
			
		||||
 * See the Mulan PubL v2 for more details.
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef OCEANBASE_SQL_LOAD_DATA_FILE_READER_H_
 | 
			
		||||
#define OCEANBASE_SQL_LOAD_DATA_FILE_READER_H_
 | 
			
		||||
 | 
			
		||||
#include "lib/oblog/ob_log_module.h"
 | 
			
		||||
#include "lib/string/ob_string.h"
 | 
			
		||||
#include "lib/allocator/ob_allocator.h"
 | 
			
		||||
#include "lib/file/ob_file.h"
 | 
			
		||||
#include "sql/resolver/cmd/ob_load_data_stmt.h"
 | 
			
		||||
#include "share/backup/ob_backup_struct.h"
 | 
			
		||||
#include "observer/mysql/obmp_packet_sender.h"
 | 
			
		||||
 | 
			
		||||
namespace oceanbase
 | 
			
		||||
{
 | 
			
		||||
namespace sql
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
class ObSQLSessionInfo;
 | 
			
		||||
 | 
			
		||||
struct ObFileReadParam
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  ObFileReadParam();
 | 
			
		||||
  TO_STRING_KV(K_(file_location), K_(filename), K_(timeout_ts));
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  ObLoadFileLocation file_location_;
 | 
			
		||||
  ObString filename_;
 | 
			
		||||
  share::ObBackupStorageInfo access_info_;
 | 
			
		||||
  observer::ObIMPPacketSender *packet_handle_;
 | 
			
		||||
  ObSQLSessionInfo *session_;
 | 
			
		||||
  int64_t timeout_ts_;  // A job always has a deadline and file reading may cost a long time
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class ObFileReader
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  ObFileReader(ObIAllocator &allocator) : allocator_(allocator) {}
 | 
			
		||||
  virtual ~ObFileReader() {}
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * read data from file into the buffer
 | 
			
		||||
   *
 | 
			
		||||
   * @note read_size equals to 0 does not mean end of file.
 | 
			
		||||
   *       You should call `eof` to decide whether end of file.
 | 
			
		||||
   *       This is not the same with the system call `read`.
 | 
			
		||||
   */
 | 
			
		||||
  virtual int  read(char *buf, int64_t count, int64_t &read_size) = 0;
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * get the file size
 | 
			
		||||
   *
 | 
			
		||||
   * Stream files may not support this feature.
 | 
			
		||||
   */
 | 
			
		||||
  virtual int  get_file_size(int64_t &file_size) = 0;
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * seek to the specific position and the `read` subsequently fetch data from the position
 | 
			
		||||
   *
 | 
			
		||||
   * You can use `seekable` to check whether this file can read at random position.
 | 
			
		||||
   */
 | 
			
		||||
  virtual int  seek(int64_t offset) = 0;
 | 
			
		||||
  virtual bool seekable() const { return true; }
 | 
			
		||||
  virtual int64_t get_offset() const = 0;
 | 
			
		||||
  virtual bool eof() const = 0;
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * read data until we got `count` bytes data or exception occurs
 | 
			
		||||
   *
 | 
			
		||||
   * This routine calls `read` repeatly until we got `count` bytes
 | 
			
		||||
   * data.
 | 
			
		||||
   * As usual, the normal `read` try to read data once and return.
 | 
			
		||||
   */
 | 
			
		||||
  int readn(char *buffer, int64_t count, int64_t &read_size);
 | 
			
		||||
 | 
			
		||||
  /**
 | 
			
		||||
   * A file reader factory
 | 
			
		||||
   */
 | 
			
		||||
  static int open(const ObFileReadParam ¶m, ObIAllocator &allocator, ObFileReader *& file_reader);
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
  ObIAllocator &allocator_;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Stream file that can read sequential only
 | 
			
		||||
 */
 | 
			
		||||
class ObStreamFileReader : public ObFileReader
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  ObStreamFileReader(ObIAllocator &allocator): ObFileReader(allocator) {}
 | 
			
		||||
  virtual ~ObStreamFileReader() {}
 | 
			
		||||
 | 
			
		||||
  int  get_file_size(int64_t &file_size) override { return OB_NOT_SUPPORTED; }
 | 
			
		||||
  int  seek(int64_t offset) override { return OB_NOT_SUPPORTED; }
 | 
			
		||||
  bool seekable() const override { return false; }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class ObRandomFileReader : public ObFileReader
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  ObRandomFileReader(ObIAllocator &allocator);
 | 
			
		||||
  virtual ~ObRandomFileReader();
 | 
			
		||||
 | 
			
		||||
  int  read(char *buf, int64_t count, int64_t &read_size) override;
 | 
			
		||||
  int  seek(int64_t offset) override;
 | 
			
		||||
  int  get_file_size(int64_t &file_size) override;
 | 
			
		||||
  int64_t get_offset() const override { return offset_; }
 | 
			
		||||
  bool eof() const override { return eof_; }
 | 
			
		||||
 | 
			
		||||
  int open(const ObString &filename);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  ObString             filename_;
 | 
			
		||||
  common::ObFileReader file_reader_;
 | 
			
		||||
  int64_t              offset_;
 | 
			
		||||
  bool                 eof_;
 | 
			
		||||
  bool                 is_inited_;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class ObRandomOSSReader : public ObFileReader
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  ObRandomOSSReader(ObIAllocator &allocator);
 | 
			
		||||
  virtual ~ObRandomOSSReader();
 | 
			
		||||
  int open(const share::ObBackupStorageInfo &storage_info, const ObString &filename);
 | 
			
		||||
 | 
			
		||||
  int read(char *buf, int64_t count, int64_t &read_size) override;
 | 
			
		||||
  int seek(int64_t offset) override;
 | 
			
		||||
  int get_file_size(int64_t &file_size) override;
 | 
			
		||||
  int64_t get_offset() const override { return offset_; }
 | 
			
		||||
  bool eof() const override { return eof_; }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  ObIODevice *device_handle_;
 | 
			
		||||
  ObIOFd      fd_;
 | 
			
		||||
  int64_t     offset_;
 | 
			
		||||
  bool        eof_;
 | 
			
		||||
  bool        is_inited_;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * A stream file reader whose data source is mysql packets
 | 
			
		||||
 * Refer to LOAD DATA LOCAL INFILE for more detail.
 | 
			
		||||
 * Read data flow:
 | 
			
		||||
 * client send file content through mysql packets
 | 
			
		||||
 * (@see ObPacketStreamFileReader::read) and ends with an
 | 
			
		||||
 * empty mysql packet.
 | 
			
		||||
 */
 | 
			
		||||
class ObPacketStreamFileReader : public ObStreamFileReader
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  ObPacketStreamFileReader(ObIAllocator &allocator);
 | 
			
		||||
  virtual ~ObPacketStreamFileReader();
 | 
			
		||||
 | 
			
		||||
  int open(const ObString &filename,
 | 
			
		||||
           observer::ObIMPPacketSender &packet_handle,
 | 
			
		||||
           ObSQLSessionInfo *session,
 | 
			
		||||
           int64_t timeout_ts);
 | 
			
		||||
 | 
			
		||||
  int read(char *buf, int64_t count, int64_t &read_size) override;
 | 
			
		||||
  int64_t get_offset() const override { return read_size_; }
 | 
			
		||||
  bool eof() const override { return eof_; }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  int receive_packet();
 | 
			
		||||
 | 
			
		||||
  /// The packet read from NIO is cached, so we must release it explicitly
 | 
			
		||||
  /// and then we can reuse the resource
 | 
			
		||||
  int release_packet();
 | 
			
		||||
 | 
			
		||||
  bool is_timeout() const;
 | 
			
		||||
  bool is_killed() const;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  observer::ObIMPPacketSender *packet_handle_; // We use this handle to read packet from client
 | 
			
		||||
  ObSQLSessionInfo *session_;
 | 
			
		||||
  int64_t timeout_ts_; // The deadline of job
 | 
			
		||||
 | 
			
		||||
  // As we read a packet from client, the NIO store the data into the NIO buffer
 | 
			
		||||
  // and allocate an ObPacket by an allocator(arena_allocator_). The ObPacket(cached_packet_)
 | 
			
		||||
  // is cached in the memory of allocator.
 | 
			
		||||
  ObArenaAllocator arena_allocator_;
 | 
			
		||||
  obmysql::ObMySQLRawPacket *cached_packet_;
 | 
			
		||||
 | 
			
		||||
  int64_t received_size_;  // All data received in bytes
 | 
			
		||||
  int64_t read_size_;      // All data has been read in bytes
 | 
			
		||||
  bool    eof_;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace sql
 | 
			
		||||
} // namespace oceanbase
 | 
			
		||||
 | 
			
		||||
#endif  // OCEANBASE_SQL_LOAD_DATA_FILE_READER_H_
 | 
			
		||||
@ -1388,23 +1388,12 @@ int ObLoadDataSPImpl::next_file_buffer(ObExecContext &ctx,
 | 
			
		||||
    // Restore from data_trimer the data left over from the previous read
 | 
			
		||||
    OZ (box.data_trimer.recover_incomplate_data(*handle->data_buffer));
 | 
			
		||||
 | 
			
		||||
    if (ObLoadFileLocation::SERVER_DISK == box.load_file_storage) {
 | 
			
		||||
      OZ (box.file_reader.pread(handle->data_buffer->current_ptr(),
 | 
			
		||||
                                handle->data_buffer->get_remain_len(),
 | 
			
		||||
                                box.read_cursor.file_offset_,
 | 
			
		||||
                                box.read_cursor.read_size_));
 | 
			
		||||
    } else {
 | 
			
		||||
      OZ (box.device_handle_->pread(box.fd_, box.read_cursor.file_offset_,
 | 
			
		||||
                                  handle->data_buffer->get_remain_len(),
 | 
			
		||||
                                  handle->data_buffer->current_ptr(),
 | 
			
		||||
                                  box.read_cursor.read_size_));
 | 
			
		||||
    }
 | 
			
		||||
    OZ (box.file_reader->readn(handle->data_buffer->current_ptr(),
 | 
			
		||||
                               handle->data_buffer->get_remain_len(),
 | 
			
		||||
                               box.read_cursor.read_size_));
 | 
			
		||||
 | 
			
		||||
    if (OB_SUCC(ret)) {
 | 
			
		||||
      if (OB_UNLIKELY(0 == box.read_cursor.read_size_)) {
 | 
			
		||||
        box.read_cursor.is_end_file_ = true;
 | 
			
		||||
        LOG_DEBUG("LOAD DATA reach file end", K(box.read_cursor));
 | 
			
		||||
      } else {
 | 
			
		||||
      if (OB_LIKELY(box.read_cursor.read_size_ > 0)) {
 | 
			
		||||
        handle->data_buffer->update_pos(box.read_cursor.read_size_); //更新buffer中数据长度
 | 
			
		||||
        int64_t last_proccessed_GBs = box.read_cursor.get_total_read_GBs();
 | 
			
		||||
        box.read_cursor.commit_read();
 | 
			
		||||
@ -1414,6 +1403,9 @@ int ObLoadDataSPImpl::next_file_buffer(ObExecContext &ctx,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        box.job_status->read_bytes_ += box.read_cursor.read_size_;
 | 
			
		||||
      } else if (box.file_reader->eof()) {
 | 
			
		||||
        box.read_cursor.is_end_file_ = true;
 | 
			
		||||
        LOG_DEBUG("LOAD DATA reach file end", K(box.read_cursor));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -2465,12 +2457,10 @@ int ObLoadDataSPImpl::ToolBox::release_resources()
 | 
			
		||||
    ob_free(expr_buffer);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //release fd and device
 | 
			
		||||
  if (NULL != device_handle_) {
 | 
			
		||||
    if (fd_.is_valid()) {
 | 
			
		||||
      device_handle_->close(fd_);
 | 
			
		||||
    }
 | 
			
		||||
    common::ObDeviceManager::get_instance().release_device(device_handle_);
 | 
			
		||||
  //release file reader
 | 
			
		||||
  if (OB_NOT_NULL(file_reader)) {
 | 
			
		||||
    file_reader->~ObFileReader();
 | 
			
		||||
    file_reader = NULL;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (OB_NOT_NULL(temp_handle)) {
 | 
			
		||||
@ -2708,12 +2698,6 @@ int ObLoadDataSPImpl::ToolBox::init(ObExecContext &ctx, ObLoadDataStmt &load_stm
 | 
			
		||||
    LOG_WARN("fail to gen insert column names buff", K(ret));
 | 
			
		||||
  } else if (OB_FAIL(data_frag_mgr.init(ctx, load_args.table_id_))) {
 | 
			
		||||
    LOG_WARN("fail to init data frag mgr", K(ret));
 | 
			
		||||
  } else if (ObLoadFileLocation::SERVER_DISK != load_file_storage) {
 | 
			
		||||
    if (OB_FAIL(util.get_and_init_device(device_handle_, &load_args.access_info_, load_args.file_name_))) {
 | 
			
		||||
      LOG_WARN("fail to get device manager", K(ret), K(load_args.access_info_), K(load_args.file_name_));
 | 
			
		||||
    } else if (OB_FAIL(util.set_access_type(&iod_opts, false, 1))) {
 | 
			
		||||
      LOG_WARN("fail to set access type", K(ret));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //init server_info_map
 | 
			
		||||
@ -2779,14 +2763,20 @@ int ObLoadDataSPImpl::ToolBox::init(ObExecContext &ctx, ObLoadDataStmt &load_stm
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (OB_SUCC(ret)) {
 | 
			
		||||
    if (ObLoadFileLocation::SERVER_DISK == load_file_storage) {
 | 
			
		||||
      OZ (file_reader.open(load_args.file_name_, false));
 | 
			
		||||
      OX (file_size = get_file_size(load_args.file_name_.ptr()));
 | 
			
		||||
    } else {
 | 
			
		||||
      int64_t file_length = -1;
 | 
			
		||||
      OZ (device_handle_->open(load_args.file_name_.ptr(), -1, 0, fd_, &iod_opts));
 | 
			
		||||
      OZ (util.get_file_size(device_handle_, fd_, file_length));
 | 
			
		||||
      OX (file_size = file_length);
 | 
			
		||||
    file_read_param.file_location_   = load_file_storage;
 | 
			
		||||
    file_read_param.filename_        = load_args.file_name_;
 | 
			
		||||
    file_read_param.access_info_     = load_args.access_info_;
 | 
			
		||||
    file_read_param.packet_handle_   = &ctx.get_my_session()->get_pl_query_sender()->get_packet_sender();
 | 
			
		||||
    file_read_param.session_         = ctx.get_my_session();
 | 
			
		||||
    file_read_param.timeout_ts_      = THIS_WORKER.get_timeout_ts();
 | 
			
		||||
 | 
			
		||||
    if (OB_FAIL(ObFileReader::open(file_read_param, ctx.get_allocator(), file_reader))) {
 | 
			
		||||
      LOG_WARN("failed to open file.", KR(ret), K(file_read_param), K(load_args.file_name_));
 | 
			
		||||
 | 
			
		||||
    } else if (!file_reader->seekable()) {
 | 
			
		||||
      file_size = -1;
 | 
			
		||||
    } else if (OB_FAIL(file_reader->get_file_size(file_size))) {
 | 
			
		||||
      LOG_WARN("fail to get io device file size", KR(ret), K(file_size));
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@ -3075,7 +3065,8 @@ int ObLoadDataSPImpl::ToolBox::init(ObExecContext &ctx, ObLoadDataStmt &load_stm
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (OB_SUCC(ret)) {
 | 
			
		||||
    int64_t max_task_count = (file_size / ObLoadFileBuffer::MAX_BUFFER_SIZE + 1) * 2;
 | 
			
		||||
    // When the file size is unknown (file_size <= 0, e.g. load local mode) assume 2G.
    // The shift has to be done in 64 bits: `2 << 30` is evaluated as a 32-bit int, where
    // 2^31 does not fit, so it would not produce 2G.
    const int64_t fake_file_size = (file_size > 0) ? file_size : (static_cast<int64_t>(2) << 30);
 | 
			
		||||
    int64_t max_task_count = (fake_file_size / ObLoadFileBuffer::MAX_BUFFER_SIZE + 1) * 2;
 | 
			
		||||
    if (OB_FAIL(file_buf_row_num.reserve(max_task_count))) {
 | 
			
		||||
      LOG_WARN("fail to reserve", K(ret));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -31,6 +31,7 @@
 | 
			
		||||
#include "sql/engine/cmd/ob_load_data_rpc.h"
 | 
			
		||||
#include "sql/engine/ob_des_exec_context.h"
 | 
			
		||||
#include "sql/engine/cmd/ob_load_data_parser.h"
 | 
			
		||||
#include "sql/engine/cmd/ob_load_data_file_reader.h"
 | 
			
		||||
#include "common/storage/ob_io_device.h"
 | 
			
		||||
 | 
			
		||||
namespace oceanbase
 | 
			
		||||
@ -684,7 +685,7 @@ public:
 | 
			
		||||
    InsertTask,
 | 
			
		||||
  };
 | 
			
		||||
  struct ToolBox {
 | 
			
		||||
    ToolBox() : device_handle_(NULL), fd_(), expr_buffer(nullptr) {}
 | 
			
		||||
    // file_reader is a raw pointer that release_resources() tests for null before calling
    // its destructor, so it must start out null even when init() never opens a reader.
    ToolBox() : file_reader(nullptr), expr_buffer(nullptr) {}
 | 
			
		||||
    int init(ObExecContext &ctx, ObLoadDataStmt &load_stmt);
 | 
			
		||||
    int build_calc_partid_expr(ObExecContext &ctx,
 | 
			
		||||
                               ObLoadDataStmt &load_stmt,
 | 
			
		||||
@ -692,14 +693,13 @@ public:
 | 
			
		||||
    int release_resources();
 | 
			
		||||
 | 
			
		||||
    //modules
 | 
			
		||||
    ObFileReader file_reader;
 | 
			
		||||
    ObIODevice* device_handle_;
 | 
			
		||||
    ObIOFd fd_;
 | 
			
		||||
    ObFileReader * file_reader;
 | 
			
		||||
    ObFileAppender file_appender;
 | 
			
		||||
    ObFileReadCursor read_cursor;
 | 
			
		||||
    ObLoadFileDataTrimer data_trimer;
 | 
			
		||||
    ObInsertValueGenerator generator;
 | 
			
		||||
    ObDataFragMgr data_frag_mgr;
 | 
			
		||||
    ObFileReadParam file_read_param;
 | 
			
		||||
 | 
			
		||||
    //running control
 | 
			
		||||
    ObParallelTaskController shuffle_task_controller;
 | 
			
		||||
 | 
			
		||||
@ -92,92 +92,31 @@ int ObLoadDataResolver::resolve(const ParseNode &parse_tree)
 | 
			
		||||
      case T_REMOTE_OSS:
 | 
			
		||||
        load_args.load_file_storage_ = ObLoadFileLocation::OSS;
 | 
			
		||||
        break;
 | 
			
		||||
      case T_LOCAL:
 | 
			
		||||
        //load_args.load_file_storage_ = ObLoadFileLocation::CLIENT_DISK;
 | 
			
		||||
        //break;
 | 
			
		||||
        //not support local
 | 
			
		||||
      case T_LOCAL: {
 | 
			
		||||
          bool enabled = false;
 | 
			
		||||
          if (OB_FAIL(local_infile_enabled(enabled))) {
 | 
			
		||||
            LOG_WARN("failed to check local_infile_enabled", K(ret));
 | 
			
		||||
          } else if (!enabled) {
 | 
			
		||||
            ret = OB_ERR_CLIENT_LOCAL_FILES_DISABLED;
 | 
			
		||||
            LOG_USER_ERROR(OB_ERR_CLIENT_LOCAL_FILES_DISABLED);
 | 
			
		||||
          } else {
 | 
			
		||||
            load_args.load_file_storage_ = ObLoadFileLocation::CLIENT_DISK;
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
      default:
 | 
			
		||||
        ret = OB_NOT_SUPPORTED;
 | 
			
		||||
        LOG_USER_ERROR(OB_NOT_SUPPORTED, "load data local");
 | 
			
		||||
        LOG_USER_ERROR(OB_NOT_SUPPORTED, "unknown location");
 | 
			
		||||
      }
 | 
			
		||||
    } else {
 | 
			
		||||
      load_args.load_file_storage_ = ObLoadFileLocation::SERVER_DISK;
 | 
			
		||||
    }
 | 
			
		||||
    LOG_DEBUG("load data location", K(load_args.load_file_storage_));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (OB_SUCC(ret)) {
 | 
			
		||||
    /* 1. file name */
 | 
			
		||||
    ObLoadArgument &load_args = load_stmt->get_load_arguments();
 | 
			
		||||
    ParseNode *file_name_node = node->children_[ENUM_FILE_NAME];
 | 
			
		||||
    if (OB_ISNULL(file_name_node)
 | 
			
		||||
        || OB_UNLIKELY(T_VARCHAR != file_name_node->type_ && T_CHAR != file_name_node->type_)) {
 | 
			
		||||
      ret = OB_ERR_UNEXPECTED;
 | 
			
		||||
      LOG_WARN("invalid node", "child", file_name_node);
 | 
			
		||||
    } else {
 | 
			
		||||
      ObString file_name(file_name_node->str_len_, file_name_node->str_value_);
 | 
			
		||||
      if (ObLoadFileLocation::OSS != load_args.load_file_storage_) {
 | 
			
		||||
        load_args.file_name_ = file_name;
 | 
			
		||||
        const char *p = nullptr;
 | 
			
		||||
        ObString sub_file_name;
 | 
			
		||||
        ObString cstyle_file_name; // ends with '\0'
 | 
			
		||||
        char *full_path_buf = nullptr;
 | 
			
		||||
        char *actual_path = nullptr;
 | 
			
		||||
        if (OB_ISNULL(full_path_buf = static_cast<char *>(allocator_->alloc(MAX_PATH_SIZE)))) {
 | 
			
		||||
          ret = OB_ALLOCATE_MEMORY_FAILED;
 | 
			
		||||
          LOG_WARN("fail to allocate memory", K(ret));
 | 
			
		||||
        }
 | 
			
		||||
        while (OB_SUCC(ret) && !file_name.empty()) {
 | 
			
		||||
          p = file_name.find(',');
 | 
			
		||||
          if (nullptr == p) {
 | 
			
		||||
            sub_file_name = file_name;
 | 
			
		||||
            cstyle_file_name = sub_file_name;
 | 
			
		||||
            file_name.reset();
 | 
			
		||||
          } else {
 | 
			
		||||
            sub_file_name = file_name.split_on(p);
 | 
			
		||||
            cstyle_file_name.reset();
 | 
			
		||||
          }
 | 
			
		||||
          if (!sub_file_name.empty()) {
 | 
			
		||||
            if (cstyle_file_name.empty() &&
 | 
			
		||||
                OB_FAIL(ob_write_string(*allocator_, sub_file_name, cstyle_file_name, true))) {
 | 
			
		||||
              LOG_WARN("fail to write string", KR(ret));
 | 
			
		||||
            } else if (OB_ISNULL(actual_path = realpath(cstyle_file_name.ptr(), full_path_buf))) {
 | 
			
		||||
              ret = OB_FILE_NOT_EXIST;
 | 
			
		||||
              LOG_WARN("file not exist", K(ret), K(cstyle_file_name));
 | 
			
		||||
            }
 | 
			
		||||
            if (OB_SUCC(ret)) {
 | 
			
		||||
              ObString secure_file_priv;
 | 
			
		||||
              if (OB_FAIL(session_info_->get_secure_file_priv(secure_file_priv))) {
 | 
			
		||||
                LOG_WARN("failed to get secure file priv", K(ret));
 | 
			
		||||
              } else if (OB_FAIL(
 | 
			
		||||
                           ObResolverUtils::check_secure_path(secure_file_priv, actual_path))) {
 | 
			
		||||
                LOG_WARN("failed to check secure path", K(ret), K(secure_file_priv),
 | 
			
		||||
                         K(actual_path));
 | 
			
		||||
              }
 | 
			
		||||
            }
 | 
			
		||||
            if (OB_SUCC(ret)) {
 | 
			
		||||
              if (OB_FAIL(load_args.file_iter_.add_files(&cstyle_file_name))) {
 | 
			
		||||
                LOG_WARN("fail to add files", KR(ret));
 | 
			
		||||
              }
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      } else {
 | 
			
		||||
        ObString temp_file_name = file_name.split_on('?');
 | 
			
		||||
        ObString storage_info;
 | 
			
		||||
        if (OB_FAIL(ob_write_string(*allocator_, temp_file_name, load_args.file_name_, true))) {
 | 
			
		||||
          LOG_WARN("fail to copy string", K(ret));
 | 
			
		||||
        } else if (OB_FAIL(ob_write_string(*allocator_, file_name, storage_info, true))) {
 | 
			
		||||
          LOG_WARN("fail to copy string", K(ret));
 | 
			
		||||
        } else if (temp_file_name.length() <= 0 || storage_info.length() <= 0) {
 | 
			
		||||
          ret = OB_INVALID_ARGUMENT;
 | 
			
		||||
          LOG_USER_ERROR(OB_INVALID_ARGUMENT, "file name or access key");
 | 
			
		||||
        } else if (OB_FAIL(load_args.access_info_.set(load_args.file_name_.ptr(), storage_info.ptr()))) {
 | 
			
		||||
          LOG_WARN("failed to set access info", K(ret));
 | 
			
		||||
        } else if (OB_FAIL(load_args.file_iter_.add_files(&load_args.file_name_))) {
 | 
			
		||||
          LOG_WARN("fail to add files", KR(ret));
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    ret = resolve_filename(load_stmt, node);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (OB_SUCC(ret)) {
 | 
			
		||||
@ -185,7 +124,10 @@ int ObLoadDataResolver::resolve(const ParseNode &parse_tree)
 | 
			
		||||
    ObLoadArgument &load_args = load_stmt->get_load_arguments();
 | 
			
		||||
    ObLoadDupActionType dupl_action = ObLoadDupActionType::LOAD_STOP_ON_DUP;
 | 
			
		||||
    if (NULL == node->children_[ENUM_DUPLICATE_ACTION]) {
 | 
			
		||||
      if (ObLoadFileLocation::CLIENT_DISK == load_args.load_file_storage_) {
 | 
			
		||||
      if (ObLoadFileLocation::CLIENT_DISK == load_args.load_file_storage_ &&
 | 
			
		||||
          lib::is_mysql_mode()) {
 | 
			
		||||
        // https://dev.mysql.com/doc/refman/8.0/en/load-data.html
 | 
			
		||||
        // In MySQL, LOCAL modifier has the same effect as the IGNORE modifier.
 | 
			
		||||
        dupl_action = ObLoadDupActionType::LOAD_IGNORE;
 | 
			
		||||
      }
 | 
			
		||||
    } else if (T_IGNORE == node->children_[ENUM_DUPLICATE_ACTION]->type_) {
 | 
			
		||||
@ -561,6 +503,92 @@ int ObLoadDataResolver::resolve_hints(const ParseNode &node)
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Resolve the file name argument of a LOAD DATA statement into the load arguments.
 *
 * For every location other than OSS the name is handled as a comma-separated list; each
 * non-empty entry is turned into a NUL-terminated string and added to file_iter_:
 *   - SERVER_DISK: the entry must resolve through realpath(); in MySQL mode the resolved
 *     path is additionally checked against the session's secure_file_priv.
 *   - CLIENT_DISK: only one file is accepted, a second entry yields OB_NOT_SUPPORTED.
 * For OSS the text is split on '?' into the file name and the storage info, which is
 * stored in access_info_ before the file name is added to file_iter_.
 *
 * @param load_stmt  statement whose load arguments are filled in; must not be null
 * @param node       LOAD DATA parse node; children_[ENUM_FILE_NAME] holds the file name
 * @return OB_SUCCESS on success; OB_ERR_UNEXPECTED for null inputs or an invalid file name
 *         node; otherwise the error code of the failing step
 */
int ObLoadDataResolver::resolve_filename(ObLoadDataStmt *load_stmt, ParseNode *node)
{
  int ret = OB_SUCCESS;

  if (OB_ISNULL(load_stmt) || OB_ISNULL(node) || OB_ISNULL(allocator_)) {
    // All three pointers are dereferenced below; fail cleanly instead of crashing.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected null pointer", K(ret));
  } else {
    ObLoadArgument &load_args = load_stmt->get_load_arguments();
    ParseNode *file_name_node = node->children_[ENUM_FILE_NAME];
    if (OB_ISNULL(file_name_node)
        || OB_UNLIKELY(T_VARCHAR != file_name_node->type_ && T_CHAR != file_name_node->type_)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("invalid node", "child", file_name_node);
    } else {
      ObString file_name(file_name_node->str_len_, file_name_node->str_value_);
      if (ObLoadFileLocation::OSS != load_args.load_file_storage_) {
        // Only server-side files are resolved with realpath() and checked for security.
        const bool is_server_disk =
            (ObLoadFileLocation::SERVER_DISK == load_args.load_file_storage_);
        load_args.file_name_ = file_name;
        const char *p = nullptr;
        ObString sub_file_name;
        ObString cstyle_file_name; // ends with '\0'
        char *full_path_buf = nullptr;
        char *actual_path = nullptr;
        // The buffer is only written by realpath(), so it is only needed for SERVER_DISK.
        // NOTE(review): realpath() may write up to PATH_MAX bytes into the buffer;
        // confirm that MAX_PATH_SIZE is at least PATH_MAX.
        if (is_server_disk
            && OB_ISNULL(full_path_buf = static_cast<char *>(allocator_->alloc(MAX_PATH_SIZE)))) {
          ret = OB_ALLOCATE_MEMORY_FAILED;
          LOG_WARN("fail to allocate memory", K(ret));
        }
        while (OB_SUCC(ret) && !file_name.empty()) {
          p = file_name.find(',');
          if (nullptr == p) {
            // Last entry: reuse the original string as the C-style name.
            sub_file_name = file_name;
            cstyle_file_name = sub_file_name;
            file_name.reset();
          } else {
            // Entry followed by ',': it needs its own NUL-terminated copy (made below).
            sub_file_name = file_name.split_on(p);
            cstyle_file_name.reset();
          }

          if (!sub_file_name.empty()) {
            if (cstyle_file_name.empty() &&
                OB_FAIL(ob_write_string(*allocator_, sub_file_name, cstyle_file_name, true))) {
              LOG_WARN("fail to write string", KR(ret));
            } else if (is_server_disk &&
                       OB_ISNULL(actual_path = realpath(cstyle_file_name.ptr(), full_path_buf))) {
              ret = OB_FILE_NOT_EXIST;
              LOG_WARN("file not exist", K(ret), K(cstyle_file_name));
            }

            //security check for mysql mode
            if (OB_SUCC(ret) && lib::is_mysql_mode() && is_server_disk) {
              ObString secure_file_priv;
              if (OB_ISNULL(session_info_)) {
                ret = OB_ERR_UNEXPECTED;
                LOG_WARN("session info is null", K(ret));
              } else if (OB_FAIL(session_info_->get_secure_file_priv(secure_file_priv))) {
                LOG_WARN("failed to get secure file priv", K(ret));
              } else if (OB_FAIL(
                           ObResolverUtils::check_secure_path(secure_file_priv, actual_path))) {
                LOG_WARN("failed to check secure path", K(ret), K(secure_file_priv),
                         K(actual_path));
              }
            }

            if (OB_SUCC(ret)) {
              if (ObLoadFileLocation::CLIENT_DISK == load_args.load_file_storage_ && load_args.file_iter_.count() != 0) {
                ret = OB_NOT_SUPPORTED;
                LOG_USER_ERROR(OB_NOT_SUPPORTED, "load multi files not supported");
              } else if (OB_FAIL(load_args.file_iter_.add_files(&cstyle_file_name))) {
                LOG_WARN("fail to add files", KR(ret));
              }
            }
          }
        }
      } else {
        // OSS: "<file name>?<storage info>"
        ObString temp_file_name = file_name.split_on('?');
        ObString storage_info;
        if (OB_FAIL(ob_write_string(*allocator_, temp_file_name, load_args.file_name_, true))) {
          LOG_WARN("fail to copy string", K(ret));
        } else if (OB_FAIL(ob_write_string(*allocator_, file_name, storage_info, true))) {
          LOG_WARN("fail to copy string", K(ret));
        } else if (temp_file_name.length() <= 0 || storage_info.length() <= 0) {
          ret = OB_INVALID_ARGUMENT;
          LOG_USER_ERROR(OB_INVALID_ARGUMENT, "file name or access key");
        } else if (OB_FAIL(load_args.access_info_.set(load_args.file_name_.ptr(), storage_info.ptr()))) {
          LOG_WARN("failed to set access info", K(ret));
        } else if (OB_FAIL(load_args.file_iter_.add_files(&load_args.file_name_))) {
          LOG_WARN("fail to add files", KR(ret));
        }
      }
    }
  }
  return ret;
}
 | 
			
		||||
 | 
			
		||||
//validation for loaddata statement obeys the following rules:
 | 
			
		||||
//0. in loaddata Ver1, only ascii charset are supported.
 | 
			
		||||
//1. according to the defined charset, escaped and enclosed valid char length should <= 1.
 | 
			
		||||
@ -1349,6 +1377,42 @@ int ObLoadDataResolver::resolve_char_node(const ParseNode &node, int32_t &single
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataResolver::local_infile_enabled(bool &enabled) const
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
 | 
			
		||||
  // 1. let's check the system variable and the capability flag in the mysql handshake
 | 
			
		||||
  enabled = false;
 | 
			
		||||
  int64_t local_infile_sys_var = 0;
 | 
			
		||||
  if (OB_ISNULL(session_info_)) {
 | 
			
		||||
  } else if (OB_FAIL(session_info_->get_sys_variable(share::SYS_VAR_LOCAL_INFILE, local_infile_sys_var))) {
 | 
			
		||||
    LOG_WARN("failed to get SYS_VAR_LOCAL_INFILE system variable.", KR(ret));
 | 
			
		||||
  } else {
 | 
			
		||||
    const int64_t local_infile_capability_flag = session_info_->get_capability().cap_flags_.OB_CLIENT_LOCAL_FILES;
 | 
			
		||||
    enabled = (local_infile_sys_var != 0) && (local_infile_capability_flag != 0);
 | 
			
		||||
    LOG_DEBUG("LOCAL_INFILE enabled by system variable and client capability",
 | 
			
		||||
              K(enabled), K(local_infile_capability_flag), K(local_infile_sys_var));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // 2. let's check the client type.
 | 
			
		||||
  // The obproxy set the capability flag but it does not support load local
 | 
			
		||||
  if (OB_SUCC(ret) && enabled) {
 | 
			
		||||
    if (session_info_->get_client_mode() > common::OB_MIN_CLIENT_MODE &&
 | 
			
		||||
        session_info_->get_client_mode() < OB_MAX_CLIENT_MODE) {
 | 
			
		||||
      // this is an ob client, such as obclient 2.x, objdbc, obproxy, obclient 1.x is not included
 | 
			
		||||
      // check the proxy capability flags
 | 
			
		||||
      obmysql::ObProxyCapabilityFlags proxy_cap = session_info_->get_proxy_cap_flags();
 | 
			
		||||
      LOG_DEBUG("load local infile: get proxy capability flag",
 | 
			
		||||
                K(proxy_cap.capability_), K(proxy_cap.is_load_local_support()));
 | 
			
		||||
      if (!proxy_cap.is_load_local_support()) {
 | 
			
		||||
        enabled = false;
 | 
			
		||||
        LOG_INFO("load data local infile is disabled by client: the obclient proxy capability flag is not set");
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return ret;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int ObLoadDataResolver::check_trigger_constraint(const ObTableSchema *table_schema)
 | 
			
		||||
{
 | 
			
		||||
  int ret = OB_SUCCESS;
 | 
			
		||||
 | 
			
		||||
@ -57,6 +57,10 @@ public:
 | 
			
		||||
                            const common::ObString &table_name, bool cte_table_fisrt, uint64_t& table_id);
 | 
			
		||||
  int validate_stmt(ObLoadDataStmt* stmt);
 | 
			
		||||
  int resolve_hints(const ParseNode &node);
 | 
			
		||||
 | 
			
		||||
  int resolve_filename(ObLoadDataStmt *load_stmt, ParseNode *node);
 | 
			
		||||
  int local_infile_enabled(bool &enabled) const;
 | 
			
		||||
 | 
			
		||||
  int check_trigger_constraint(const ObTableSchema *table_schema);
 | 
			
		||||
private:
 | 
			
		||||
  enum ParameterEnum {
 | 
			
		||||
 | 
			
		||||
@ -1031,6 +1031,7 @@ public:
 | 
			
		||||
 | 
			
		||||
  // client mode related
 | 
			
		||||
  // Stores `mode` in client_mode_.
  void set_client_mode(const common::ObClientMode mode)
  {
    client_mode_ = mode;
  }
 | 
			
		||||
  // Returns the stored client_mode_.
  common::ObClientMode get_client_mode() const
  {
    return client_mode_;
  }
 | 
			
		||||
  // True when client_mode_ equals OB_JAVA_CLIENT_MODE.
  bool is_java_client_mode() const
  {
    return client_mode_ == common::OB_JAVA_CLIENT_MODE;
  }
 | 
			
		||||
  // True when client_mode_ equals OB_PROXY_CLIENT_MODE.
  bool is_obproxy_mode() const
  {
    return client_mode_ == common::OB_PROXY_CLIENT_MODE;
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user