[FEAT MERGE] Support external table
Co-authored-by: jingtaoye35 <1255153887@qq.com>
This commit is contained in:
@ -15,6 +15,9 @@
|
||||
#include "sql/engine/cmd/ob_load_data_parser.h"
|
||||
#include "sql/resolver/cmd/ob_load_data_stmt.h"
|
||||
#include "lib/oblog/ob_log_module.h"
|
||||
#include "lib/utility/ob_print_utils.h"
|
||||
#include "lib/string/ob_hex_utils_base.h"
|
||||
#include "deps/oblib/src/lib/list/ob_dlist.h"
|
||||
|
||||
using namespace oceanbase::sql;
|
||||
using namespace oceanbase::common;
|
||||
@ -23,30 +26,69 @@ namespace oceanbase
|
||||
{
|
||||
namespace sql
|
||||
{
|
||||
// Placeholder byte stored in the parser's cached terminator characters when
// the corresponding terminator string is empty (see init_opt_variables()).
const char INVALID_TERM_CHAR = '\xff';
|
||||
|
||||
// Printable names of the external file formats, indexed by
// ObExternalFileFormat::FormatType. Used both when rendering the format as
// JSON ("TYPE") and when matching the type name while loading it back.
const char * FORMAT_TYPE_STR[] = {
|
||||
"CSV",
|
||||
};
|
||||
// Keep the name table in lock-step with the FormatType enum.
static_assert(array_elements(FORMAT_TYPE_STR) == ObExternalFileFormat::MAX_FORMAT, "Not enough initializer for ObExternalFileFormat");
|
||||
|
||||
int ObCSVGeneralFormat::init_format(const ObDataInFileStruct &format,
|
||||
int64_t file_column_nums,
|
||||
ObCollationType file_cs_type)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
if (!ObCharset::is_valid_collation(file_cs_type)) {
|
||||
ret = OB_ERR_UNKNOWN_CHARSET;
|
||||
LOG_WARN("invalid charset", K(ret), K(file_cs_type));
|
||||
} else {
|
||||
cs_type_ = ObCharset::charset_type_by_coll(file_cs_type);
|
||||
file_column_nums_ = file_column_nums;
|
||||
field_enclosed_char_ = format.field_enclosed_char_;
|
||||
field_escaped_char_ = format.field_escaped_char_;
|
||||
field_term_str_ = format.field_term_str_;
|
||||
line_term_str_ = format.line_term_str_;
|
||||
line_start_str_ = format.line_start_str_;
|
||||
if (line_term_str_.empty() && !field_term_str_.empty()) {
|
||||
line_term_str_ = field_term_str_;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int ObCSVGeneralParser::init(const ObDataInFileStruct &format,
|
||||
int64_t file_column_nums,
|
||||
ObCollationType file_cs_type)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
const char INVALID_TERM_CHAR = '\xff';
|
||||
|
||||
if (!ObCharset::is_valid_collation(file_cs_type)) {
|
||||
ret = OB_ERR_UNKNOWN_CHARSET;
|
||||
LOG_WARN("invalid charset", K(ret), K(file_cs_type));
|
||||
} else {
|
||||
format_.cs_type_ = ObCharset::charset_type_by_coll(file_cs_type);
|
||||
format_.file_column_nums_ = file_column_nums;
|
||||
format_.field_enclosed_char_ = format.field_enclosed_char_;
|
||||
format_.field_escaped_char_ = format.field_escaped_char_;
|
||||
format_.field_term_str_ = format.field_term_str_;
|
||||
format_.line_term_str_ = format.line_term_str_;
|
||||
format_.line_start_str_ = format.line_start_str_;
|
||||
if (format_.line_term_str_.empty() && !format_.field_term_str_.empty()) {
|
||||
format_.line_term_str_ = format_.field_term_str_;
|
||||
}
|
||||
if (OB_FAIL(format_.init_format(format, file_column_nums, file_cs_type))) {
|
||||
LOG_WARN("fail to init format", K(ret));
|
||||
} else if (OB_FAIL(init_opt_variables())) {
|
||||
LOG_WARN("fail to init opt values", K(ret));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObCSVGeneralParser::init(const ObCSVGeneralFormat &format)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
format_ = format;
|
||||
|
||||
if (OB_FAIL(init_opt_variables())) {
|
||||
LOG_WARN("fail to init opt values", K(ret));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObCSVGeneralParser::init_opt_variables()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_SUCC(ret)) {
|
||||
opt_param_.line_term_c_ = format_.line_term_str_.empty() ? INVALID_TERM_CHAR : format_.line_term_str_[0];
|
||||
opt_param_.field_term_c_ = format_.field_term_str_.empty() ? INVALID_TERM_CHAR : format_.field_term_str_[0];
|
||||
@ -65,10 +107,9 @@ int ObCSVGeneralParser::init(const ObDataInFileStruct &format,
|
||||
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret) && OB_FAIL(fields_per_line_.prepare_allocate(file_column_nums))) {
|
||||
LOG_WARN("fail to allocate memory", K(ret), K(file_column_nums));
|
||||
if (OB_SUCC(ret) && OB_FAIL(fields_per_line_.prepare_allocate(format_.file_column_nums_))) {
|
||||
LOG_WARN("fail to allocate memory", K(ret), K(format_.file_column_nums_));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -81,7 +122,7 @@ int ObCSVGeneralParser::handle_irregular_line(int field_idx, int line_no,
|
||||
OB_WARN_TOO_MANY_RECORDS : OB_WARN_TOO_FEW_RECORDS;
|
||||
rec.line_no = line_no;
|
||||
OX (errors.push_back(rec));
|
||||
for (int i = field_idx; i < format_.file_column_nums_; ++i) {
|
||||
for (int i = field_idx; OB_SUCC(ret) && i < format_.file_column_nums_; ++i) {
|
||||
FieldValue &new_field = fields_per_line_.at(i);
|
||||
new_field = FieldValue();
|
||||
new_field.is_null_ = 1;
|
||||
@ -89,5 +130,304 @@ int ObCSVGeneralParser::handle_irregular_line(int field_idx, int line_no,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Append the CSV options to `buf` as a run of `,"NAME":value` JSON members.
 *
 * Every member is preceded by a comma, so the caller is expected to have
 * already written at least one member of the enclosing object. Delimiter
 * strings and NULL_IF entries are emitted hex-encoded; option names are taken
 * from OPTION_NAMES in declaration order.
 *
 * @param buf      output buffer
 * @param buf_len  capacity of `buf`
 * @return position reached in `buf` after the last write
 */
int64_t ObCSVGeneralFormat::to_json_kv_string(char *buf, const int64_t buf_len) const
{
  int64_t pos = 0;
  int64_t name_idx = 0;  // cursor into OPTION_NAMES, advanced once per option

  // Hex-encoded delimiter strings.
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[name_idx++], to_cstring(ObHexStringWrap(line_term_str_)));
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[name_idx++], to_cstring(ObHexStringWrap(field_term_str_)));

  // Escape / enclosure characters are stored as plain numbers.
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":%ld", OPTION_NAMES[name_idx++], field_escaped_char_);
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":%ld", OPTION_NAMES[name_idx++], field_enclosed_char_);

  // Charset name and header skipping.
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", OPTION_NAMES[name_idx++], ObCharset::charset_name(cs_type_));
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":%ld", OPTION_NAMES[name_idx++], skip_header_lines_);

  // Boolean switches.
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":%s", OPTION_NAMES[name_idx++], STR_BOOL(skip_blank_lines_));
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":%s", OPTION_NAMES[name_idx++], STR_BOOL(trim_space_));

  // NULL_IF list: JSON array of hex-encoded strings.
  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":", OPTION_NAMES[name_idx++]);
  J_ARRAY_START();
  const int64_t null_if_cnt = null_if_.count();
  for (int64_t k = 0; k < null_if_cnt; ++k) {
    if (k > 0) {
      J_COMMA();
    }
    databuff_printf(buf, buf_len, pos, "\"%s\"", to_cstring(ObHexStringWrap(null_if_.at(k))));
  }
  J_ARRAY_END();

  J_COMMA();
  databuff_printf(buf, buf_len, pos, "\"%s\":%s", OPTION_NAMES[name_idx++], STR_BOOL(empty_field_as_null_));

  return pos;
}
|
||||
|
||||
/**
 * Restore the CSV options from a chain of JSON key/value pairs.
 *
 * Pairs are consumed in the order of OPTION_NAMES: when the current pair's
 * name matches the next expected option (case-insensitively) and its value
 * has the expected JSON type, the option is loaded and `node` is advanced.
 * A pair that does not match leaves the option at its current value.
 *
 * Compared with a naive chain of checks this implementation
 *  - stops touching the object as soon as an error has been recorded,
 *  - never dereferences a pair whose value pointer is NULL,
 *  - keeps the option-name cursor separate from the NULL_IF element index.
 *
 * @param node       in/out cursor over the JSON object's pairs; on return it
 *                   points at the first pair that was not consumed
 * @param allocator  owns the memory of the decoded strings and NULL_IF array
 * @return OB_SUCCESS, or the first error from hex decoding / allocation, or
 *         OB_ERR_UNEXPECTED when a NULL_IF element is not a string
 */
int ObCSVGeneralFormat::load_from_json_data(json::Pair *&node, ObIAllocator &allocator)
{
  int ret = OB_SUCCESS;
  int64_t idx = 0;  // cursor into OPTION_NAMES

  // LINE_DELIMITER: hex-encoded string
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])
      && OB_NOT_NULL(node->value_) && json::JT_STRING == node->value_->get_type()) {
    ObObj obj;
    OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj));
    if (OB_SUCC(ret) && !obj.is_null()) {
      line_term_str_ = obj.get_string();
    }
    node = node->get_next();
  }
  // FIELD_DELIMITER: hex-encoded string
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])
      && OB_NOT_NULL(node->value_) && json::JT_STRING == node->value_->get_type()) {
    ObObj obj;
    OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, obj));
    if (OB_SUCC(ret) && !obj.is_null()) {
      field_term_str_ = obj.get_string();
    }
    node = node->get_next();
  }
  // ESCAPE: number
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])
      && OB_NOT_NULL(node->value_) && json::JT_NUMBER == node->value_->get_type()) {
    field_escaped_char_ = node->value_->get_number();
    node = node->get_next();
  }
  // FIELD_OPTIONALLY_ENCLOSED_BY: number
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])
      && OB_NOT_NULL(node->value_) && json::JT_NUMBER == node->value_->get_type()) {
    field_enclosed_char_ = node->value_->get_number();
    node = node->get_next();
  }
  // ENCODING: charset name
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])
      && OB_NOT_NULL(node->value_) && json::JT_STRING == node->value_->get_type()) {
    cs_type_ = ObCharset::charset_type(node->value_->get_string());
    node = node->get_next();
  }
  // SKIP_HEADER: number
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])
      && OB_NOT_NULL(node->value_) && json::JT_NUMBER == node->value_->get_type()) {
    skip_header_lines_ = node->value_->get_number();
    node = node->get_next();
  }
  // SKIP_BLANK_LINES: anything other than JSON true means false
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])) {
    skip_blank_lines_ = (OB_NOT_NULL(node->value_) && json::JT_TRUE == node->value_->get_type());
    node = node->get_next();
  }
  // TRIM_SPACE: anything other than JSON true means false
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])) {
    trim_space_ = (OB_NOT_NULL(node->value_) && json::JT_TRUE == node->value_->get_type());
    node = node->get_next();
  }
  // NULL_IF list: array of hex-encoded strings
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])
      && OB_NOT_NULL(node->value_) && json::JT_ARRAY == node->value_->get_type()) {
    const json::Array &it_array = node->value_->get_array();
    int64_t null_if_idx = 0;  // next free slot in null_if_
    if (it_array.get_size() > 0
        && OB_FAIL(null_if_.allocate_array(allocator, it_array.get_size()))) {
      LOG_WARN("allocate array failed", K(ret));
    }
    for (auto it_tmp = it_array.get_first();
         OB_SUCC(ret) && it_tmp != it_array.get_header() && it_tmp != NULL;
         it_tmp = it_tmp->get_next()) {
      if (OB_UNLIKELY(json::JT_STRING != it_tmp->get_type())) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("null_if_ child is not string", K(ret), "type", it_tmp->get_type());
      } else {
        ObObj obj;
        OZ (ObHexUtilsBase::unhex(it_tmp->get_string(), allocator, obj));
        if (OB_SUCC(ret) && !obj.is_null()) {
          null_if_.at(null_if_idx++) = obj.get_string();
        }
      }
    }
    node = node->get_next();
  }
  // EMPTY_FIELD_AS_NULL: anything other than JSON true means false
  if (OB_SUCC(ret) && OB_NOT_NULL(node) && 0 == node->name_.case_compare(OPTION_NAMES[idx++])) {
    empty_field_as_null_ = (OB_NOT_NULL(node->value_) && json::JT_TRUE == node->value_->get_type());
    node = node->get_next();
  }
  return ret;
}
|
||||
|
||||
/**
 * Append the original (user supplied) format strings to `buf` as a run of
 * `,"NAME":"hex"` JSON members, in ORIGIN_FORMAT_STRING order.
 *
 * @param buf      output buffer
 * @param buf_len  capacity of `buf`
 * @return position reached in `buf` after the last write
 */
int64_t ObOriginFileFormat::to_json_kv_string(char *buf, const int64_t buf_len) const
{
  int64_t pos = 0;
  // Values listed in the same order as their names in ORIGIN_FORMAT_STRING.
  const ObString *const values[] = {
    &origin_line_term_str_,
    &origin_field_term_str_,
    &origin_field_escaped_str_,
    &origin_field_enclosed_str_,
    &origin_null_if_str_,
  };
  const int64_t value_cnt = static_cast<int64_t>(sizeof(values) / sizeof(values[0]));

  for (int64_t i = 0; i < value_cnt; ++i) {
    J_COMMA();
    databuff_printf(buf, buf_len, pos, "\"%s\":\"%s\"", ORIGIN_FORMAT_STRING[i], to_cstring(ObHexStringWrap(*values[i])));
  }
  return pos;
}
|
||||
|
||||
/**
 * Restore the original format strings from a chain of JSON pairs.
 *
 * For each name in ORIGIN_FORMAT_STRING, in order: if the current pair has
 * that name (case-insensitive) and a string value, the value is hex-decoded
 * into the matching member and `node` advances to the next pair. A decoded
 * NULL leaves the member untouched. Processing stops mutating state once an
 * error has been recorded.
 *
 * @param node       in/out cursor over the JSON object's pairs
 * @param allocator  owns the memory of the decoded strings
 * @return OB_SUCCESS or the first hex-decoding error
 */
int ObOriginFileFormat::load_from_json_data(json::Pair *&node, ObIAllocator &allocator)
{
  int ret = OB_SUCCESS;
  // Destinations listed in the same order as their names in ORIGIN_FORMAT_STRING.
  ObString *const targets[] = {
    &origin_line_term_str_,
    &origin_field_term_str_,
    &origin_field_escaped_str_,
    &origin_field_enclosed_str_,
    &origin_null_if_str_,
  };
  const int64_t target_cnt = static_cast<int64_t>(sizeof(targets) / sizeof(targets[0]));

  for (int64_t i = 0; i < target_cnt; ++i) {
    if (OB_SUCC(ret) && OB_NOT_NULL(node)
        && 0 == node->name_.case_compare(ORIGIN_FORMAT_STRING[i])
        && json::JT_STRING == node->value_->get_type()) {
      ObObj decoded;
      OZ (ObHexUtilsBase::unhex(node->value_->get_string(), allocator, decoded));
      if (OB_SUCC(ret) && !decoded.is_null()) {
        *targets[i] = decoded.get_string();
      }
      node = node->get_next();
    }
  }
  return ret;
}
|
||||
|
||||
/**
 * Render the external file format as a JSON object.
 *
 * The first member is "TYPE" (the FORMAT_TYPE_STR entry, or "INVALID" when
 * format_type_ is out of range). For CSV the CSV options and the original
 * user-supplied strings follow as further members.
 *
 * For an unknown format type the object now contains only the TYPE member.
 * Previously the write position was reset to 0 in that case, which discarded
 * the opening brace and left a lone "}" in the buffer.
 *
 * @param buf      output buffer
 * @param buf_len  capacity of `buf`
 * @return position reached in `buf` after the closing brace
 */
int64_t ObExternalFileFormat::to_string(char *buf, const int64_t buf_len) const
{
  int64_t pos = 0;
  const bool is_valid_format = format_type_ > INVALID_FORMAT && format_type_ < MAX_FORMAT;

  J_OBJ_START();

  databuff_print_kv(buf, buf_len, pos, "\"TYPE\"", is_valid_format ? FORMAT_TYPE_STR[format_type_] : "INVALID");

  switch (format_type_) {
    case CSV_FORMAT:
      // Both helpers write relative to the pointer they are given and
      // return the number of bytes they advanced.
      pos += csv_format_.to_json_kv_string(buf + pos, buf_len - pos);
      pos += origin_file_format_str_.to_json_kv_string(buf + pos, buf_len - pos);
      break;
    default:
      // No format-specific members; keep the object well-formed.
      break;
  }

  J_OBJ_END();
  return pos;
}
|
||||
|
||||
int ObExternalFileFormat::load_from_string(const ObString &str, ObIAllocator &allocator)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
json::Value *data = NULL;
|
||||
json::Parser parser;
|
||||
ObArenaAllocator temp_allocator;
|
||||
if (OB_UNLIKELY(str.empty())) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("format string is empty", K(ret), K(str));
|
||||
} else if (OB_FAIL(parser.init(&temp_allocator))) {
|
||||
LOG_WARN("parser init failed", K(ret));
|
||||
} else if (OB_FAIL(parser.parse(str.ptr(), str.length(), data))) {
|
||||
LOG_WARN("parse json failed", K(ret), K(str));
|
||||
} else if (NULL == data || json::JT_OBJECT != data->get_type()) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("error json value", K(ret), KPC(data));
|
||||
} else {
|
||||
auto format_type_node = data->get_object().get_first();
|
||||
if (format_type_node->value_->get_type() != json::JT_STRING) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected json format", K(ret), K(str));
|
||||
} else {
|
||||
ObString format_type_str = format_type_node->value_->get_string();
|
||||
for (int i = 0; i < array_elements(FORMAT_TYPE_STR); ++i) {
|
||||
if (format_type_str.case_compare(FORMAT_TYPE_STR[i]) == 0) {
|
||||
format_type_ = static_cast<FormatType>(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
format_type_node = format_type_node->get_next();
|
||||
switch (format_type_) {
|
||||
case CSV_FORMAT:
|
||||
OZ (csv_format_.load_from_json_data(format_type_node, allocator));
|
||||
OZ (origin_file_format_str_.load_from_json_data(format_type_node, allocator));
|
||||
break;
|
||||
default:
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("invalid format type", K(ret), K(format_type_str));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObExternalFileFormat::StringData::store_str(const ObString &str)
|
||||
{
|
||||
return ob_write_string(allocator_, str, str_);
|
||||
}
|
||||
|
||||
// Serialization of StringData: only str_ is encoded (allocator_ is not part
// of the wire format). The buffer/position variables used by OB_UNIS_ENCODE
// are presumably supplied by the OB_DEF_SERIALIZE macro - confirm there.
OB_DEF_SERIALIZE(ObExternalFileFormat::StringData)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
LST_DO_CODE(OB_UNIS_ENCODE, str_);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Deserialization of StringData: the string is first decoded into a
// temporary ObString and then handed to store_str(), which writes it into
// str_ via ob_write_string() with allocator_.
OB_DEF_DESERIALIZE(ObExternalFileFormat::StringData)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObString temp_str;
|
||||
LST_DO_CODE(OB_UNIS_DECODE, temp_str);
|
||||
// Only store the string when decoding succeeded.
if (OB_SUCC(ret)) {
|
||||
ret = store_str(temp_str);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Serialized size of StringData: accumulates the encoded length of str_,
// the only member written by the serializer.
OB_DEF_SERIALIZE_SIZE(ObExternalFileFormat::StringData)
|
||||
{
|
||||
int64_t len = 0;
|
||||
LST_DO_CODE(OB_UNIS_ADD_LEN, str_);
|
||||
return len;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
#include "common/object/ob_object.h"
|
||||
#include "lib/container/ob_se_array.h"
|
||||
#include "lib/string/ob_string.h"
|
||||
#include "lib/json/ob_json.h"
|
||||
|
||||
#ifndef _OB_LOAD_DATA_PARSER_H_
|
||||
#define _OB_LOAD_DATA_PARSER_H_
|
||||
@ -26,18 +27,54 @@ class ObDataInFileStruct;
|
||||
|
||||
struct ObCSVGeneralFormat {
|
||||
ObCSVGeneralFormat () :
|
||||
line_start_str_(),
|
||||
line_term_str_(),
|
||||
field_term_str_(),
|
||||
field_escaped_char_(INT64_MAX),
|
||||
field_enclosed_char_(INT64_MAX),
|
||||
cs_type_(common::CHARSET_INVALID),
|
||||
skip_header_lines_(0),
|
||||
skip_blank_lines_(false),
|
||||
trim_space_(false),
|
||||
null_if_(),
|
||||
empty_field_as_null_(false),
|
||||
file_column_nums_(0)
|
||||
{}
|
||||
static constexpr const char *OPTION_NAMES[] = {
|
||||
"LINE_DELIMITER",
|
||||
"FIELD_DELIMITER",
|
||||
"ESCAPE",
|
||||
"FIELD_OPTIONALLY_ENCLOSED_BY",
|
||||
"ENCODING",
|
||||
"SKIP_HEADER",
|
||||
"SKIP_BLANK_LINES",
|
||||
"TRIM_SPACE",
|
||||
"NULL_IF_EXETERNAL",
|
||||
"EMPTY_FIELD_AS_NULL",
|
||||
};
|
||||
common::ObString line_start_str_;
|
||||
common::ObString line_term_str_;
|
||||
common::ObString field_term_str_;
|
||||
int64_t field_escaped_char_; // valid escaped char after stmt validation
|
||||
int64_t field_enclosed_char_; // valid enclosed char after stmt validation
|
||||
common::ObCharsetType cs_type_;
|
||||
common::ObCharsetType cs_type_; // charset type of format strings
|
||||
int64_t skip_header_lines_;
|
||||
bool skip_blank_lines_;
|
||||
bool trim_space_;
|
||||
common::ObArrayWrap<common::ObString> null_if_;
|
||||
bool empty_field_as_null_;
|
||||
|
||||
int64_t file_column_nums_;
|
||||
|
||||
int init_format(const ObDataInFileStruct &format,
|
||||
int64_t file_column_nums,
|
||||
ObCollationType file_cs_type);
|
||||
int64_t to_json_kv_string(char* buf, const int64_t buf_len) const;
|
||||
int load_from_json_data(json::Pair *&node, common::ObIAllocator &allocator);
|
||||
|
||||
TO_STRING_KV(K(cs_type_), K(file_column_nums_), K(line_start_str_), K(field_enclosed_char_),
|
||||
K(field_escaped_char_), K(field_term_str_), K(line_term_str_));
|
||||
OB_UNIS_VERSION(1);
|
||||
};
|
||||
|
||||
/**
|
||||
@ -84,20 +121,22 @@ public:
|
||||
};
|
||||
public:
|
||||
ObCSVGeneralParser() {}
|
||||
int init(const ObCSVGeneralFormat &format);
|
||||
|
||||
int init(const ObDataInFileStruct &format,
|
||||
int64_t file_column_nums,
|
||||
common::ObCollationType file_cs_type);
|
||||
const ObCSVGeneralFormat &get_format() { return format_; }
|
||||
const OptParams &get_opt_params() { return opt_param_; }
|
||||
|
||||
template<common::ObCharsetType cs_type, typename handle_func, bool DO_ESCAPE = false>
|
||||
template<common::ObCharsetType cs_type, typename handle_func, bool NEED_ESCAPED_RESULT = false>
|
||||
int scan_proto(const char *&str, const char *end, int64_t &nrows,
|
||||
char *escape_buf, char *escaped_buf_end,
|
||||
handle_func &handle_one_line,
|
||||
common::ObIArray<LineErrRec> &errors,
|
||||
bool is_end_file);
|
||||
|
||||
template<typename handle_func, bool DO_ESCAPE = false>
|
||||
template<typename handle_func, bool NEED_ESCAPED_RESULT = false>
|
||||
int scan(const char *&str, const char *end, int64_t &nrows,
|
||||
char *escape_buf, char *escaped_buf_end,
|
||||
handle_func &handle_one_line,
|
||||
@ -106,20 +145,20 @@ public:
|
||||
int ret = common::OB_SUCCESS;
|
||||
switch (format_.cs_type_) {
|
||||
case common::CHARSET_UTF8MB4:
|
||||
ret = scan_proto<common::CHARSET_UTF8MB4, handle_func, DO_ESCAPE>(
|
||||
ret = scan_proto<common::CHARSET_UTF8MB4, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
case common::CHARSET_GBK:
|
||||
ret = scan_proto<common::CHARSET_GBK, handle_func, DO_ESCAPE>(
|
||||
ret = scan_proto<common::CHARSET_GBK, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
case common::CHARSET_GB18030:
|
||||
case common::CHARSET_GB18030_2022:
|
||||
ret = scan_proto<common::CHARSET_GB18030, handle_func, DO_ESCAPE>(
|
||||
ret = scan_proto<common::CHARSET_GB18030, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
default:
|
||||
ret = scan_proto<common::CHARSET_BINARY, handle_func, DO_ESCAPE>(
|
||||
ret = scan_proto<common::CHARSET_BINARY, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
}
|
||||
@ -128,6 +167,7 @@ public:
|
||||
common::ObIArray<FieldValue>& get_fields_per_line() { return fields_per_line_; }
|
||||
|
||||
private:
|
||||
int init_opt_variables();
|
||||
template<common::ObCharsetType cs_type>
|
||||
inline int mbcharlen(const char *ptr, const char *end) {
|
||||
UNUSED(ptr);
|
||||
@ -157,29 +197,52 @@ private:
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
inline
|
||||
bool is_null_field(bool is_escaped, const char* field_begin, const char* field_end) {
|
||||
return (is_escaped && field_end - field_begin == 1 && 'N' == *field_begin)
|
||||
|| (format_.field_enclosed_char_ != INT64_MAX
|
||||
&& field_end - field_begin == 4
|
||||
&& 0 == strncasecmp(field_begin, "NULL", 4)
|
||||
&& !is_escaped);
|
||||
bool is_null_field(const char* ori_field_begin, int64_t ori_field_len,
|
||||
const char* final_field_begin, int64_t final_field_len) {
|
||||
bool ret = false;
|
||||
|
||||
if ((2 == ori_field_len && format_.field_escaped_char_ == ori_field_begin[0] && 'N' == ori_field_begin[1])
|
||||
|| (format_.field_enclosed_char_ != INT64_MAX && 4 == ori_field_len && 0 == MEMCMP(ori_field_begin, "NULL", 4))
|
||||
|| (format_.empty_field_as_null_ && 0 == final_field_len)) {
|
||||
ret = true;
|
||||
} else {
|
||||
for (int i = 0; i < format_.null_if_.count(); i++) {
|
||||
if (format_.null_if_.at(i).length() == final_field_len
|
||||
&& 0 == MEMCMP(final_field_begin, format_.null_if_.at(i).ptr(), final_field_len)) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline
|
||||
void gen_new_field(bool is_enclosed, bool is_escaped,
|
||||
const char *field_begin, const char *field_end, int field_idx) {
|
||||
int32_t str_len = static_cast<int32_t>(field_end - field_begin);
|
||||
void gen_new_field(const bool is_enclosed, const char *ori_field_begin, const char *ori_field_end,
|
||||
const char *field_begin, const char *field_end, const int field_idx) {
|
||||
FieldValue &new_field = fields_per_line_[field_idx - 1];
|
||||
new_field = FieldValue();
|
||||
if (!is_enclosed && is_null_field(is_escaped, field_begin, field_end)) {
|
||||
if (format_.trim_space_) {
|
||||
while (field_begin < field_end && ' ' == *field_begin) field_begin++;
|
||||
while (field_begin < field_end && ' ' == *(field_end - 1)) field_end--;
|
||||
}
|
||||
if (is_null_field(ori_field_begin, ori_field_end - ori_field_begin, field_begin, field_end - field_begin)) {
|
||||
new_field.is_null_ = 1;
|
||||
} else {
|
||||
new_field.ptr_ = const_cast<char*>(field_begin);
|
||||
new_field.len_ = str_len;
|
||||
new_field.len_ = static_cast<int32_t>(field_end - field_begin);
|
||||
}
|
||||
}
|
||||
|
||||
inline bool is_escape_next(const bool is_enclosed, const char cur, const char next) {
|
||||
// 1. the next char escaped by escape_char "A\tB" => A B
|
||||
// 2. enclosed char escaped by another enclosed char in enclosed field. E.g. "A""B" => A"B
|
||||
return (format_.field_escaped_char_ == cur && !opt_param_.is_same_escape_enclosed_)
|
||||
|| (is_enclosed && format_.field_enclosed_char_ == cur && format_.field_enclosed_char_ == next);
|
||||
}
|
||||
|
||||
protected:
|
||||
ObCSVGeneralFormat format_;
|
||||
common::ObSEArray<FieldValue, 1> fields_per_line_;
|
||||
@ -230,7 +293,7 @@ inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_GB18030>(const char *pt
|
||||
return mb_len;
|
||||
}
|
||||
|
||||
template<common::ObCharsetType cs_type, typename handle_func, bool DO_ESCAPE>
|
||||
template<common::ObCharsetType cs_type, typename handle_func, bool NEED_ESCAPED_RESULT>
|
||||
int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
const char *end,
|
||||
int64_t &nrows,
|
||||
@ -241,18 +304,18 @@ int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
bool is_end_file)
|
||||
{
|
||||
int ret = common::OB_SUCCESS;
|
||||
|
||||
int line_no = 0;
|
||||
int blank_line_cnt = 0;
|
||||
const char *line_begin = str;
|
||||
char *escape_buf_pos = escape_buf;
|
||||
|
||||
if (DO_ESCAPE) {
|
||||
if (NEED_ESCAPED_RESULT) {
|
||||
if (escape_buf_end - escape_buf < end - str) {
|
||||
ret = common::OB_BUF_NOT_ENOUGH;
|
||||
}
|
||||
}
|
||||
|
||||
while (OB_SUCC(ret) && str < end && line_no < nrows) {
|
||||
char *escape_buf_pos = escape_buf;
|
||||
while (OB_SUCC(ret) && str < end && line_no - blank_line_cnt < nrows) {
|
||||
bool find_new_line = false;
|
||||
int field_idx = 0;
|
||||
|
||||
@ -276,6 +339,7 @@ int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
}
|
||||
|
||||
while (str < end && !find_new_line) {
|
||||
const char *ori_field_begin = str;
|
||||
const char *field_begin = str;
|
||||
bool is_enclosed = false;
|
||||
const char *last_end_enclosed = nullptr;
|
||||
@ -290,29 +354,23 @@ int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
}
|
||||
while (str < end && !is_term) {
|
||||
const char *next = str + 1;
|
||||
if (next < end && ((format_.field_escaped_char_ == *str && !opt_param_.is_same_escape_enclosed_)
|
||||
|| (is_enclosed && format_.field_enclosed_char_ == *str && format_.field_enclosed_char_ == *next))) {
|
||||
bool is_valid_escape = (1 == mbcharlen<cs_type>(next, end));
|
||||
if (DO_ESCAPE) {
|
||||
if (next < end && is_escape_next(is_enclosed, *str, *next)) {
|
||||
if (NEED_ESCAPED_RESULT) {
|
||||
if (last_escaped_str == nullptr) {
|
||||
last_escaped_str = field_begin;
|
||||
field_begin = escape_buf_pos;
|
||||
}
|
||||
int copy_len = str - last_escaped_str;
|
||||
int64_t copy_len = str - last_escaped_str;
|
||||
//if (OB_UNLIKELY(escape_buf_pos + copy_len + 1 > escape_buf_end)) {
|
||||
// ret = common::OB_SIZE_OVERFLOW; break;
|
||||
//} else {
|
||||
MEMCPY(escape_buf_pos, last_escaped_str, copy_len);
|
||||
escape_buf_pos+=copy_len;
|
||||
if (is_valid_escape) {
|
||||
*(escape_buf_pos++) = escape(*next);
|
||||
last_escaped_str = next + 1;
|
||||
} else {
|
||||
last_escaped_str = next;
|
||||
}
|
||||
*(escape_buf_pos++) = escape(*next);
|
||||
last_escaped_str = next + 1;
|
||||
//}
|
||||
}
|
||||
str += (OB_LIKELY(is_valid_escape) ? 2 : 1);
|
||||
str += 2;
|
||||
} else if (format_.field_enclosed_char_ == *str) {
|
||||
last_end_enclosed = str;
|
||||
str++;
|
||||
@ -337,6 +395,7 @@ int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
}
|
||||
|
||||
if (OB_LIKELY(is_term) || is_end_file) {
|
||||
const char *ori_field_end = str;
|
||||
const char *field_end = str;
|
||||
if (is_enclosed && field_end - 1 == last_end_enclosed) {
|
||||
field_begin++;
|
||||
@ -348,9 +407,9 @@ int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
str = end;
|
||||
}
|
||||
|
||||
if (DO_ESCAPE) {
|
||||
if (NEED_ESCAPED_RESULT) {
|
||||
if (last_escaped_str != nullptr) {
|
||||
int copy_len = field_end - last_escaped_str;
|
||||
int64_t copy_len = field_end - last_escaped_str;
|
||||
//if (OB_UNLIKELY(escape_buf_pos + copy_len > escape_buf_end)) {
|
||||
// ret = common::OB_SIZE_OVERFLOW;
|
||||
//} else {
|
||||
@ -363,9 +422,10 @@ int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
}
|
||||
}
|
||||
|
||||
if (is_field_term || field_end > field_begin || field_idx < format_.file_column_nums_) {
|
||||
if (is_field_term || ori_field_end > ori_field_begin
|
||||
|| (field_idx < format_.file_column_nums_ && !format_.skip_blank_lines_)) {
|
||||
if (field_idx++ < format_.file_column_nums_) {
|
||||
gen_new_field(is_enclosed, last_escaped_str != nullptr, field_begin, field_end, field_idx);
|
||||
gen_new_field(is_enclosed, ori_field_begin, ori_field_end, field_begin, field_end, field_idx);
|
||||
}
|
||||
}
|
||||
|
||||
@ -376,11 +436,17 @@ int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
}
|
||||
}
|
||||
if (OB_LIKELY(find_new_line) || is_end_file) {
|
||||
if (field_idx != format_.file_column_nums_) {
|
||||
ret = handle_irregular_line(field_idx, line_no, errors);
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
ret = handle_one_line(fields_per_line_);
|
||||
if (!format_.skip_blank_lines_ || field_idx > 0) {
|
||||
if (field_idx != format_.file_column_nums_) {
|
||||
ret = handle_irregular_line(field_idx, line_no, errors);
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
ret = handle_one_line(fields_per_line_);
|
||||
}
|
||||
} else {
|
||||
if (format_.skip_blank_lines_) {
|
||||
blank_line_cnt++;
|
||||
}
|
||||
}
|
||||
line_no++;
|
||||
line_begin = str;
|
||||
@ -389,10 +455,61 @@ int ObCSVGeneralParser::scan_proto(const char *&str,
|
||||
|
||||
str = line_begin;
|
||||
nrows = line_no;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// user using to define create external table format
|
||||
struct ObOriginFileFormat
|
||||
{
|
||||
int64_t to_json_kv_string(char *buf, const int64_t buf_len) const;
|
||||
int load_from_json_data(json::Pair *&node, common::ObIAllocator &allocator);
|
||||
TO_STRING_KV(K(origin_line_term_str_), K(origin_field_term_str_), K(origin_field_escaped_str_),
|
||||
K(origin_field_enclosed_str_), K(origin_null_if_str_));
|
||||
|
||||
static constexpr const char *ORIGIN_FORMAT_STRING[] = {
|
||||
"ORIGIN_LINE_DELIMITER",
|
||||
"ORIGIN_FIELD_DELIMITER",
|
||||
"ORIGIN_ESCAPE",
|
||||
"ORIGIN_FIELD_OPTIONALLY_ENCLOSED_BY",
|
||||
"ORIGIN_NULL_IF_EXETERNAL",
|
||||
};
|
||||
|
||||
common::ObString origin_line_term_str_;
|
||||
common::ObString origin_field_term_str_;
|
||||
common::ObString origin_field_escaped_str_;
|
||||
common::ObString origin_field_enclosed_str_;
|
||||
common::ObString origin_null_if_str_;
|
||||
};
|
||||
|
||||
struct ObExternalFileFormat
|
||||
{
|
||||
struct StringData {
|
||||
StringData(common::ObIAllocator &alloc) : allocator_(alloc) {}
|
||||
int store_str(const ObString &str);
|
||||
common::ObString str_;
|
||||
common::ObIAllocator &allocator_;
|
||||
TO_STRING_KV(K_(str));
|
||||
OB_UNIS_VERSION(1);
|
||||
};
|
||||
|
||||
enum FormatType {
|
||||
INVALID_FORMAT = -1,
|
||||
CSV_FORMAT,
|
||||
MAX_FORMAT
|
||||
};
|
||||
|
||||
|
||||
ObExternalFileFormat() : format_type_(INVALID_FORMAT) {}
|
||||
|
||||
int64_t to_string(char* buf, const int64_t buf_len) const;
|
||||
int load_from_string(const common::ObString &str, ObIAllocator &allocator);
|
||||
|
||||
ObOriginFileFormat origin_file_format_str_;
|
||||
FormatType format_type_;
|
||||
sql::ObCSVGeneralFormat csv_format_;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -44,6 +44,15 @@
|
||||
#include "observer/ob_server_struct.h"
|
||||
#include "observer/ob_server.h"
|
||||
#include "lib/worker.h"
|
||||
#include "share/external_table/ob_external_table_file_mgr.h"
|
||||
#include "share/external_table/ob_external_table_file_task.h"
|
||||
#include "share/external_table/ob_external_table_file_rpc_proxy.h"
|
||||
#include "observer/ob_srv_network_frame.h"
|
||||
#include "observer/dbms_job/ob_dbms_job_master.h"
|
||||
#include "observer/ob_inner_sql_connection_pool.h"
|
||||
#include "share/backup/ob_backup_io_adapter.h"
|
||||
#include "share/external_table/ob_external_table_file_rpc_processor.h"
|
||||
#include "share/external_table/ob_external_table_utils.h"
|
||||
namespace oceanbase
|
||||
{
|
||||
using namespace common;
|
||||
@ -469,7 +478,8 @@ int ObCreateTableExecutor::execute(ObExecContext &ctx, ObCreateTableStmt &stmt)
|
||||
if (OB_ISNULL(task_exec_ctx = GET_TASK_EXECUTOR_CTX(ctx))) {
|
||||
ret = OB_NOT_INIT;
|
||||
LOG_WARN("get task executor context failed", K(ret));
|
||||
} else if (OB_FAIL(ObPartitionExecutorUtils::calc_values_exprs(ctx, stmt))) {
|
||||
} else if (!table_schema.is_external_table() //external table can not define partitions by create table stmt
|
||||
&& OB_FAIL(ObPartitionExecutorUtils::calc_values_exprs(ctx, stmt))) {
|
||||
LOG_WARN("compare range parition expr fail", K(ret));
|
||||
} else if (OB_FAIL(set_index_arg_list(ctx, stmt))) {
|
||||
LOG_WARN("fail to set index_arg_list", K(ret));
|
||||
@ -483,9 +493,24 @@ int ObCreateTableExecutor::execute(ObExecContext &ctx, ObCreateTableStmt &stmt)
|
||||
LOG_WARN("schema_guard reset failed", K(ret));
|
||||
} else if (OB_FAIL(common_rpc_proxy->create_table(create_table_arg, res))) {
|
||||
LOG_WARN("rpc proxy create table failed", K(ret), "dst", common_rpc_proxy->get_server());
|
||||
} else { /* do nothing */ }
|
||||
} else if (OB_FAIL(execute_ctas(ctx, stmt, common_rpc_proxy))){ // 查询建表的处理
|
||||
LOG_WARN("execute create table as select failed", K(ret));
|
||||
} else {
|
||||
if (table_schema.is_external_table()) {
|
||||
//auto refresh after create external table
|
||||
OZ (ObAlterTableExecutor::update_external_file_list(
|
||||
table_schema.get_tenant_id(), res.table_id_,
|
||||
table_schema.get_external_file_location(),
|
||||
table_schema.get_external_file_location_access_info(),
|
||||
table_schema.get_external_file_pattern(),
|
||||
ctx));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (table_schema.is_external_table()) {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "create external table as select");
|
||||
} else if (OB_FAIL(execute_ctas(ctx, stmt, common_rpc_proxy))){ // 查询建表的处理
|
||||
LOG_WARN("execute create table as select failed", K(ret));
|
||||
}
|
||||
}
|
||||
|
||||
// only CTAS or create temporary table will make session_id != 0. If such table detected, set
|
||||
@ -743,6 +768,311 @@ int ObAlterTableExecutor::alter_table_rpc_v2(
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAlterTableExecutor::get_external_file_list(const ObString &location,
|
||||
ObIArray<ObString> &file_urls,
|
||||
ObIArray<int64_t> &file_sizes,
|
||||
const ObString &access_info,
|
||||
ObIAllocator &allocator,
|
||||
common::ObStorageType &storage_type)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObExternalDataAccessDriver driver;
|
||||
if (OB_FAIL(driver.init(location, access_info))) {
|
||||
LOG_WARN("init external data access driver failed", K(ret));
|
||||
} else if (OB_FAIL(driver.get_file_list(location, file_urls, allocator))) {
|
||||
LOG_WARN("get file urls failed", K(ret));
|
||||
} else if (OB_FAIL(driver.get_file_sizes(location, file_urls, file_sizes))) {
|
||||
LOG_WARN("get file sizes failed", K(ret));
|
||||
}
|
||||
if (driver.is_opened()) {
|
||||
storage_type = driver.get_storage_type();
|
||||
driver.close();
|
||||
}
|
||||
|
||||
LOG_DEBUG("show external table files", K(file_urls), K(storage_type), K(access_info));
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAlterTableExecutor::filter_and_sort_external_files(const ObString &pattern,
|
||||
ObExecContext &exec_ctx,
|
||||
ObIArray<ObString> &file_urls,
|
||||
ObIArray<int64_t> &file_sizes) {
|
||||
int ret = OB_SUCCESS;
|
||||
const int64_t count = file_urls.count();
|
||||
ObSEArray<int64_t, 8> tmp_file_sizes;
|
||||
hash::ObHashMap<ObString, int64_t> file_map;
|
||||
if (0 == count) {
|
||||
/* do nothing */
|
||||
} else if (OB_UNLIKELY(count != file_sizes.count())) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("array size error", K(ret));
|
||||
} else if (OB_FAIL(file_map.create(count, "ExtFileMap", "ExtFileMap"))) {
|
||||
LOG_WARN("fail to init hashmap", K(ret));
|
||||
} else {
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < count; ++i) {
|
||||
if (OB_FAIL(file_map.set_refactored(file_urls.at(i), file_sizes.at(i)))) {
|
||||
LOG_WARN("failed to set refactored to file_map", K(ret));
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
if (OB_FAIL(ObExternalTableUtils::filter_external_table_files(pattern, exec_ctx, file_urls))) {
|
||||
LOG_WARN("failed to filter external table files");
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
std::sort(file_urls.get_data(), file_urls.get_data() + file_urls.count());
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < file_urls.count(); ++i) {
|
||||
int64_t file_size = 0;
|
||||
if (OB_FAIL(file_map.get_refactored(file_urls.at(i), file_size))) {
|
||||
if (OB_UNLIKELY(OB_HASH_NOT_EXIST == ret)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
}
|
||||
LOG_WARN("failed to get key meta", K(ret));
|
||||
} else if (OB_FAIL(tmp_file_sizes.push_back(file_size))) {
|
||||
LOG_WARN("failed to push back into tmp_file_sizes", K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
if (OB_FAIL(file_sizes.assign(tmp_file_sizes))) {
|
||||
LOG_WARN("failed to assign file_sizes", K(ret));
|
||||
} else if (OB_FAIL(file_map.destroy())) {
|
||||
LOG_WARN("failed to destory file_map");
|
||||
}
|
||||
}
|
||||
}
|
||||
LOG_TRACE("after filter external table files", K(ret), K(file_urls));
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAlterTableExecutor::flush_external_file_cache(
|
||||
const uint64_t tenant_id,
|
||||
const uint64_t table_id,
|
||||
const ObIArray<ObAddr> &all_servers)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObArenaAllocator allocator;
|
||||
ObAsyncRpcTaskWaitContext<ObRpcAsyncFlushExternalTableKVCacheCallBack> context;
|
||||
int64_t send_task_count = 0;
|
||||
OZ (context.init());
|
||||
OZ (context.get_cb_list().reserve(all_servers.count()));
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < all_servers.count(); i++) {
|
||||
ObFlushExternalTableFileCacheReq req;
|
||||
int64_t timeout = ObExternalTableFileManager::CACHE_EXPIRE_TIME;
|
||||
req.tenant_id_ = tenant_id;
|
||||
req.table_id_ = table_id;
|
||||
req.partition_id_ = 0;
|
||||
ObRpcAsyncFlushExternalTableKVCacheCallBack* async_cb = nullptr;
|
||||
if (OB_ISNULL(async_cb = OB_NEWx(ObRpcAsyncFlushExternalTableKVCacheCallBack, (&allocator), (&context)))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("failed to allocate async cb memory", K(ret));
|
||||
}
|
||||
OZ (context.get_cb_list().push_back(async_cb));
|
||||
OZ (GCTX.external_table_proxy_->to(all_servers.at(i))
|
||||
.by(tenant_id)
|
||||
.timeout(timeout)
|
||||
.flush_file_kvcahce(req, async_cb));
|
||||
if (OB_SUCC(ret)) {
|
||||
send_task_count++;
|
||||
}
|
||||
}
|
||||
|
||||
context.set_task_count(send_task_count);
|
||||
|
||||
do {
|
||||
int temp_ret = context.wait_executing_tasks();
|
||||
if (OB_SUCCESS != temp_ret) {
|
||||
LOG_WARN("fail to wait executing task", K(temp_ret));
|
||||
if (OB_SUCC(ret)) {
|
||||
ret = temp_ret;
|
||||
}
|
||||
}
|
||||
} while(0);
|
||||
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < context.get_cb_list().count(); i++) {
|
||||
ret = context.get_cb_list().at(i)->get_task_resp().rcode_.rcode_;
|
||||
if (OB_FAIL(ret)) {
|
||||
if (OB_TIMEOUT == ret) {
|
||||
// flush timeout is OK, because the file cache has already expire
|
||||
ret = OB_SUCCESS;
|
||||
} else {
|
||||
LOG_WARN("async flush kvcache process failed", K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int64_t i = 0; i < context.get_cb_list().count(); i++) {
|
||||
context.get_cb_list().at(i)->~ObRpcAsyncFlushExternalTableKVCacheCallBack();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAlterTableExecutor::collect_local_files_on_servers(
|
||||
const uint64_t tenant_id,
|
||||
const ObString &location,
|
||||
ObIArray<ObAddr> &all_servers,
|
||||
ObIArray<ObString> &file_urls,
|
||||
ObIArray<int64_t> &file_sizes,
|
||||
ObIAllocator &allocator)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObSEArray<ObAddr, 8> target_servers;
|
||||
ObArray<ObString> server_ip_port;
|
||||
|
||||
bool is_absolute_path = false;
|
||||
const int64_t PREFIX_LEN = STRLEN(OB_FILE_PREFIX);
|
||||
if (location.length() <= PREFIX_LEN) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("invalid location", K(ret), K(location));
|
||||
} else {
|
||||
is_absolute_path = ('/' == location.ptr()[PREFIX_LEN]);
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
if (is_absolute_path) {
|
||||
std::sort(all_servers.get_data(), all_servers.get_data() + all_servers.count(),
|
||||
[](const ObAddr &l, const ObAddr &r) -> bool { return l < r; });
|
||||
ObAddr pre_addr;
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < all_servers.count(); i++) {
|
||||
ObAddr &cur_addr = all_servers.at(i);
|
||||
if (!cur_addr.is_equal_except_port(pre_addr)) {
|
||||
pre_addr = cur_addr;
|
||||
OZ(target_servers.push_back(cur_addr));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
OZ (target_servers.assign(all_servers));
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
ObAsyncRpcTaskWaitContext<ObRpcAsyncLoadExternalTableFileCallBack> context;
|
||||
int64_t send_task_count = 0;
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < target_servers.count(); i++) {
|
||||
const int64_t ip_len = 64;
|
||||
char *ip_port_buffer = nullptr;
|
||||
if (OB_ISNULL(ip_port_buffer = (char*)(allocator.alloc(ip_len)))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("failed to allocate ip memory", K(ret));
|
||||
}
|
||||
OZ (target_servers.at(i).ip_port_to_string(ip_port_buffer, ip_len));
|
||||
OZ (server_ip_port.push_back(ObString(ip_port_buffer)));
|
||||
}
|
||||
OZ (context.init());
|
||||
OZ (context.get_cb_list().reserve(target_servers.count()));
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < target_servers.count(); i++) {
|
||||
const int64_t timeout = 10 * 1000000L; //10s
|
||||
ObRpcAsyncLoadExternalTableFileCallBack* async_cb = nullptr;
|
||||
ObLoadExternalFileListReq req;
|
||||
req.location_ = location;
|
||||
|
||||
if (OB_ISNULL(async_cb = OB_NEWx(ObRpcAsyncLoadExternalTableFileCallBack, (&allocator), (&context)))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("failed to allocate async cb memory", K(ret));
|
||||
}
|
||||
OZ (context.get_cb_list().push_back(async_cb));
|
||||
OZ (GCTX.external_table_proxy_->to(target_servers.at(i))
|
||||
.by(tenant_id)
|
||||
.timeout(timeout)
|
||||
.load_external_file_list(req, async_cb));
|
||||
if (OB_SUCC(ret)) {
|
||||
send_task_count++;
|
||||
}
|
||||
}
|
||||
|
||||
context.set_task_count(send_task_count);
|
||||
|
||||
do {
|
||||
int temp_ret = context.wait_executing_tasks();
|
||||
if (OB_SUCCESS != temp_ret) {
|
||||
LOG_WARN("fail to wait executing task", K(temp_ret));
|
||||
if (OB_SUCC(ret)) {
|
||||
ret = temp_ret;
|
||||
}
|
||||
}
|
||||
} while(0);
|
||||
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < context.get_cb_list().count(); i++) {
|
||||
if (OB_FAIL(context.get_cb_list().at(i)->get_task_resp().rcode_.rcode_)) {
|
||||
LOG_WARN("async load files process failed", K(ret));
|
||||
} else {
|
||||
const ObIArray<ObString> &resp_array = context.get_cb_list().at(i)->get_task_resp().file_urls_;
|
||||
OZ (append(file_sizes, context.get_cb_list().at(i)->get_task_resp().file_sizes_));
|
||||
for (int64_t j = 0; OB_SUCC(ret) && j < resp_array.count(); j++) {
|
||||
ObSqlString tmp_file_url;
|
||||
ObString file_url;
|
||||
OZ (tmp_file_url.append(server_ip_port.at(i)));
|
||||
OZ (tmp_file_url.append("%"));
|
||||
OZ (tmp_file_url.append(resp_array.at(j)));
|
||||
OZ (ob_write_string(allocator, tmp_file_url.string(), file_url));
|
||||
OZ (file_urls.push_back(file_url));
|
||||
}
|
||||
}
|
||||
LOG_DEBUG("get external table file", K(context.get_cb_list().at(i)->get_task_resp().file_urls_));
|
||||
}
|
||||
|
||||
for (int64_t i = 0; i < context.get_cb_list().count(); i++) {
|
||||
context.get_cb_list().at(i)->~ObRpcAsyncLoadExternalTableFileCallBack();
|
||||
}
|
||||
}
|
||||
LOG_DEBUG("update external table file list", K(ret), K(file_urls));
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObAlterTableExecutor::update_external_file_list(
|
||||
const uint64_t tenant_id,
|
||||
const uint64_t table_id,
|
||||
const ObString &location,
|
||||
const ObString &access_info,
|
||||
const ObString &pattern,
|
||||
ObExecContext &exec_ctx)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObSEArray<ObString, 8> file_urls;
|
||||
ObSEArray<int64_t, 8> file_sizes;
|
||||
ObArenaAllocator allocator;
|
||||
ObSEArray<ObAddr, 8> all_servers;
|
||||
OZ (GCTX.location_service_->external_table_get(tenant_id, table_id, all_servers));
|
||||
|
||||
if (ObSQLUtils::is_external_files_on_local_disk(location)) {
|
||||
OZ (collect_local_files_on_servers(tenant_id, location, all_servers, file_urls, file_sizes, allocator));
|
||||
} else {
|
||||
OZ (ObExternalTableFileManager::get_instance().get_external_file_list_on_device(
|
||||
location, file_urls, file_sizes, access_info, allocator));
|
||||
}
|
||||
|
||||
OZ (filter_and_sort_external_files(pattern, exec_ctx, file_urls, file_sizes));
|
||||
|
||||
//TODO [External Table] opt performance
|
||||
OZ (ObExternalTableFileManager::get_instance().update_inner_table_file_list(tenant_id, table_id, file_urls, file_sizes));
|
||||
|
||||
OZ (flush_external_file_cache(tenant_id, table_id, all_servers));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Executes an ALTER on an external table. The only supported action is
// T_ALTER_REFRESH_EXTERNAL_TABLE, which re-reads the table's file list.
//
// @param ctx   execution context, forwarded to update_external_file_list()
// @param stmt  alter statement providing the action type and table schema
// @return OB_SUCCESS, OB_ERR_UNEXPECTED for any other action type, or the
//         error returned by update_external_file_list()
int ObAlterTableExecutor::execute_alter_external_table(ObExecContext &ctx, ObAlterTableStmt &stmt)
{
  int ret = OB_SUCCESS;
  obrpc::ObAlterTableArg &arg = stmt.get_alter_table_arg();
  const int64_t option = stmt.get_alter_external_table_type();
  if (T_ALTER_REFRESH_EXTERNAL_TABLE == option) {
    OZ (update_external_file_list(stmt.get_tenant_id(),
                                  arg.alter_table_schema_.get_table_id(),
                                  arg.alter_table_schema_.get_external_file_location(),
                                  arg.alter_table_schema_.get_external_file_location_access_info(),
                                  arg.alter_table_schema_.get_external_file_pattern(),
                                  ctx));
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected option", K(ret), K(option));
  }
  return ret;
}
|
||||
|
||||
int ObAlterTableExecutor::execute(ObExecContext &ctx, ObAlterTableStmt &stmt)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -764,6 +1094,8 @@ int ObAlterTableExecutor::execute(ObExecContext &ctx, ObAlterTableStmt &stmt)
|
||||
stmt.get_tg_arg().ddl_stmt_str_ = first_stmt;
|
||||
OZ (common_rpc_proxy->alter_trigger(stmt.get_tg_arg()), common_rpc_proxy->get_server());
|
||||
}
|
||||
} else if (alter_table_arg.alter_table_schema_.is_external_table()) {
|
||||
OZ (execute_alter_external_table(ctx, stmt));
|
||||
} else {
|
||||
ObSQLSessionInfo *my_session = NULL;
|
||||
obrpc::ObAlterTableRes res;
|
||||
@ -1626,6 +1958,8 @@ int ObDropTableExecutor::execute(ObExecContext &ctx, ObDropTableStmt &stmt)
|
||||
LOG_WARN("rpc proxy drop table failed", K(ret), "dst", common_rpc_proxy->get_server());
|
||||
} else if (res.is_valid() && OB_FAIL(ObDDLExecutorUtil::wait_ddl_retry_task_finish(res.tenant_id_, res.task_id_, *my_session, common_rpc_proxy, affected_rows))) {
|
||||
LOG_WARN("wait ddl finish failed", K(ret), K(res.tenant_id_), K(res.task_id_));
|
||||
} else {
|
||||
//do nothing
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
||||
@ -77,13 +77,31 @@ public:
|
||||
ObAlterTableExecutor();
|
||||
virtual ~ObAlterTableExecutor();
|
||||
int execute(ObExecContext &ctx, ObAlterTableStmt &stmt);
|
||||
|
||||
static int update_external_file_list(
|
||||
const uint64_t tenant_id,
|
||||
const uint64_t table_id,
|
||||
const common::ObString &location,
|
||||
const common::ObString &access_info,
|
||||
const common::ObString &pattern,
|
||||
ObExecContext &ctx);
|
||||
static int collect_local_files_on_servers(
|
||||
const uint64_t tenant_id,
|
||||
const common::ObString &location,
|
||||
common::ObIArray<common::ObAddr> &all_servers,
|
||||
common::ObIArray<common::ObString> &file_urls,
|
||||
common::ObIArray<int64_t> &file_sizes,
|
||||
common::ObIAllocator &allocator);
|
||||
static int flush_external_file_cache(
|
||||
const uint64_t tenant_id,
|
||||
const uint64_t table_id,
|
||||
const common::ObIArray<common::ObAddr> &all_servers);
|
||||
private:
|
||||
static const int64_t TIME_INTERVAL_PER_PART_US = 50 * 1000; // 50ms
|
||||
static const int64_t MAX_WAIT_CHECK_SCHEMA_VERSION_INTERVAL_US = 120LL * 1000000LL; // 120s
|
||||
static const int64_t MIN_WAIT_CHECK_SCHEMA_VERSION_INTERVAL_US = 20LL * 1000000LL; // 20s
|
||||
static const int64_t WAIT_US = 500 * 1000; // 500ms
|
||||
static const int64_t GET_ASSOCIATED_SNAPSHOT_TIMEOUT = 9000000LL; // 9s
|
||||
|
||||
int check_constraint_validity(ObExecContext &ctx,
|
||||
obrpc::ObAlterTableArg &alter_table_arg,
|
||||
common::ObIAllocator &allocator,
|
||||
@ -158,6 +176,18 @@ private:
|
||||
int set_index_arg_list(ObExecContext &ctx, ObAlterTableStmt &stmt);
|
||||
|
||||
int refresh_schema_for_table(const uint64_t tenant_id);
|
||||
int execute_alter_external_table(ObExecContext &ctx, ObAlterTableStmt &stmt);
|
||||
static int get_external_file_list(
|
||||
const ObString &location,
|
||||
common::ObIArray<common::ObString> &file_urls,
|
||||
common::ObIArray<int64_t> &file_sizes,
|
||||
const common::ObString &access_info,
|
||||
common::ObIAllocator &allocator,
|
||||
common::ObStorageType &storage_type);
|
||||
static int filter_and_sort_external_files(const ObString &pattern,
|
||||
ObExecContext &exec_ctx,
|
||||
ObIArray<ObString> &file_urls,
|
||||
ObIArray<int64_t> &file_sizes);
|
||||
private:
|
||||
//DISALLOW_COPY_AND_ASSIGN(ObAlterTableExecutor);
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user