fix load data parser bug
This commit is contained in:
@ -1264,29 +1264,29 @@ int ObCSVParser::next_line(bool& yield_line)
|
||||
bool yield = false;
|
||||
int with_back_slash = 0;
|
||||
|
||||
for (; !yield && cur_pos_ != buf_end_pos_; ++cur_pos_) {
|
||||
|
||||
for (; !yield && cur_pos_ != buf_end_pos_; ++cur_pos_, ++cur_field_end_pos_) {
|
||||
bool line_term_matched = false;
|
||||
if (*cur_pos_ == formats_.enclose_char_ && !in_enclose_flag_ && cur_pos_ == cur_field_begin_pos_) {
|
||||
in_enclose_flag_ = true;
|
||||
}
|
||||
|
||||
if (!is_escaped_flag_ && *cur_pos_ == formats_.escape_char_) {
|
||||
is_escaped_flag_ = true;
|
||||
last_end_enclosed_ = NULL;
|
||||
} else if ((*cur_pos_ == formats_.escape_char_ && formats_.escape_char_ != formats_.enclose_char_) ||
|
||||
(in_enclose_flag_ && formats_.enclose_char_ == *cur_pos_ && cur_pos_ < buf_end_pos_ &&
|
||||
formats_.enclose_char_ == *(cur_pos_ + 1))) {
|
||||
if (cur_pos_ < buf_end_pos_) {
|
||||
cur_pos_++;
|
||||
if (!is_fast_parse_) {
|
||||
*cur_field_end_pos_ = escaped_char(*cur_pos_, &with_back_slash);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
char escaped_res = *cur_pos_;
|
||||
if (is_escaped_flag_) {
|
||||
escaped_res = escaped_char(*cur_pos_, &with_back_slash);
|
||||
if (cur_field_end_pos_ != cur_pos_ && !is_fast_parse_) {
|
||||
*cur_field_end_pos_ = *cur_pos_;
|
||||
}
|
||||
bool has_escaped = cur_field_end_pos_ != cur_pos_;
|
||||
if (has_escaped && !is_fast_parse_) {
|
||||
*cur_field_end_pos_ = escaped_res;
|
||||
}
|
||||
|
||||
bool line_term_matched = false;
|
||||
|
||||
if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) {
|
||||
if (formats_.enclose_char_ == *cur_pos_) {
|
||||
last_end_enclosed_ = cur_field_end_pos_;
|
||||
} else if (is_terminate_char(*cur_pos_, cur_field_end_pos_, line_term_matched)) {
|
||||
if (!line_term_matched || cur_field_begin_pos_ < cur_pos_) {
|
||||
handle_one_field(cur_field_end_pos_, has_escaped);
|
||||
handle_one_field(cur_field_end_pos_, cur_field_end_pos_ != cur_pos_);
|
||||
field_id_++;
|
||||
}
|
||||
char* next_pos = cur_pos_ + 1;
|
||||
@ -1303,12 +1303,6 @@ int ObCSVParser::next_line(bool& yield_line)
|
||||
cur_line_begin_pos_ = next_pos;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_escaped_flag_) {
|
||||
is_escaped_flag_ = false;
|
||||
}
|
||||
|
||||
++cur_field_end_pos_;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -542,6 +542,7 @@ public:
|
||||
cur_line_begin_pos_ = NULL;
|
||||
buf_begin_pos_ = NULL;
|
||||
buf_end_pos_ = NULL;
|
||||
last_end_enclosed_ = NULL;
|
||||
field_id_ = 0;
|
||||
in_enclose_flag_ = false;
|
||||
is_escaped_flag_ = false;
|
||||
@ -590,12 +591,13 @@ private:
|
||||
common::ObBitSet<> string_type_column_;
|
||||
// parsing state variables
|
||||
bool is_last_buf_;
|
||||
char* cur_pos_;
|
||||
char* cur_field_begin_pos_;
|
||||
char* cur_field_end_pos_;
|
||||
char* cur_line_begin_pos_;
|
||||
char* buf_begin_pos_;
|
||||
char* buf_end_pos_;
|
||||
char *cur_pos_;
|
||||
char *cur_field_begin_pos_;
|
||||
char *cur_field_end_pos_;
|
||||
char *cur_line_begin_pos_;
|
||||
char *buf_begin_pos_;
|
||||
char *buf_end_pos_;
|
||||
char *last_end_enclosed_;
|
||||
int64_t field_id_;
|
||||
bool in_enclose_flag_;
|
||||
bool is_escaped_flag_;
|
||||
@ -643,10 +645,8 @@ OB_INLINE bool ObCSVParser::is_terminate_char(char cur_char, char*& cur_pos, boo
|
||||
if (!in_enclose_flag_) {
|
||||
ret_bool = true; // return true
|
||||
} else {
|
||||
char* pre_pos = cur_pos - 1;
|
||||
// with in_enclose_flag_ = true, a term char is valid only if an enclosed char before it
|
||||
if (static_cast<int64_t>(*pre_pos) == formats_.enclose_char_ &&
|
||||
cur_field_begin_pos_ != pre_pos) { // 123---->'---->123
|
||||
if (last_end_enclosed_ == cur_pos - 1) {
|
||||
remove_enclosed_char(cur_pos);
|
||||
ret_bool = true; // return true
|
||||
} else {
|
||||
|
@ -534,12 +534,14 @@ int ObLoadDataResolver::validate_stmt(ObLoadDataStmt* stmt)
|
||||
escape_char = (data_struct_in_file.field_escaped_str_.empty()
|
||||
? INT64_MAX
|
||||
: static_cast<int64_t>(data_struct_in_file.field_escaped_str_[0]));
|
||||
/*
|
||||
if (OB_SUCC(ret)) {
|
||||
if (escape_char != ObDataInFileStruct::DEFAULT_FIELD_ESCAPED_CHAR) {
|
||||
ret = OB_WRONG_FIELD_TERMINATORS;
|
||||
LOG_USER_ERROR(OB_WRONG_FIELD_TERMINATORS);
|
||||
}
|
||||
}
|
||||
*/
|
||||
if (OB_SUCC(ret)) {
|
||||
const char* is_ambiguous_field_sep = strchr("ntrb0ZN", static_cast<int>(field_sep_char));
|
||||
const char* is_unsafe_field_sep = strchr(".0123456789e+-", static_cast<int>(field_sep_char));
|
||||
|
Reference in New Issue
Block a user