diff --git a/src/pl/parser/parse_stmt_node.h b/src/pl/parser/parse_stmt_node.h index 4b06fbac64..11dec8275b 100644 --- a/src/pl/parser/parse_stmt_node.h +++ b/src/pl/parser/parse_stmt_node.h @@ -66,7 +66,7 @@ typedef struct _ObParseCtx int comp_mode_; bool is_not_utf8_connection_; const struct ObCharsetInfo *charset_info_; - int64_t last_well_formed_len_; //解析quoted string时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 + int64_t last_escape_check_pos_; //解析quoted string时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 int connection_collation_; bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */" struct diff --git a/src/sql/parser/ob_fast_parser.cpp b/src/sql/parser/ob_fast_parser.cpp index 2eb66cce4c..4b453073de 100644 --- a/src/sql/parser/ob_fast_parser.cpp +++ b/src/sql/parser/ob_fast_parser.cpp @@ -82,7 +82,7 @@ ObFastParserBase::ObFastParserBase( is_batched_multi_stmt_split_on_(enable_batched_multi_stmt), is_mysql_compatible_comment_(false), cur_token_begin_pos_(0), copy_begin_pos_(0), copy_end_pos_(0), - tmp_buf_(nullptr), tmp_buf_len_(0), last_well_formed_len_(0), + tmp_buf_(nullptr), tmp_buf_len_(0), last_escape_check_pos_(0), param_node_list_(nullptr), tail_param_node_(nullptr), cur_token_type_(INVALID_TOKEN), allocator_(allocator), parse_next_token_func_(nullptr), process_idf_func_(nullptr) @@ -1382,22 +1382,21 @@ inline void ObFastParserBase::check_real_escape(bool &is_real_escape) { if (OB_NOT_NULL(charset_info_) && charset_info_->escape_with_backslash_is_dangerous) { char *cur_pos = tmp_buf_ + tmp_buf_len_; - char *last_check_pos = tmp_buf_ + last_well_formed_len_; + char *last_check_pos = tmp_buf_ + last_escape_check_pos_; int error = 0; int expected_well_formed_len = cur_pos - last_check_pos; - int real_well_formed_len = charset_info_->cset->well_formed_len( - charset_info_, last_check_pos, cur_pos, UINT64_MAX, &error); - if (error != 0) { + + while (last_check_pos < cur_pos) { + size_t real_well_formed_len = charset_info_->cset->well_formed_len( + charset_info_, last_check_pos, cur_pos, UINT64_MAX, &error); + last_check_pos += (real_well_formed_len + ((error != 0) ? 1 : 0)); + } + + if (error != 0) { //the final well-formed result *cur_pos = '\\'; - if (real_well_formed_len == expected_well_formed_len - 1 - && charset_info_->cset->ismbchar(charset_info_, cur_pos - 1, cur_pos + 1)) { + if (charset_info_->cset->ismbchar(charset_info_, cur_pos - 1, cur_pos + 1)) { is_real_escape = false; - last_well_formed_len_ = tmp_buf_len_ + 1; - } else { - last_well_formed_len_ = tmp_buf_len_; } - } else { - last_well_formed_len_ = tmp_buf_len_; } } } @@ -1826,7 +1825,7 @@ int ObFastParserMysql::process_string(const char quote) ParseNode **child_node = NULL; char ch = INVALID_CHAR; tmp_buf_len_ = 0; - last_well_formed_len_ = 0; + last_escape_check_pos_ = 0; if (nullptr == tmp_buf_ && OB_ISNULL(tmp_buf_ = static_cast(allocator_.alloc(raw_sql_.raw_sql_len_ + 1)))) { ret = OB_ALLOCATE_MEMORY_FAILED; @@ -1855,6 +1854,7 @@ int ObFastParserMysql::process_string(const char quote) } else { process_escape_string(tmp_buf_, tmp_buf_len_); } + last_escape_check_pos_ = tmp_buf_len_; } else if (quote == ch) { if (quote == raw_sql_.peek()) { // double quote ch = raw_sql_.scan(); diff --git a/src/sql/parser/ob_fast_parser.h b/src/sql/parser/ob_fast_parser.h index 4b6db13f1b..0922ad8e02 100644 --- a/src/sql/parser/ob_fast_parser.h +++ b/src/sql/parser/ob_fast_parser.h @@ -495,7 +495,7 @@ protected: int64_t copy_end_pos_; char *tmp_buf_; int64_t tmp_buf_len_; - int64_t last_well_formed_len_; + int64_t last_escape_check_pos_; ParamList *param_node_list_; ParamList *tail_param_node_; TokenType cur_token_type_; diff --git a/src/sql/parser/parse_malloc.cpp b/src/sql/parser/parse_malloc.cpp index 7a9585c008..4b12035b96 100644 --- a/src/sql/parser/parse_malloc.cpp +++ b/src/sql/parser/parse_malloc.cpp @@ -267,27 +267,24 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection } bool check_real_escape(const ObCharsetInfo *cs, char *str, int64_t str_len, - int64_t *last_well_formed_len) + int64_t last_escape_check_pos) { bool is_real_escape = true; - if (NULL != cs && NULL != last_well_formed_len && cs->escape_with_backslash_is_dangerous) { + if (NULL != cs && cs->escape_with_backslash_is_dangerous) { char *cur_pos = str + str_len; - char *last_check_pos = str + *last_well_formed_len; + char *last_check_pos = str + last_escape_check_pos; int error = 0; size_t expected_well_formed_len = cur_pos - last_check_pos; - size_t real_well_formed_len = cs->cset->well_formed_len( - cs, last_check_pos, cur_pos, UINT64_MAX, &error); - if (error != 0) { + while (last_check_pos < cur_pos) { + size_t real_well_formed_len = cs->cset->well_formed_len( + cs, last_check_pos, cur_pos, UINT64_MAX, &error); + last_check_pos += (real_well_formed_len + ((error != 0) ? 1 : 0)); + } + if (error != 0) { //the final well-formed result *cur_pos = '\\'; - if (real_well_formed_len == expected_well_formed_len - 1 - && cs->cset->ismbchar(cs, cur_pos - 1, cur_pos + 1)) { + if (cs->cset->ismbchar(cs, cur_pos - 1, cur_pos + 1)) { is_real_escape = false; - *last_well_formed_len = str_len + 1; - } else { - *last_well_formed_len = str_len; } - } else { - *last_well_formed_len = str_len; } } return is_real_escape; diff --git a/src/sql/parser/parse_malloc.h b/src/sql/parser/parse_malloc.h index 193347be87..feeee14c58 100644 --- a/src/sql/parser/parse_malloc.h +++ b/src/sql/parser/parse_malloc.h @@ -35,7 +35,7 @@ extern char *cp_str_value(const char *src, const size_t nbyte, void *malloc_pool extern char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection_collation_, void *malloc_pool, int64_t *out_len); extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len, - int64_t *last_well_formed_len); + int64_t last_escape_check_pos); extern void *parser_alloc(void *malloc_pool, const int64_t alloc_size); extern void *malloc_parentheses_info(const size_t nbyte, void *malloc_pool); diff --git a/src/sql/parser/parse_node.h b/src/sql/parser/parse_node.h index 34c8af527a..e66602c4c6 100644 --- a/src/sql/parser/parse_node.h +++ b/src/sql/parser/parse_node.h @@ -321,7 +321,7 @@ typedef struct PLParseInfo pl_parse_info_; /*for q-quote*/ ObMinusStatusCtx minus_ctx_; // for fast parser to parse negative value - int64_t last_well_formed_len_; //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 + int64_t last_escape_check_pos_; //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 int connection_collation_;//connection collation bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */" bool enable_compatible_comment_; diff --git a/src/sql/parser/sql_parser_base.h b/src/sql/parser/sql_parser_base.h index 7b09de3520..4c105b2e36 100644 --- a/src/sql/parser/sql_parser_base.h +++ b/src/sql/parser/sql_parser_base.h @@ -64,7 +64,7 @@ extern int64_t ob_strntoll(const char *ptr, size_t len, int base, char **end, in extern int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err); extern int store_prentthese_info(int left, int right, ParseResult *result); extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len, - int64_t *last_well_formed_len); + int64_t last_escape_check_pos); int add_alias_name(ParseNode *node, ParseResult *result, int end); @@ -692,7 +692,7 @@ do { #define CHECK_REAL_ESCAPE(is_real_escape) \ is_real_escape = check_real_escape(p->charset_info_, p->tmp_literal_, \ - yylval->node->str_len_, &(p->last_well_formed_len_)) + yylval->node->str_len_, p->last_escape_check_pos_) /* do { \ if (NULL != p->charset_info_ && p->charset_info_->escape_with_backslash_is_dangerous) { \ diff --git a/src/sql/parser/sql_parser_mysql_mode.l b/src/sql/parser/sql_parser_mysql_mode.l index 7d0ca2ff1f..cc27b6c93e 100644 --- a/src/sql/parser/sql_parser_mysql_mode.l +++ b/src/sql/parser/sql_parser_mysql_mode.l @@ -264,7 +264,7 @@ FALSE { check_value(yylval); malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); yylval->node->str_len_ = 0; - p->last_well_formed_len_ = 0; + p->last_escape_check_pos_ = 0; yylval->node->str_value_ = NULL; if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { COPY_WRITE(); @@ -327,6 +327,7 @@ FALSE { } else { HANDLE_ESCAPE(p); } + p->last_escape_check_pos_ = yylval->node->str_len_; if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { COPY_WRITE(); } @@ -408,7 +409,7 @@ FALSE { } malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); yylval->node->str_len_ = 0; - p->last_well_formed_len_ = 0; + p->last_escape_check_pos_ = 0; if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { return OUTLINE_DEFAULT_TOKEN; } @@ -476,6 +477,7 @@ FALSE { } else { HANDLE_ESCAPE(p); } + p->last_escape_check_pos_ = yylval->node->str_len_; if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { return OUTLINE_DEFAULT_TOKEN; } @@ -1059,7 +1061,7 @@ Timestamp{whitespace}?\"[^\"]*\" { check_value(yylval); malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); yylval->node->str_len_ = 0; - p->last_well_formed_len_ = 0; + p->last_escape_check_pos_ = 0; yylval->node->str_value_ = NULL; if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE && !p->is_ignore_token_) { COPY_WRITE();