[CP] Fix GBK parser behaviour that is incompatible with MySQL
This commit is contained in:
		| @ -66,7 +66,7 @@ typedef struct _ObParseCtx | ||||
|   int comp_mode_; | ||||
|   bool is_not_utf8_connection_; | ||||
|   const struct ObCharsetInfo *charset_info_; | ||||
|   int64_t last_well_formed_len_;  //解析quoted string时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 | ||||
|   int64_t last_escape_check_pos_;  //解析quoted string时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 | ||||
|   int connection_collation_; | ||||
|   bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */" | ||||
|   struct | ||||
|  | ||||
| @ -82,7 +82,7 @@ ObFastParserBase::ObFastParserBase( | ||||
|   is_batched_multi_stmt_split_on_(enable_batched_multi_stmt), | ||||
|   is_mysql_compatible_comment_(false), | ||||
|   cur_token_begin_pos_(0), copy_begin_pos_(0), copy_end_pos_(0), | ||||
|   tmp_buf_(nullptr), tmp_buf_len_(0), last_well_formed_len_(0), | ||||
|   tmp_buf_(nullptr), tmp_buf_len_(0), last_escape_check_pos_(0), | ||||
|   param_node_list_(nullptr), tail_param_node_(nullptr), | ||||
|   cur_token_type_(INVALID_TOKEN), allocator_(allocator), | ||||
|   parse_next_token_func_(nullptr), process_idf_func_(nullptr) | ||||
| @ -1382,22 +1382,21 @@ inline void ObFastParserBase::check_real_escape(bool &is_real_escape) | ||||
| { | ||||
|   if (OB_NOT_NULL(charset_info_) && charset_info_->escape_with_backslash_is_dangerous) { | ||||
|     char *cur_pos = tmp_buf_ + tmp_buf_len_; | ||||
|     char *last_check_pos = tmp_buf_ + last_well_formed_len_; | ||||
|     char *last_check_pos = tmp_buf_ + last_escape_check_pos_; | ||||
|     int error = 0; | ||||
|     int expected_well_formed_len = cur_pos - last_check_pos; | ||||
|     int real_well_formed_len = charset_info_->cset->well_formed_len( | ||||
|                 charset_info_, last_check_pos, cur_pos, UINT64_MAX, &error); | ||||
|     if (error != 0) { | ||||
|  | ||||
|     while (last_check_pos < cur_pos) { | ||||
|       size_t real_well_formed_len = charset_info_->cset->well_formed_len( | ||||
|                   charset_info_, last_check_pos, cur_pos, UINT64_MAX, &error); | ||||
|       last_check_pos += (real_well_formed_len + ((error != 0) ? 1 : 0)); | ||||
|     } | ||||
|  | ||||
|     if (error != 0) { //the final well-formed result | ||||
|       *cur_pos = '\\'; | ||||
|       if (real_well_formed_len == expected_well_formed_len - 1 | ||||
|           && charset_info_->cset->ismbchar(charset_info_, cur_pos - 1, cur_pos + 1)) { | ||||
|       if (charset_info_->cset->ismbchar(charset_info_, cur_pos - 1, cur_pos + 1)) { | ||||
|         is_real_escape = false; | ||||
|         last_well_formed_len_ = tmp_buf_len_ + 1; | ||||
|       } else { | ||||
|         last_well_formed_len_ = tmp_buf_len_; | ||||
|       } | ||||
|     } else { | ||||
|       last_well_formed_len_ = tmp_buf_len_; | ||||
|     } | ||||
|   } | ||||
| } | ||||
| @ -1826,7 +1825,7 @@ int ObFastParserMysql::process_string(const char quote) | ||||
|   ParseNode **child_node = NULL; | ||||
|   char ch = INVALID_CHAR; | ||||
|   tmp_buf_len_ = 0; | ||||
|   last_well_formed_len_ = 0; | ||||
|   last_escape_check_pos_ = 0; | ||||
|   if (nullptr == tmp_buf_ && | ||||
|       OB_ISNULL(tmp_buf_ = static_cast<char *>(allocator_.alloc(raw_sql_.raw_sql_len_ + 1)))) { | ||||
|     ret = OB_ALLOCATE_MEMORY_FAILED; | ||||
| @ -1855,6 +1854,7 @@ int ObFastParserMysql::process_string(const char quote) | ||||
|         } else { | ||||
|           process_escape_string(tmp_buf_, tmp_buf_len_); | ||||
|         } | ||||
|         last_escape_check_pos_ = tmp_buf_len_; | ||||
|       } else if (quote == ch) { | ||||
|         if (quote == raw_sql_.peek()) { // double quote | ||||
|           ch = raw_sql_.scan(); | ||||
|  | ||||
| @ -495,7 +495,7 @@ protected: | ||||
| 	int64_t copy_end_pos_; | ||||
| 	char *tmp_buf_; | ||||
| 	int64_t tmp_buf_len_; | ||||
| 	int64_t last_well_formed_len_; | ||||
| 	int64_t last_escape_check_pos_; | ||||
| 	ParamList *param_node_list_; | ||||
| 	ParamList *tail_param_node_; | ||||
| 	TokenType cur_token_type_; | ||||
|  | ||||
| @ -267,27 +267,24 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection | ||||
| } | ||||
|  | ||||
| bool check_real_escape(const ObCharsetInfo *cs, char *str, int64_t str_len, | ||||
|                        int64_t *last_well_formed_len) | ||||
|                        int64_t last_escape_check_pos) | ||||
| { | ||||
|   bool is_real_escape = true; | ||||
|   if (NULL != cs && NULL != last_well_formed_len && cs->escape_with_backslash_is_dangerous) { | ||||
|   if (NULL != cs && cs->escape_with_backslash_is_dangerous) { | ||||
|     char *cur_pos = str + str_len; | ||||
|     char *last_check_pos = str + *last_well_formed_len; | ||||
|     char *last_check_pos = str + last_escape_check_pos; | ||||
|     int error = 0; | ||||
|     size_t expected_well_formed_len = cur_pos - last_check_pos; | ||||
|     size_t real_well_formed_len = cs->cset->well_formed_len( | ||||
|                 cs, last_check_pos, cur_pos, UINT64_MAX, &error); | ||||
|     if (error != 0) { | ||||
|     while (last_check_pos < cur_pos) { | ||||
|       size_t real_well_formed_len = cs->cset->well_formed_len( | ||||
|                   cs, last_check_pos, cur_pos, UINT64_MAX, &error); | ||||
|       last_check_pos += (real_well_formed_len + ((error != 0) ? 1 : 0)); | ||||
|     } | ||||
|     if (error != 0) { //the final well-formed result | ||||
|       *cur_pos = '\\'; | ||||
|       if (real_well_formed_len == expected_well_formed_len - 1 | ||||
|           && cs->cset->ismbchar(cs, cur_pos - 1, cur_pos + 1)) { | ||||
|       if (cs->cset->ismbchar(cs, cur_pos - 1, cur_pos + 1)) { | ||||
|         is_real_escape = false; | ||||
|         *last_well_formed_len = str_len + 1; | ||||
|       } else { | ||||
|         *last_well_formed_len = str_len; | ||||
|       } | ||||
|     } else { | ||||
|       *last_well_formed_len = str_len; | ||||
|     } | ||||
|   } | ||||
|   return is_real_escape; | ||||
|  | ||||
| @ -35,7 +35,7 @@ extern char *cp_str_value(const char *src, const size_t nbyte, void *malloc_pool | ||||
| extern char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection_collation_, | ||||
|                                                        void *malloc_pool, int64_t *out_len); | ||||
| extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len, | ||||
|                               int64_t *last_well_formed_len); | ||||
|                               int64_t last_escape_check_pos); | ||||
| extern void *parser_alloc(void *malloc_pool, const int64_t alloc_size); | ||||
|  | ||||
| extern void *malloc_parentheses_info(const size_t nbyte, void *malloc_pool); | ||||
|  | ||||
| @ -321,7 +321,7 @@ typedef struct | ||||
|   PLParseInfo pl_parse_info_; | ||||
|   /*for  q-quote*/ | ||||
|   ObMinusStatusCtx minus_ctx_; // for fast parser to parse negative value | ||||
|   int64_t last_well_formed_len_;  //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 | ||||
|   int64_t last_escape_check_pos_;  //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 | ||||
|   int connection_collation_;//connection collation | ||||
|   bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */" | ||||
|   bool enable_compatible_comment_; | ||||
|  | ||||
| @ -64,7 +64,7 @@ extern int64_t ob_strntoll(const char *ptr, size_t len, int base, char **end, in | ||||
| extern int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err); | ||||
| extern int store_prentthese_info(int left, int right, ParseResult *result); | ||||
| extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len, | ||||
|                               int64_t *last_well_formed_len); | ||||
|                               int64_t last_escape_check_pos); | ||||
|  | ||||
| int add_alias_name(ParseNode *node, ParseResult *result, int end); | ||||
|  | ||||
| @ -692,7 +692,7 @@ do { | ||||
|  | ||||
| #define CHECK_REAL_ESCAPE(is_real_escape)                                         \ | ||||
|   is_real_escape = check_real_escape(p->charset_info_, p->tmp_literal_,           \ | ||||
|                                      yylval->node->str_len_, &(p->last_well_formed_len_)) | ||||
|                                      yylval->node->str_len_, p->last_escape_check_pos_) | ||||
|   /* | ||||
| do {                                                                              \ | ||||
|   if (NULL !=  p->charset_info_ && p->charset_info_->escape_with_backslash_is_dangerous) { \ | ||||
|  | ||||
| @ -264,7 +264,7 @@ FALSE { | ||||
|   check_value(yylval); | ||||
|   malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); | ||||
|   yylval->node->str_len_ = 0; | ||||
|   p->last_well_formed_len_ = 0; | ||||
|   p->last_escape_check_pos_ = 0; | ||||
|   yylval->node->str_value_ = NULL; | ||||
|   if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { | ||||
|     COPY_WRITE(); | ||||
| @ -327,6 +327,7 @@ FALSE { | ||||
|   } else { | ||||
|     HANDLE_ESCAPE(p); | ||||
|   } | ||||
|   p->last_escape_check_pos_ = yylval->node->str_len_; | ||||
|   if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { | ||||
|     COPY_WRITE(); | ||||
|   } | ||||
| @ -408,7 +409,7 @@ FALSE { | ||||
|     } | ||||
|     malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); | ||||
|     yylval->node->str_len_ = 0; | ||||
|     p->last_well_formed_len_ = 0; | ||||
|     p->last_escape_check_pos_ = 0; | ||||
|     if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { | ||||
|       return OUTLINE_DEFAULT_TOKEN; | ||||
|     } | ||||
| @ -476,6 +477,7 @@ FALSE { | ||||
|   } else { | ||||
|     HANDLE_ESCAPE(p); | ||||
|   } | ||||
|   p->last_escape_check_pos_ = yylval->node->str_len_; | ||||
|   if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { | ||||
|     return OUTLINE_DEFAULT_TOKEN; | ||||
|   } | ||||
| @ -1059,7 +1061,7 @@ Timestamp{whitespace}?\"[^\"]*\" { | ||||
|   check_value(yylval); | ||||
|   malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); | ||||
|   yylval->node->str_len_ = 0; | ||||
|   p->last_well_formed_len_ = 0; | ||||
|   p->last_escape_check_pos_ = 0; | ||||
|   yylval->node->str_value_ = NULL; | ||||
|   if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE && !p->is_ignore_token_) { | ||||
|     COPY_WRITE(); | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 obdev
					obdev