[CP] Fix GBK parser behaviour that is incompatible with MySQL
This commit is contained in:
		@ -66,7 +66,7 @@ typedef struct _ObParseCtx
 | 
				
			|||||||
  int comp_mode_;
 | 
					  int comp_mode_;
 | 
				
			||||||
  bool is_not_utf8_connection_;
 | 
					  bool is_not_utf8_connection_;
 | 
				
			||||||
  const struct ObCharsetInfo *charset_info_;
 | 
					  const struct ObCharsetInfo *charset_info_;
 | 
				
			||||||
  int64_t last_well_formed_len_;  //解析quoted string时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题
 | 
					  int64_t last_escape_check_pos_;  //解析quoted string时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题
 | 
				
			||||||
  int connection_collation_;
 | 
					  int connection_collation_;
 | 
				
			||||||
  bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */"
 | 
					  bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */"
 | 
				
			||||||
  struct
 | 
					  struct
 | 
				
			||||||
 | 
				
			|||||||
@ -82,7 +82,7 @@ ObFastParserBase::ObFastParserBase(
 | 
				
			|||||||
  is_batched_multi_stmt_split_on_(enable_batched_multi_stmt),
 | 
					  is_batched_multi_stmt_split_on_(enable_batched_multi_stmt),
 | 
				
			||||||
  is_mysql_compatible_comment_(false),
 | 
					  is_mysql_compatible_comment_(false),
 | 
				
			||||||
  cur_token_begin_pos_(0), copy_begin_pos_(0), copy_end_pos_(0),
 | 
					  cur_token_begin_pos_(0), copy_begin_pos_(0), copy_end_pos_(0),
 | 
				
			||||||
  tmp_buf_(nullptr), tmp_buf_len_(0), last_well_formed_len_(0),
 | 
					  tmp_buf_(nullptr), tmp_buf_len_(0), last_escape_check_pos_(0),
 | 
				
			||||||
  param_node_list_(nullptr), tail_param_node_(nullptr),
 | 
					  param_node_list_(nullptr), tail_param_node_(nullptr),
 | 
				
			||||||
  cur_token_type_(INVALID_TOKEN), allocator_(allocator),
 | 
					  cur_token_type_(INVALID_TOKEN), allocator_(allocator),
 | 
				
			||||||
  parse_next_token_func_(nullptr), process_idf_func_(nullptr)
 | 
					  parse_next_token_func_(nullptr), process_idf_func_(nullptr)
 | 
				
			||||||
@ -1382,22 +1382,21 @@ inline void ObFastParserBase::check_real_escape(bool &is_real_escape)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
  if (OB_NOT_NULL(charset_info_) && charset_info_->escape_with_backslash_is_dangerous) {
 | 
					  if (OB_NOT_NULL(charset_info_) && charset_info_->escape_with_backslash_is_dangerous) {
 | 
				
			||||||
    char *cur_pos = tmp_buf_ + tmp_buf_len_;
 | 
					    char *cur_pos = tmp_buf_ + tmp_buf_len_;
 | 
				
			||||||
    char *last_check_pos = tmp_buf_ + last_well_formed_len_;
 | 
					    char *last_check_pos = tmp_buf_ + last_escape_check_pos_;
 | 
				
			||||||
    int error = 0;
 | 
					    int error = 0;
 | 
				
			||||||
    int expected_well_formed_len = cur_pos - last_check_pos;
 | 
					    int expected_well_formed_len = cur_pos - last_check_pos;
 | 
				
			||||||
    int real_well_formed_len = charset_info_->cset->well_formed_len(
 | 
					
 | 
				
			||||||
                charset_info_, last_check_pos, cur_pos, UINT64_MAX, &error);
 | 
					    while (last_check_pos < cur_pos) {
 | 
				
			||||||
    if (error != 0) {
 | 
					      size_t real_well_formed_len = charset_info_->cset->well_formed_len(
 | 
				
			||||||
 | 
					                  charset_info_, last_check_pos, cur_pos, UINT64_MAX, &error);
 | 
				
			||||||
 | 
					      last_check_pos += (real_well_formed_len + ((error != 0) ? 1 : 0));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (error != 0) { //the final well-formed result
 | 
				
			||||||
      *cur_pos = '\\';
 | 
					      *cur_pos = '\\';
 | 
				
			||||||
      if (real_well_formed_len == expected_well_formed_len - 1
 | 
					      if (charset_info_->cset->ismbchar(charset_info_, cur_pos - 1, cur_pos + 1)) {
 | 
				
			||||||
          && charset_info_->cset->ismbchar(charset_info_, cur_pos - 1, cur_pos + 1)) {
 | 
					 | 
				
			||||||
        is_real_escape = false;
 | 
					        is_real_escape = false;
 | 
				
			||||||
        last_well_formed_len_ = tmp_buf_len_ + 1;
 | 
					 | 
				
			||||||
      } else {
 | 
					 | 
				
			||||||
        last_well_formed_len_ = tmp_buf_len_;
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
      last_well_formed_len_ = tmp_buf_len_;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -1826,7 +1825,7 @@ int ObFastParserMysql::process_string(const char quote)
 | 
				
			|||||||
  ParseNode **child_node = NULL;
 | 
					  ParseNode **child_node = NULL;
 | 
				
			||||||
  char ch = INVALID_CHAR;
 | 
					  char ch = INVALID_CHAR;
 | 
				
			||||||
  tmp_buf_len_ = 0;
 | 
					  tmp_buf_len_ = 0;
 | 
				
			||||||
  last_well_formed_len_ = 0;
 | 
					  last_escape_check_pos_ = 0;
 | 
				
			||||||
  if (nullptr == tmp_buf_ &&
 | 
					  if (nullptr == tmp_buf_ &&
 | 
				
			||||||
      OB_ISNULL(tmp_buf_ = static_cast<char *>(allocator_.alloc(raw_sql_.raw_sql_len_ + 1)))) {
 | 
					      OB_ISNULL(tmp_buf_ = static_cast<char *>(allocator_.alloc(raw_sql_.raw_sql_len_ + 1)))) {
 | 
				
			||||||
    ret = OB_ALLOCATE_MEMORY_FAILED;
 | 
					    ret = OB_ALLOCATE_MEMORY_FAILED;
 | 
				
			||||||
@ -1855,6 +1854,7 @@ int ObFastParserMysql::process_string(const char quote)
 | 
				
			|||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
          process_escape_string(tmp_buf_, tmp_buf_len_);
 | 
					          process_escape_string(tmp_buf_, tmp_buf_len_);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					        last_escape_check_pos_ = tmp_buf_len_;
 | 
				
			||||||
      } else if (quote == ch) {
 | 
					      } else if (quote == ch) {
 | 
				
			||||||
        if (quote == raw_sql_.peek()) { // double quote
 | 
					        if (quote == raw_sql_.peek()) { // double quote
 | 
				
			||||||
          ch = raw_sql_.scan();
 | 
					          ch = raw_sql_.scan();
 | 
				
			||||||
 | 
				
			|||||||
@ -495,7 +495,7 @@ protected:
 | 
				
			|||||||
	int64_t copy_end_pos_;
 | 
						int64_t copy_end_pos_;
 | 
				
			||||||
	char *tmp_buf_;
 | 
						char *tmp_buf_;
 | 
				
			||||||
	int64_t tmp_buf_len_;
 | 
						int64_t tmp_buf_len_;
 | 
				
			||||||
	int64_t last_well_formed_len_;
 | 
						int64_t last_escape_check_pos_;
 | 
				
			||||||
	ParamList *param_node_list_;
 | 
						ParamList *param_node_list_;
 | 
				
			||||||
	ParamList *tail_param_node_;
 | 
						ParamList *tail_param_node_;
 | 
				
			||||||
	TokenType cur_token_type_;
 | 
						TokenType cur_token_type_;
 | 
				
			||||||
 | 
				
			|||||||
@ -267,27 +267,24 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool check_real_escape(const ObCharsetInfo *cs, char *str, int64_t str_len,
 | 
					bool check_real_escape(const ObCharsetInfo *cs, char *str, int64_t str_len,
 | 
				
			||||||
                       int64_t *last_well_formed_len)
 | 
					                       int64_t last_escape_check_pos)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  bool is_real_escape = true;
 | 
					  bool is_real_escape = true;
 | 
				
			||||||
  if (NULL != cs && NULL != last_well_formed_len && cs->escape_with_backslash_is_dangerous) {
 | 
					  if (NULL != cs && cs->escape_with_backslash_is_dangerous) {
 | 
				
			||||||
    char *cur_pos = str + str_len;
 | 
					    char *cur_pos = str + str_len;
 | 
				
			||||||
    char *last_check_pos = str + *last_well_formed_len;
 | 
					    char *last_check_pos = str + last_escape_check_pos;
 | 
				
			||||||
    int error = 0;
 | 
					    int error = 0;
 | 
				
			||||||
    size_t expected_well_formed_len = cur_pos - last_check_pos;
 | 
					    size_t expected_well_formed_len = cur_pos - last_check_pos;
 | 
				
			||||||
    size_t real_well_formed_len = cs->cset->well_formed_len(
 | 
					    while (last_check_pos < cur_pos) {
 | 
				
			||||||
                cs, last_check_pos, cur_pos, UINT64_MAX, &error);
 | 
					      size_t real_well_formed_len = cs->cset->well_formed_len(
 | 
				
			||||||
    if (error != 0) {
 | 
					                  cs, last_check_pos, cur_pos, UINT64_MAX, &error);
 | 
				
			||||||
 | 
					      last_check_pos += (real_well_formed_len + ((error != 0) ? 1 : 0));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (error != 0) { //the final well-formed result
 | 
				
			||||||
      *cur_pos = '\\';
 | 
					      *cur_pos = '\\';
 | 
				
			||||||
      if (real_well_formed_len == expected_well_formed_len - 1
 | 
					      if (cs->cset->ismbchar(cs, cur_pos - 1, cur_pos + 1)) {
 | 
				
			||||||
          && cs->cset->ismbchar(cs, cur_pos - 1, cur_pos + 1)) {
 | 
					 | 
				
			||||||
        is_real_escape = false;
 | 
					        is_real_escape = false;
 | 
				
			||||||
        *last_well_formed_len = str_len + 1;
 | 
					 | 
				
			||||||
      } else {
 | 
					 | 
				
			||||||
        *last_well_formed_len = str_len;
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
      *last_well_formed_len = str_len;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  return is_real_escape;
 | 
					  return is_real_escape;
 | 
				
			||||||
 | 
				
			|||||||
@ -35,7 +35,7 @@ extern char *cp_str_value(const char *src, const size_t nbyte, void *malloc_pool
 | 
				
			|||||||
extern char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection_collation_,
 | 
					extern char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection_collation_,
 | 
				
			||||||
                                                       void *malloc_pool, int64_t *out_len);
 | 
					                                                       void *malloc_pool, int64_t *out_len);
 | 
				
			||||||
extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len,
 | 
					extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len,
 | 
				
			||||||
                              int64_t *last_well_formed_len);
 | 
					                              int64_t last_escape_check_pos);
 | 
				
			||||||
extern void *parser_alloc(void *malloc_pool, const int64_t alloc_size);
 | 
					extern void *parser_alloc(void *malloc_pool, const int64_t alloc_size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
extern void *malloc_parentheses_info(const size_t nbyte, void *malloc_pool);
 | 
					extern void *malloc_parentheses_info(const size_t nbyte, void *malloc_pool);
 | 
				
			||||||
 | 
				
			|||||||
@ -321,7 +321,7 @@ typedef struct
 | 
				
			|||||||
  PLParseInfo pl_parse_info_;
 | 
					  PLParseInfo pl_parse_info_;
 | 
				
			||||||
  /*for  q-quote*/
 | 
					  /*for  q-quote*/
 | 
				
			||||||
  ObMinusStatusCtx minus_ctx_; // for fast parser to parse negative value
 | 
					  ObMinusStatusCtx minus_ctx_; // for fast parser to parse negative value
 | 
				
			||||||
  int64_t last_well_formed_len_;  //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题
 | 
					  int64_t last_escape_check_pos_;  //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题
 | 
				
			||||||
  int connection_collation_;//connection collation
 | 
					  int connection_collation_;//connection collation
 | 
				
			||||||
  bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */"
 | 
					  bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */"
 | 
				
			||||||
  bool enable_compatible_comment_;
 | 
					  bool enable_compatible_comment_;
 | 
				
			||||||
 | 
				
			|||||||
@ -64,7 +64,7 @@ extern int64_t ob_strntoll(const char *ptr, size_t len, int base, char **end, in
 | 
				
			|||||||
extern int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err);
 | 
					extern int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err);
 | 
				
			||||||
extern int store_prentthese_info(int left, int right, ParseResult *result);
 | 
					extern int store_prentthese_info(int left, int right, ParseResult *result);
 | 
				
			||||||
extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len,
 | 
					extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len,
 | 
				
			||||||
                              int64_t *last_well_formed_len);
 | 
					                              int64_t last_escape_check_pos);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
int add_alias_name(ParseNode *node, ParseResult *result, int end);
 | 
					int add_alias_name(ParseNode *node, ParseResult *result, int end);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -692,7 +692,7 @@ do {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#define CHECK_REAL_ESCAPE(is_real_escape)                                         \
 | 
					#define CHECK_REAL_ESCAPE(is_real_escape)                                         \
 | 
				
			||||||
  is_real_escape = check_real_escape(p->charset_info_, p->tmp_literal_,           \
 | 
					  is_real_escape = check_real_escape(p->charset_info_, p->tmp_literal_,           \
 | 
				
			||||||
                                     yylval->node->str_len_, &(p->last_well_formed_len_))
 | 
					                                     yylval->node->str_len_, p->last_escape_check_pos_)
 | 
				
			||||||
  /*
 | 
					  /*
 | 
				
			||||||
do {                                                                              \
 | 
					do {                                                                              \
 | 
				
			||||||
  if (NULL !=  p->charset_info_ && p->charset_info_->escape_with_backslash_is_dangerous) { \
 | 
					  if (NULL !=  p->charset_info_ && p->charset_info_->escape_with_backslash_is_dangerous) { \
 | 
				
			||||||
 | 
				
			|||||||
@ -264,7 +264,7 @@ FALSE {
 | 
				
			|||||||
  check_value(yylval);
 | 
					  check_value(yylval);
 | 
				
			||||||
  malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
 | 
					  malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
 | 
				
			||||||
  yylval->node->str_len_ = 0;
 | 
					  yylval->node->str_len_ = 0;
 | 
				
			||||||
  p->last_well_formed_len_ = 0;
 | 
					  p->last_escape_check_pos_ = 0;
 | 
				
			||||||
  yylval->node->str_value_ = NULL;
 | 
					  yylval->node->str_value_ = NULL;
 | 
				
			||||||
  if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
 | 
					  if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
 | 
				
			||||||
    COPY_WRITE();
 | 
					    COPY_WRITE();
 | 
				
			||||||
@ -327,6 +327,7 @@ FALSE {
 | 
				
			|||||||
  } else {
 | 
					  } else {
 | 
				
			||||||
    HANDLE_ESCAPE(p);
 | 
					    HANDLE_ESCAPE(p);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  p->last_escape_check_pos_ = yylval->node->str_len_;
 | 
				
			||||||
  if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
 | 
					  if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
 | 
				
			||||||
    COPY_WRITE();
 | 
					    COPY_WRITE();
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@ -408,7 +409,7 @@ FALSE {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
    malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
 | 
					    malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
 | 
				
			||||||
    yylval->node->str_len_ = 0;
 | 
					    yylval->node->str_len_ = 0;
 | 
				
			||||||
    p->last_well_formed_len_ = 0;
 | 
					    p->last_escape_check_pos_ = 0;
 | 
				
			||||||
    if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
 | 
					    if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
 | 
				
			||||||
      return OUTLINE_DEFAULT_TOKEN;
 | 
					      return OUTLINE_DEFAULT_TOKEN;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -476,6 +477,7 @@ FALSE {
 | 
				
			|||||||
  } else {
 | 
					  } else {
 | 
				
			||||||
    HANDLE_ESCAPE(p);
 | 
					    HANDLE_ESCAPE(p);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  p->last_escape_check_pos_ = yylval->node->str_len_;
 | 
				
			||||||
  if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
 | 
					  if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
 | 
				
			||||||
    return OUTLINE_DEFAULT_TOKEN;
 | 
					    return OUTLINE_DEFAULT_TOKEN;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@ -1059,7 +1061,7 @@ Timestamp{whitespace}?\"[^\"]*\" {
 | 
				
			|||||||
  check_value(yylval);
 | 
					  check_value(yylval);
 | 
				
			||||||
  malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
 | 
					  malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
 | 
				
			||||||
  yylval->node->str_len_ = 0;
 | 
					  yylval->node->str_len_ = 0;
 | 
				
			||||||
  p->last_well_formed_len_ = 0;
 | 
					  p->last_escape_check_pos_ = 0;
 | 
				
			||||||
  yylval->node->str_value_ = NULL;
 | 
					  yylval->node->str_value_ = NULL;
 | 
				
			||||||
  if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE && !p->is_ignore_token_) {
 | 
					  if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE && !p->is_ignore_token_) {
 | 
				
			||||||
    COPY_WRITE();
 | 
					    COPY_WRITE();
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user