[CP] fix gbk parser behaviour not compatible with mysql

This commit is contained in:
obdev
2022-11-03 08:39:29 +00:00
committed by wangzelin.wzl
parent 0feea46b52
commit a4317d144d
8 changed files with 34 additions and 35 deletions

View File

@ -66,7 +66,7 @@ typedef struct _ObParseCtx
int comp_mode_; int comp_mode_;
bool is_not_utf8_connection_; bool is_not_utf8_connection_;
const struct ObCharsetInfo *charset_info_; const struct ObCharsetInfo *charset_info_;
int64_t last_well_formed_len_; //解析quoted string时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 int64_t last_escape_check_pos_; //解析quoted string时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题
int connection_collation_; int connection_collation_;
bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */" bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */"
struct struct

View File

@ -82,7 +82,7 @@ ObFastParserBase::ObFastParserBase(
is_batched_multi_stmt_split_on_(enable_batched_multi_stmt), is_batched_multi_stmt_split_on_(enable_batched_multi_stmt),
is_mysql_compatible_comment_(false), is_mysql_compatible_comment_(false),
cur_token_begin_pos_(0), copy_begin_pos_(0), copy_end_pos_(0), cur_token_begin_pos_(0), copy_begin_pos_(0), copy_end_pos_(0),
tmp_buf_(nullptr), tmp_buf_len_(0), last_well_formed_len_(0), tmp_buf_(nullptr), tmp_buf_len_(0), last_escape_check_pos_(0),
param_node_list_(nullptr), tail_param_node_(nullptr), param_node_list_(nullptr), tail_param_node_(nullptr),
cur_token_type_(INVALID_TOKEN), allocator_(allocator), cur_token_type_(INVALID_TOKEN), allocator_(allocator),
parse_next_token_func_(nullptr), process_idf_func_(nullptr) parse_next_token_func_(nullptr), process_idf_func_(nullptr)
@ -1382,22 +1382,21 @@ inline void ObFastParserBase::check_real_escape(bool &is_real_escape)
{ {
if (OB_NOT_NULL(charset_info_) && charset_info_->escape_with_backslash_is_dangerous) { if (OB_NOT_NULL(charset_info_) && charset_info_->escape_with_backslash_is_dangerous) {
char *cur_pos = tmp_buf_ + tmp_buf_len_; char *cur_pos = tmp_buf_ + tmp_buf_len_;
char *last_check_pos = tmp_buf_ + last_well_formed_len_; char *last_check_pos = tmp_buf_ + last_escape_check_pos_;
int error = 0; int error = 0;
int expected_well_formed_len = cur_pos - last_check_pos; int expected_well_formed_len = cur_pos - last_check_pos;
int real_well_formed_len = charset_info_->cset->well_formed_len(
charset_info_, last_check_pos, cur_pos, UINT64_MAX, &error); while (last_check_pos < cur_pos) {
if (error != 0) { size_t real_well_formed_len = charset_info_->cset->well_formed_len(
charset_info_, last_check_pos, cur_pos, UINT64_MAX, &error);
last_check_pos += (real_well_formed_len + ((error != 0) ? 1 : 0));
}
if (error != 0) { //the final well-formed result
*cur_pos = '\\'; *cur_pos = '\\';
if (real_well_formed_len == expected_well_formed_len - 1 if (charset_info_->cset->ismbchar(charset_info_, cur_pos - 1, cur_pos + 1)) {
&& charset_info_->cset->ismbchar(charset_info_, cur_pos - 1, cur_pos + 1)) {
is_real_escape = false; is_real_escape = false;
last_well_formed_len_ = tmp_buf_len_ + 1;
} else {
last_well_formed_len_ = tmp_buf_len_;
} }
} else {
last_well_formed_len_ = tmp_buf_len_;
} }
} }
} }
@ -1826,7 +1825,7 @@ int ObFastParserMysql::process_string(const char quote)
ParseNode **child_node = NULL; ParseNode **child_node = NULL;
char ch = INVALID_CHAR; char ch = INVALID_CHAR;
tmp_buf_len_ = 0; tmp_buf_len_ = 0;
last_well_formed_len_ = 0; last_escape_check_pos_ = 0;
if (nullptr == tmp_buf_ && if (nullptr == tmp_buf_ &&
OB_ISNULL(tmp_buf_ = static_cast<char *>(allocator_.alloc(raw_sql_.raw_sql_len_ + 1)))) { OB_ISNULL(tmp_buf_ = static_cast<char *>(allocator_.alloc(raw_sql_.raw_sql_len_ + 1)))) {
ret = OB_ALLOCATE_MEMORY_FAILED; ret = OB_ALLOCATE_MEMORY_FAILED;
@ -1855,6 +1854,7 @@ int ObFastParserMysql::process_string(const char quote)
} else { } else {
process_escape_string(tmp_buf_, tmp_buf_len_); process_escape_string(tmp_buf_, tmp_buf_len_);
} }
last_escape_check_pos_ = tmp_buf_len_;
} else if (quote == ch) { } else if (quote == ch) {
if (quote == raw_sql_.peek()) { // double quote if (quote == raw_sql_.peek()) { // double quote
ch = raw_sql_.scan(); ch = raw_sql_.scan();

View File

@ -495,7 +495,7 @@ protected:
int64_t copy_end_pos_; int64_t copy_end_pos_;
char *tmp_buf_; char *tmp_buf_;
int64_t tmp_buf_len_; int64_t tmp_buf_len_;
int64_t last_well_formed_len_; int64_t last_escape_check_pos_;
ParamList *param_node_list_; ParamList *param_node_list_;
ParamList *tail_param_node_; ParamList *tail_param_node_;
TokenType cur_token_type_; TokenType cur_token_type_;

View File

@ -267,27 +267,24 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
} }
bool check_real_escape(const ObCharsetInfo *cs, char *str, int64_t str_len, bool check_real_escape(const ObCharsetInfo *cs, char *str, int64_t str_len,
int64_t *last_well_formed_len) int64_t last_escape_check_pos)
{ {
bool is_real_escape = true; bool is_real_escape = true;
if (NULL != cs && NULL != last_well_formed_len && cs->escape_with_backslash_is_dangerous) { if (NULL != cs && cs->escape_with_backslash_is_dangerous) {
char *cur_pos = str + str_len; char *cur_pos = str + str_len;
char *last_check_pos = str + *last_well_formed_len; char *last_check_pos = str + last_escape_check_pos;
int error = 0; int error = 0;
size_t expected_well_formed_len = cur_pos - last_check_pos; size_t expected_well_formed_len = cur_pos - last_check_pos;
size_t real_well_formed_len = cs->cset->well_formed_len( while (last_check_pos < cur_pos) {
cs, last_check_pos, cur_pos, UINT64_MAX, &error); size_t real_well_formed_len = cs->cset->well_formed_len(
if (error != 0) { cs, last_check_pos, cur_pos, UINT64_MAX, &error);
last_check_pos += (real_well_formed_len + ((error != 0) ? 1 : 0));
}
if (error != 0) { //the final well-formed result
*cur_pos = '\\'; *cur_pos = '\\';
if (real_well_formed_len == expected_well_formed_len - 1 if (cs->cset->ismbchar(cs, cur_pos - 1, cur_pos + 1)) {
&& cs->cset->ismbchar(cs, cur_pos - 1, cur_pos + 1)) {
is_real_escape = false; is_real_escape = false;
*last_well_formed_len = str_len + 1;
} else {
*last_well_formed_len = str_len;
} }
} else {
*last_well_formed_len = str_len;
} }
} }
return is_real_escape; return is_real_escape;

View File

@ -35,7 +35,7 @@ extern char *cp_str_value(const char *src, const size_t nbyte, void *malloc_pool
extern char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection_collation_, extern char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection_collation_,
void *malloc_pool, int64_t *out_len); void *malloc_pool, int64_t *out_len);
extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len, extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len,
int64_t *last_well_formed_len); int64_t last_escape_check_pos);
extern void *parser_alloc(void *malloc_pool, const int64_t alloc_size); extern void *parser_alloc(void *malloc_pool, const int64_t alloc_size);
extern void *malloc_parentheses_info(const size_t nbyte, void *malloc_pool); extern void *malloc_parentheses_info(const size_t nbyte, void *malloc_pool);

View File

@ -321,7 +321,7 @@ typedef struct
PLParseInfo pl_parse_info_; PLParseInfo pl_parse_info_;
/*for q-quote*/ /*for q-quote*/
ObMinusStatusCtx minus_ctx_; // for fast parser to parse negative value ObMinusStatusCtx minus_ctx_; // for fast parser to parse negative value
int64_t last_well_formed_len_; //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题 int64_t last_escape_check_pos_; //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题
int connection_collation_;//connection collation int connection_collation_;//connection collation
bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */" bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */"
bool enable_compatible_comment_; bool enable_compatible_comment_;

View File

@ -64,7 +64,7 @@ extern int64_t ob_strntoll(const char *ptr, size_t len, int base, char **end, in
extern int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err); extern int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err);
extern int store_prentthese_info(int left, int right, ParseResult *result); extern int store_prentthese_info(int left, int right, ParseResult *result);
extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len, extern bool check_real_escape(const struct ObCharsetInfo *cs, char *str, int64_t str_len,
int64_t *last_well_formed_len); int64_t last_escape_check_pos);
int add_alias_name(ParseNode *node, ParseResult *result, int end); int add_alias_name(ParseNode *node, ParseResult *result, int end);
@ -692,7 +692,7 @@ do {
#define CHECK_REAL_ESCAPE(is_real_escape) \ #define CHECK_REAL_ESCAPE(is_real_escape) \
is_real_escape = check_real_escape(p->charset_info_, p->tmp_literal_, \ is_real_escape = check_real_escape(p->charset_info_, p->tmp_literal_, \
yylval->node->str_len_, &(p->last_well_formed_len_)) yylval->node->str_len_, p->last_escape_check_pos_)
/* /*
do { \ do { \
if (NULL != p->charset_info_ && p->charset_info_->escape_with_backslash_is_dangerous) { \ if (NULL != p->charset_info_ && p->charset_info_->escape_with_backslash_is_dangerous) { \

View File

@ -264,7 +264,7 @@ FALSE {
check_value(yylval); check_value(yylval);
malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
yylval->node->str_len_ = 0; yylval->node->str_len_ = 0;
p->last_well_formed_len_ = 0; p->last_escape_check_pos_ = 0;
yylval->node->str_value_ = NULL; yylval->node->str_value_ = NULL;
if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
COPY_WRITE(); COPY_WRITE();
@ -327,6 +327,7 @@ FALSE {
} else { } else {
HANDLE_ESCAPE(p); HANDLE_ESCAPE(p);
} }
p->last_escape_check_pos_ = yylval->node->str_len_;
if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
COPY_WRITE(); COPY_WRITE();
} }
@ -408,7 +409,7 @@ FALSE {
} }
malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
yylval->node->str_len_ = 0; yylval->node->str_len_ = 0;
p->last_well_formed_len_ = 0; p->last_escape_check_pos_ = 0;
if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
return OUTLINE_DEFAULT_TOKEN; return OUTLINE_DEFAULT_TOKEN;
} }
@ -476,6 +477,7 @@ FALSE {
} else { } else {
HANDLE_ESCAPE(p); HANDLE_ESCAPE(p);
} }
p->last_escape_check_pos_ = yylval->node->str_len_;
if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
return OUTLINE_DEFAULT_TOKEN; return OUTLINE_DEFAULT_TOKEN;
} }
@ -1059,7 +1061,7 @@ Timestamp{whitespace}?\"[^\"]*\" {
check_value(yylval); check_value(yylval);
malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0);
yylval->node->str_len_ = 0; yylval->node->str_len_ = 0;
p->last_well_formed_len_ = 0; p->last_escape_check_pos_ = 0;
yylval->node->str_value_ = NULL; yylval->node->str_value_ = NULL;
if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE && !p->is_ignore_token_) { if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE && !p->is_ignore_token_) {
COPY_WRITE(); COPY_WRITE();