statement_digest、statement_digest_text等处理latin1字符有乱码,报错1064
This commit is contained in:
@ -449,6 +449,8 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
|
||||
// Most of the time, if it is not an identifier character, it maybe a space,
|
||||
// comma, opening parenthesis, or closing parenthesis. This judgment logic is
|
||||
// added here to avoid the next judgment whether it is utf8 char or gbk char
|
||||
} else if (!is_oracle_mode_) {
|
||||
idf_pos = notascii_gb_char(pos);
|
||||
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
|
||||
idf_pos = is_utf8_char(pos);
|
||||
} else if (ObCharset::is_gb_charset(charset_type_)) {
|
||||
@ -921,6 +923,17 @@ int ObFastParserBase::get_one_insert_row_str(ObRawSql &raw_sql,
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline int64_t ObFastParserBase::notascii_gb_char(const int64_t pos)
|
||||
{
|
||||
int64_t idf_pos = -1;
|
||||
if (notascii(raw_sql_.char_at(pos))) {
|
||||
idf_pos = pos + 1;
|
||||
} else {
|
||||
idf_pos = is_gbk_char(pos);
|
||||
}
|
||||
return idf_pos;
|
||||
}
|
||||
|
||||
inline int64_t ObFastParserBase::is_latin1_char(const int64_t pos)
|
||||
{
|
||||
int64_t idf_pos = -1;
|
||||
@ -1672,6 +1685,8 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
|
||||
// Most of the time, if it is not an identifier character, it maybe a space,
|
||||
// comma, opening parenthesis, or closing parenthesis. This judgment logic is
|
||||
// added here to avoid the next judgment whether it is utf8 char or gbk char
|
||||
} else if (!is_oracle_mode_) {
|
||||
idf_pos = notascii_gb_char(pos);
|
||||
} else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) {
|
||||
idf_pos = is_utf8_char(pos);
|
||||
} else if (ObCharset::is_gb_charset(charset_type_)) {
|
||||
|
||||
@ -381,6 +381,7 @@ protected:
|
||||
return is_valid_char(ch) && USER_VAR_CHAR[static_cast<uint8_t>(ch)];
|
||||
}
|
||||
void reset_parser_node(ParseNode *node);
|
||||
int64_t notascii_gb_char(const int64_t pos);
|
||||
//{U}
|
||||
int64_t is_latin1_char(const int64_t pos);
|
||||
// ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U}
|
||||
@ -440,6 +441,11 @@ protected:
|
||||
return is_valid_char(ch) &&
|
||||
static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0xfe;
|
||||
}
|
||||
inline bool notascii(char ch)
|
||||
{
|
||||
return is_valid_char(ch) &&
|
||||
(static_cast<uint8_t>(ch) >= 0x80 && static_cast<uint8_t>(ch) <= 0xFF);
|
||||
}
|
||||
inline bool is_latin1(char ch)
|
||||
{
|
||||
return is_valid_char(ch) &&
|
||||
|
||||
@ -38,14 +38,12 @@ extern void obsql_mysql_parser_fatal_error(int32_t errcoyyde, yyscan_t yyscanner
|
||||
/* the adq is used to process dq in ANSI_QUOTES sql_mode*/
|
||||
%x adq
|
||||
|
||||
U [\x80-\xbf]
|
||||
U_2 [\xc2-\xdf]
|
||||
U_3 [\xe0-\xef]
|
||||
U_4 [\xf0-\xf4]
|
||||
NOTASCII [\x80-\xFF]
|
||||
GB_1 [\x81-\xfe]
|
||||
GB_2 [\x40-\xfe]
|
||||
GB_3 [\x30-\x39]
|
||||
UTF8_GB_CHAR ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U}|{GB_1}{GB_2}|{GB_1}{GB_3}{GB_1}{GB_3})
|
||||
NOTASCII_GB_CHAR ({NOTASCII}|{GB_1}{GB_2}|{GB_1}{GB_3}{GB_1}{GB_3})
|
||||
|
||||
space [ \t\n\r\f]
|
||||
non_newline [^\n\r]
|
||||
sql_comment ("--"[ \t]+{non_newline}*)|(#{non_newline}*|"--"[\n\r])
|
||||
@ -62,7 +60,7 @@ common_hint_begin (\/\*\+({space}*hint{space}+)?)
|
||||
c_cmt_begin \/\*
|
||||
c_cmt_end \*+\/
|
||||
comment ({sql_comment})
|
||||
identifier (([A-Za-z0-9$_]|{UTF8_GB_CHAR})+)
|
||||
identifier (([A-Za-z0-9$_]|{NOTASCII_GB_CHAR})+)
|
||||
system_variable (@@[A-Za-z_][A-Za-z0-9_]*)|(@@[`][`A-Za-z_][`A-Za-z_]*)
|
||||
user_variable (@[A-Za-z0-9_\.$]*)|(@[`'\"][`'\"A-Za-z0-9_\.$/%]*)
|
||||
version_num ([0-9]+\.+[0-9]*)
|
||||
|
||||
Reference in New Issue
Block a user