statement_digest、statement_digest_text等处理latin1字符有乱码,报错1064
This commit is contained in:
		| @ -449,6 +449,8 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos) | |||||||
|     // Most of the time, if it is not an identifier character, it maybe a space, |     // Most of the time, if it is not an identifier character, it maybe a space, | ||||||
|     // comma, opening parenthesis, or closing parenthesis. This judgment logic is |     // comma, opening parenthesis, or closing parenthesis. This judgment logic is | ||||||
|     // added here to avoid the next judgment whether it is utf8 char or gbk char |     // added here to avoid the next judgment whether it is utf8 char or gbk char | ||||||
|  |   } else if (!is_oracle_mode_) { | ||||||
|  |     idf_pos = notascii_gb_char(pos); | ||||||
|   } else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) { |   } else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) { | ||||||
|     idf_pos = is_utf8_char(pos); |     idf_pos = is_utf8_char(pos); | ||||||
|   } else if (ObCharset::is_gb_charset(charset_type_)) { |   } else if (ObCharset::is_gb_charset(charset_type_)) { | ||||||
| @ -921,6 +923,17 @@ int ObFastParserBase::get_one_insert_row_str(ObRawSql &raw_sql, | |||||||
|   return ret; |   return ret; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | inline int64_t ObFastParserBase::notascii_gb_char(const int64_t pos) | ||||||
|  | { | ||||||
|  |   int64_t idf_pos = -1; | ||||||
|  |   if (notascii(raw_sql_.char_at(pos))) { | ||||||
|  |     idf_pos = pos + 1; | ||||||
|  |   } else { | ||||||
|  |     idf_pos = is_gbk_char(pos); | ||||||
|  |   } | ||||||
|  |   return idf_pos; | ||||||
|  | } | ||||||
|  |  | ||||||
| inline int64_t ObFastParserBase::is_latin1_char(const int64_t pos) | inline int64_t ObFastParserBase::is_latin1_char(const int64_t pos) | ||||||
| { | { | ||||||
|   int64_t idf_pos = -1; |   int64_t idf_pos = -1; | ||||||
| @ -1672,6 +1685,8 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos) | |||||||
|     // Most of the time, if it is not an identifier character, it maybe a space, |     // Most of the time, if it is not an identifier character, it maybe a space, | ||||||
|     // comma, opening parenthesis, or closing parenthesis. This judgment logic is |     // comma, opening parenthesis, or closing parenthesis. This judgment logic is | ||||||
|     // added here to avoid the next judgment whether it is utf8 char or gbk char |     // added here to avoid the next judgment whether it is utf8 char or gbk char | ||||||
|  |   } else if (!is_oracle_mode_) { | ||||||
|  |     idf_pos = notascii_gb_char(pos); | ||||||
|   } else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) { |   } else if (CHARSET_UTF8MB4 == charset_type_ || CHARSET_UTF16 == charset_type_) { | ||||||
|     idf_pos = is_utf8_char(pos); |     idf_pos = is_utf8_char(pos); | ||||||
|   } else if (ObCharset::is_gb_charset(charset_type_)) { |   } else if (ObCharset::is_gb_charset(charset_type_)) { | ||||||
|  | |||||||
| @ -381,6 +381,7 @@ protected: | |||||||
| 		return is_valid_char(ch) && USER_VAR_CHAR[static_cast<uint8_t>(ch)]; | 		return is_valid_char(ch) && USER_VAR_CHAR[static_cast<uint8_t>(ch)]; | ||||||
| 	} | 	} | ||||||
| 	void reset_parser_node(ParseNode *node); | 	void reset_parser_node(ParseNode *node); | ||||||
|  | 	int64_t notascii_gb_char(const int64_t pos); | ||||||
| 	//{U} | 	//{U} | ||||||
| 	int64_t is_latin1_char(const int64_t pos); | 	int64_t is_latin1_char(const int64_t pos); | ||||||
| 	// ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U} | 	// ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U} | ||||||
| @ -440,6 +441,11 @@ protected: | |||||||
| 		return is_valid_char(ch) && | 		return is_valid_char(ch) && | ||||||
| 		static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0xfe; | 		static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0xfe; | ||||||
| 	} | 	} | ||||||
|  | 	inline bool notascii(char ch) | ||||||
|  | 	{ | ||||||
|  | 		return 	is_valid_char(ch) && | ||||||
|  | 				(static_cast<uint8_t>(ch) >= 0x80 && static_cast<uint8_t>(ch) <= 0xFF); | ||||||
|  | 	} | ||||||
|     inline bool is_latin1(char ch) |     inline bool is_latin1(char ch) | ||||||
| 	{ | 	{ | ||||||
| 		return is_valid_char(ch) && | 		return is_valid_char(ch) && | ||||||
|  | |||||||
| @ -38,14 +38,12 @@ extern void obsql_mysql_parser_fatal_error(int32_t errcoyyde, yyscan_t yyscanner | |||||||
| /* the adq is used to process dq in ANSI_QUOTES sql_mode*/ | /* the adq is used to process dq in ANSI_QUOTES sql_mode*/ | ||||||
| %x adq | %x adq | ||||||
|  |  | ||||||
| U  [\x80-\xbf] | NOTASCII [\x80-\xFF] | ||||||
| U_2  [\xc2-\xdf] |  | ||||||
| U_3  [\xe0-\xef] |  | ||||||
| U_4  [\xf0-\xf4] |  | ||||||
| GB_1 [\x81-\xfe] | GB_1 [\x81-\xfe] | ||||||
| GB_2 [\x40-\xfe] | GB_2 [\x40-\xfe] | ||||||
| GB_3 [\x30-\x39] | GB_3 [\x30-\x39] | ||||||
| UTF8_GB_CHAR ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U}|{GB_1}{GB_2}|{GB_1}{GB_3}{GB_1}{GB_3}) | NOTASCII_GB_CHAR ({NOTASCII}|{GB_1}{GB_2}|{GB_1}{GB_3}{GB_1}{GB_3}) | ||||||
|  |  | ||||||
| space            [ \t\n\r\f] | space            [ \t\n\r\f] | ||||||
| non_newline      [^\n\r] | non_newline      [^\n\r] | ||||||
| sql_comment      ("--"[ \t]+{non_newline}*)|(#{non_newline}*|"--"[\n\r]) | sql_comment      ("--"[ \t]+{non_newline}*)|(#{non_newline}*|"--"[\n\r]) | ||||||
| @ -62,7 +60,7 @@ common_hint_begin (\/\*\+({space}*hint{space}+)?) | |||||||
| c_cmt_begin      \/\* | c_cmt_begin      \/\* | ||||||
| c_cmt_end        \*+\/ | c_cmt_end        \*+\/ | ||||||
| comment          ({sql_comment}) | comment          ({sql_comment}) | ||||||
| identifier        (([A-Za-z0-9$_]|{UTF8_GB_CHAR})+) | identifier        (([A-Za-z0-9$_]|{NOTASCII_GB_CHAR})+) | ||||||
| system_variable  (@@[A-Za-z_][A-Za-z0-9_]*)|(@@[`][`A-Za-z_][`A-Za-z_]*) | system_variable  (@@[A-Za-z_][A-Za-z0-9_]*)|(@@[`][`A-Za-z_][`A-Za-z_]*) | ||||||
| user_variable    (@[A-Za-z0-9_\.$]*)|(@[`'\"][`'\"A-Za-z0-9_\.$/%]*) | user_variable    (@[A-Za-z0-9_\.$]*)|(@[`'\"][`'\"A-Za-z0-9_\.$/%]*) | ||||||
| version_num      ([0-9]+\.+[0-9]*) | version_num      ([0-9]+\.+[0-9]*) | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user
	 akaError
					akaError