From 250d8e25bb9377ecda130b10e926a6bc8a1ed4b9 Mon Sep 17 00:00:00 2001 From: wangt1xiuyi <13547954130@163.com> Date: Thu, 8 Feb 2024 13:51:23 +0000 Subject: [PATCH] fix const number of fast parse and normal parse is different with mysql sql comment and parser error code refine --- src/sql/parser/gen_parser.sh | 4 ++++ src/sql/parser/ob_fast_parser.cpp | 18 ++++++++++++++---- src/sql/parser/sql_parser_base.c | 11 ++++++++++- src/sql/parser/sql_parser_mysql_mode.l | 13 +++++++++++++ src/sql/parser/sql_parser_mysql_mode.y | 4 +++- 5 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/sql/parser/gen_parser.sh b/src/sql/parser/gen_parser.sh index 051f0e02c6..6fe26d54f1 100755 --- a/src/sql/parser/gen_parser.sh +++ b/src/sql/parser/gen_parser.sh @@ -40,6 +40,7 @@ flex -Cfa -B -8 -o ../../../src/sql/parser/sql_parser_mysql_mode_lex.c ../../../ sed "/Setup the input buffer state to scan the given bytes/,/}/{/int i/d}" -i ../../../src/sql/parser/sql_parser_mysql_mode_lex.c sed "/Setup the input buffer state to scan the given bytes/,/}/{/for ( i = 0; i < _yybytes_len; ++i )/d}" -i ../../../src/sql/parser/sql_parser_mysql_mode_lex.c sed "/Setup the input buffer state to scan the given bytes/,/}/{s/\tbuf\[i\] = yybytes\[i\]/memcpy(buf, yybytes, _yybytes_len)/g}" -i ../../../src/sql/parser/sql_parser_mysql_mode_lex.c +sed "/obsql_mysql_yylex_init is special because it creates the scanner itself/,/Initialization is the same as for the non-reentrant scanner/{s/return 1/return errno/g}" -i ../../../src/sql/parser/sql_parser_mysql_mode_lex.c if [ -d "../../../close_modules/oracle_parser/sql/parser" ]; then @@ -82,6 +83,7 @@ flex -o ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c ../../../src sed "/Setup the input buffer state to scan the given bytes/,/}/{/int i/d}" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c sed "/Setup the input buffer state to scan the given bytes/,/}/{/for ( i = 0; i < _yybytes_len; ++i )/d}" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c sed "/Setup the input buffer state to scan the given bytes/,/}/{s/\tbuf\[i\] = yybytes\[i\]/memcpy(buf, yybytes, _yybytes_len)/g}" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c +sed "/obsql_oracle_latin1_yylex_init is special because it creates the scanner itself/,/Initialization is the same as for the non-reentrant scanner/{s/return 1/return errno/g}" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c cat ../../../src/sql/parser/non_reserved_keywords_oracle_mode.c > ../../../src/sql/parser/non_reserved_keywords_oracle_latin1_mode.c sed '/#include "ob_non_reserved_keywords.h"/a\#include "sql/parser/sql_parser_oracle_latin1_mode_tab.h\"' -i ../../../src/sql/parser/non_reserved_keywords_oracle_latin1_mode.c sed "s/non_reserved_keywords_oracle_mode.c is for …/non_reserved_keywords_oracle_latin1_mode.c is auto generated by gen_parser.sh/g" -i ../../../src/sql/parser/non_reserved_keywords_oracle_latin1_mode.c @@ -139,6 +141,7 @@ flex -o ../../../src/sql/parser/sql_parser_oracle_utf8_mode_lex.c ../../../src/s sed "/Setup the input buffer state to scan the given bytes/,/}/{/int i/d}" -i ../../../src/sql/parser/sql_parser_oracle_utf8_mode_lex.c sed "/Setup the input buffer state to scan the given bytes/,/}/{/for ( i = 0; i < _yybytes_len; ++i )/d}" -i ../../../src/sql/parser/sql_parser_oracle_utf8_mode_lex.c sed "/Setup the input buffer state to scan the given bytes/,/}/{s/\tbuf\[i\] = yybytes\[i\]/memcpy(buf, yybytes, _yybytes_len)/g}" -i ../../../src/sql/parser/sql_parser_oracle_utf8_mode_lex.c +sed "/obsql_oracle_utf8_yylex_init is special because it creates the scanner itself/,/Initialization is the same as for the non-reentrant scanner/{s/return 1/return errno/g}" -i ../../../src/sql/parser/sql_parser_oracle_utf8_mode_lex.c cat ../../../src/sql/parser/non_reserved_keywords_oracle_mode.c > ../../../src/sql/parser/non_reserved_keywords_oracle_utf8_mode.c sed '/#include "ob_non_reserved_keywords.h"/a\#include "sql/parser/sql_parser_oracle_utf8_mode_tab.h\"' -i ../../../src/sql/parser/non_reserved_keywords_oracle_utf8_mode.c sed "s/non_reserved_keywords_oracle_mode.c is for …/non_reserved_keywords_oracle_utf8_mode.c is auto generated by gen_parser.sh/g" -i ../../../src/sql/parser/non_reserved_keywords_oracle_utf8_mode.c @@ -194,6 +197,7 @@ flex -o ../../../src/sql/parser/sql_parser_oracle_gbk_mode_lex.c ../../../src/sq sed "/Setup the input buffer state to scan the given bytes/,/}/{/int i/d}" -i ../../../src/sql/parser/sql_parser_oracle_gbk_mode_lex.c sed "/Setup the input buffer state to scan the given bytes/,/}/{/for ( i = 0; i < _yybytes_len; ++i )/d}" -i ../../../src/sql/parser/sql_parser_oracle_gbk_mode_lex.c sed "/Setup the input buffer state to scan the given bytes/,/}/{s/\tbuf\[i\] = yybytes\[i\]/memcpy(buf, yybytes, _yybytes_len)/g}" -i ../../../src/sql/parser/sql_parser_oracle_gbk_mode_lex.c +sed "/obsql_oracle_gbk_yylex_init is special because it creates the scanner itself/,/Initialization is the same as for the non-reentrant scanner/{s/return 1/return errno/g}" -i ../../../src/sql/parser/sql_parser_oracle_gbk_mode_lex.c cat ../../../src/sql/parser/non_reserved_keywords_oracle_mode.c > ../../../src/sql/parser/non_reserved_keywords_oracle_gbk_mode.c sed '/#include "ob_non_reserved_keywords.h"/a\#include "sql/parser/sql_parser_oracle_gbk_mode_tab.h\"' -i ../../../src/sql/parser/non_reserved_keywords_oracle_gbk_mode.c sed "s/non_reserved_keywords_oracle_mode.c is for …/non_reserved_keywords_oracle_gbk_mode.c is auto generated by gen_parser.sh/g" -i ../../../src/sql/parser/non_reserved_keywords_oracle_gbk_mode.c diff --git a/src/sql/parser/ob_fast_parser.cpp b/src/sql/parser/ob_fast_parser.cpp index 9a0eee0f8e..8217c7593d 100644 --- a/src/sql/parser/ob_fast_parser.cpp +++ b/src/sql/parser/ob_fast_parser.cpp @@ -2754,16 +2754,26 @@ int ObFastParserMysql::parse_next_token() } case '-': { // need to deal with sql_comment or negative sign - int64_t space_len = 0; ch = raw_sql_.scan(); - if ('-' == ch && IS_MULTI_SPACE(raw_sql_.cur_pos_ + 1, space_len)) { - // "--"{space}+{non_newline}* + if ('-' == ch && + raw_sql_.cur_pos_ + 1 < raw_sql_.raw_sql_len_ && + (raw_sql_.raw_sql_[raw_sql_.cur_pos_ + 1] == ' ' || + raw_sql_.raw_sql_[raw_sql_.cur_pos_ + 1] == '\t')) { + // "--"[ \t]+{non_newline}* cur_token_type_ = IGNORE_TOKEN; // skip the second '-' and space - raw_sql_.scan(1 + space_len); + raw_sql_.scan(1); while (!raw_sql_.is_search_end() && is_non_newline(ch)) { ch = raw_sql_.scan(); } + } else if ('-' == ch && + raw_sql_.cur_pos_ + 1 < raw_sql_.raw_sql_len_ && + (raw_sql_.raw_sql_[raw_sql_.cur_pos_ + 1] == '\n' || + raw_sql_.raw_sql_[raw_sql_.cur_pos_ + 1] == '\r')) { + // "--"[\n\r] + cur_token_type_ = IGNORE_TOKEN; + //skip the second '-' and ('\n' or \r) + raw_sql_.scan(1); } else { OZ (process_negative()); } diff --git a/src/sql/parser/sql_parser_base.c b/src/sql/parser/sql_parser_base.c index c360633258..be85268dd2 100644 --- a/src/sql/parser/sql_parser_base.c +++ b/src/sql/parser/sql_parser_base.c @@ -62,7 +62,7 @@ int parse_init(ParseResult *p) static __thread char error_msg[MAX_ERROR_MSG] = {'\0'}; p->error_msg_ = error_msg; if (OB_UNLIKELY(NULL == p || NULL == p->malloc_pool_)) { - ret = -1; + ret = OB_PARSER_ERR_UNEXPECTED; if (NULL != p) { (void)snprintf(p->error_msg_, MAX_ERROR_MSG, "malloc_pool_ must be set"); } @@ -108,6 +108,15 @@ int parse_init(ParseResult *p) #ifdef OB_BUILD_ORACLE_PARSER } #endif +#define ENOMEM 12 /* Out of memory */ + //refine parser error code to OB error code + if (0 == ret) { + ret = OB_PARSER_SUCCESS; + } else if (ENOMEM == ret) { + ret = OB_PARSER_ERR_NO_MEMORY; + } else { + ret = OB_PARSER_ERR_PARSE_SQL; + } } return ret; } diff --git a/src/sql/parser/sql_parser_mysql_mode.l b/src/sql/parser/sql_parser_mysql_mode.l index 266d959d26..4898d325d5 100644 --- a/src/sql/parser/sql_parser_mysql_mode.l +++ b/src/sql/parser/sql_parser_mysql_mode.l @@ -135,6 +135,7 @@ NULL { check_value(yylval); ParseResult *p = (ParseResult *)yyextra; malloc_new_node(yylval->node, p->malloc_pool_, T_NULL, 0); + CHECK_MYSQL_COMMENT(p, yylval->node); COPY_STRING(p->input_sql_ + yylloc->first_column - 1, yyleng, yylval->node->raw_text_); yylval->node->text_len_ = yyleng; if (IS_FAST_PARAMETERIZE) { @@ -152,6 +153,7 @@ NULL { ParseResult *p = (ParseResult *)yyextra; malloc_new_node(node, p->malloc_pool_, T_INT, 0); yylval->node = node; + CHECK_MYSQL_COMMENT(p, yylval->node); int err_no = 0; COPY_NUM_STRING(p, node); // we treated '- 12' as a const node, that is to say node->str_value_ = '- 12' @@ -205,6 +207,7 @@ NULL { malloc_new_node(node, p->malloc_pool_, T_DOUBLE, 0); check_value(yylval); yylval->node = node; + CHECK_MYSQL_COMMENT(p, yylval->node); COPY_NUM_STRING(p, node); node->raw_text_ = node->str_value_; node->text_len_ = node->str_len_; @@ -224,6 +227,7 @@ NULL { malloc_new_node(node, p->malloc_pool_, T_NUMBER/* should be T_NUMBER,*/, 0); check_value(yylval); yylval->node = node; + CHECK_MYSQL_COMMENT(p, yylval->node); COPY_NUM_STRING(p, node); node->raw_text_ = node->str_value_; node->text_len_ = node->str_len_; @@ -240,6 +244,7 @@ TRUE { check_value(yylval); ParseResult *p = (ParseResult *)yyextra; malloc_new_node(yylval->node, p->malloc_pool_, T_BOOL, 0); + CHECK_MYSQL_COMMENT(p, yylval->node); yylval->node->value_ = 1; COPY_STRING(p->input_sql_ + yylloc->first_column - 1, yyleng, yylval->node->raw_text_); yylval->node->text_len_ = yyleng; @@ -265,6 +270,7 @@ FALSE { check_value(yylval); ParseResult *p = (ParseResult *)yyextra; malloc_new_node(yylval->node, p->malloc_pool_, T_BOOL, 0); + CHECK_MYSQL_COMMENT(p, yylval->node); yylval->node->value_ = 0; COPY_STRING(p->input_sql_ + yylloc->first_column - 1, yyleng, yylval->node->raw_text_); yylval->node->text_len_ = yyleng; @@ -298,6 +304,7 @@ FALSE { } check_value(yylval); malloc_new_node(yylval->node, p->malloc_pool_, T_NCHAR, 0); + CHECK_MYSQL_COMMENT(p, yylval->node); yylval->node->str_len_ = 0; if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { @@ -312,6 +319,7 @@ FALSE { p->start_col_ = yylloc->first_column; check_value(yylval); malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); + CHECK_MYSQL_COMMENT(p, yylval->node); yylval->node->str_len_ = 0; p->last_escape_check_pos_ = 0; yylval->node->str_value_ = NULL; @@ -412,6 +420,7 @@ FALSE { } malloc_new_node(yylval->node->children_[0], p->malloc_pool_, T_CONCAT_STRING, 0); + CHECK_MYSQL_COMMENT(p, yylval->node->children_[0]); (*yylval->node->children_)->str_value_ = parse_strndup(tmp_literal, yylval->node->str_len_ + 1, p->malloc_pool_); check_malloc((*yylval->node->children_)->str_value_); @@ -459,6 +468,7 @@ FALSE { check_malloc(*tmp_literal); } malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); + CHECK_MYSQL_COMMENT(p, yylval->node); yylval->node->str_len_ = 0; p->last_escape_check_pos_ = 0; if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { @@ -553,6 +563,7 @@ FALSE { } malloc_new_node(yylval->node->children_[0], p->malloc_pool_, T_CONCAT_STRING, 0); + CHECK_MYSQL_COMMENT(p, yylval->node->children_[0]); (*yylval->node->children_)->str_value_ = parse_strndup(tmp_literal, yylval->node->str_len_ + 1, p->malloc_pool_); check_malloc((*yylval->node->children_)->str_value_); @@ -730,6 +741,7 @@ X'([0-9A-F])*'|0X([0-9A-F])+ { node->str_len_ = 0; } yylval->node = node; + CHECK_MYSQL_COMMENT(p, yylval->node); COPY_STRING(p->input_sql_ + yylloc->first_column - 1, yyleng, yylval->node->raw_text_); yylval->node->text_len_ = yyleng; yylval->node->is_copy_raw_text_ = 1; @@ -773,6 +785,7 @@ B'([01])*'|0B([01])+ { node->str_len_ = 0; } yylval->node = node; + CHECK_MYSQL_COMMENT(p, yylval->node); COPY_STRING(p->input_sql_ + yylloc->first_column - 1, yyleng, yylval->node->raw_text_); yylval->node->text_len_ = yyleng; yylval->node->is_copy_raw_text_ = 1; diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y index a254030326..cb17fc60c0 100644 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -968,6 +968,7 @@ STRING_VALUE %prec LOWER_THAN_COMP $$->sql_str_off_ = @1.first_column; @$.first_column = @1.first_column; @$.last_column = @1.last_column; + $$->is_forbid_parameter_ = $1->is_forbid_parameter_; } | charset_introducer STRING_VALUE { @@ -977,6 +978,7 @@ STRING_VALUE %prec LOWER_THAN_COMP $$->raw_text_ = $2->raw_text_; $$->text_len_ = $2->text_len_; $$->sql_str_off_ = $2->sql_str_off_; + $$->is_forbid_parameter_ = $2->is_forbid_parameter_; } | charset_introducer HEX_STRING_VALUE { @@ -987,6 +989,7 @@ STRING_VALUE %prec LOWER_THAN_COMP $$->raw_text_ = $2->raw_text_; $$->text_len_ = $2->text_len_; $$->sql_str_off_ = $2->sql_str_off_; + $$->is_forbid_parameter_ = $2->is_forbid_parameter_; } | STRING_VALUE string_val_list %prec LOWER_THAN_COMP { @@ -1106,7 +1109,6 @@ literal { $$ = $1; $$->sql_str_off_ = $1->sql_str_off_; - CHECK_MYSQL_COMMENT(result, $$); } | SYSTEM_VARIABLE { $$ = $1; } | QUESTIONMARK { $$ = $1; }