From d06570baf45fbb56046ef721acdc2ddd3eab24a0 Mon Sep 17 00:00:00 2001 From: Monk-Liu <1152761042@qq.com> Date: Tue, 30 Nov 2021 11:12:43 +0800 Subject: [PATCH] Support sql_mode "ANSI_QUOTES". --- deps/oblib/src/common/sql_mode/ob_sql_mode.h | 5 + .../src/common/sql_mode/ob_sql_mode_utils.cpp | 3 +- src/sql/parser/sql_parser_mysql_mode.l | 100 +++++++++++++++--- 3 files changed, 94 insertions(+), 14 deletions(-) diff --git a/deps/oblib/src/common/sql_mode/ob_sql_mode.h b/deps/oblib/src/common/sql_mode/ob_sql_mode.h index c6c33701b..91a036821 100644 --- a/deps/oblib/src/common/sql_mode/ob_sql_mode.h +++ b/deps/oblib/src/common/sql_mode/ob_sql_mode.h @@ -135,6 +135,11 @@ typedef uint64_t ObSQLMode; is_true = (SMO_NO_BACKSLASH_ESCAPES & mode); \ } +#define IS_ANSI_QUOTES(mode, is_true) \ + { \ + is_true = (SMO_ANSI_QUOTES & mode); \ + } + #ifdef __cplusplus } #endif diff --git a/deps/oblib/src/common/sql_mode/ob_sql_mode_utils.cpp b/deps/oblib/src/common/sql_mode/ob_sql_mode_utils.cpp index 9a7e67893..ff51ede9c 100644 --- a/deps/oblib/src/common/sql_mode/ob_sql_mode_utils.cpp +++ b/deps/oblib/src/common/sql_mode/ob_sql_mode_utils.cpp @@ -70,7 +70,8 @@ ObSQLMode SUPPORT_MODE = SMO_STRICT_ALL_TABLES | SMO_NO_KEY_OPTIONS | SMO_NO_TABLE_OPTIONS | SMO_NO_FIELD_OPTIONS - | SMO_NO_BACKSLASH_ESCAPES; + | SMO_NO_BACKSLASH_ESCAPES + | SMO_ANSI_QUOTES; bool is_sql_mode_supported(ObSQLMode mode) { diff --git a/src/sql/parser/sql_parser_mysql_mode.l b/src/sql/parser/sql_parser_mysql_mode.l index ada4f157a..57ca84508 100644 --- a/src/sql/parser/sql_parser_mysql_mode.l +++ b/src/sql/parser/sql_parser_mysql_mode.l @@ -34,6 +34,8 @@ extern void obsql_mysql_parser_fatal_error(yyconst char *msg, yyscan_t yyscanner %x sq %x dq %x bt +/* adq: exclusive start condition that scans a double-quoted identifier when sql_mode ANSI_QUOTES is set */ +%x adq U [\x80-\xbf] U_2 [\xc2-\xdf] @@ -79,6 +81,7 @@ dqbegin {dquote} dqend {dquote} dqdouble {dquote}{dquote} dqcontent [^\\\n\r"]+ +adqcontent [^"]+ dqnewline {dquote}{whitespace}{dquote} 
backtick ` @@ -631,20 +634,41 @@ FALSE { } {dqbegin} { - BEGIN(dq); ParseResult *p = (ParseResult *)yyextra; - p->start_col_ = yylloc->first_column; - char **tmp_literal = &p->tmp_literal_; - if (*tmp_literal == NULL) - { - *tmp_literal = (char*) parse_malloc(p->input_sql_len_ + 1, p->malloc_pool_); - check_value(*tmp_literal); - } - malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); - yylval->node->str_len_ = 0; - p->last_well_formed_len_ = 0; - if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { - return OUTLINE_DEFAULT_TOKEN; + ObSQLMode mode = p->sql_mode_; + bool is_ansi_quotes = false; + IS_ANSI_QUOTES(mode, is_ansi_quotes); + if (is_ansi_quotes) { /* ANSI_QUOTES set: scan the double-quoted text as an identifier (T_IDENT) in state adq */ + BEGIN(adq); + if (IS_FAST_PARAMETERIZE) { + COPY_WRITE(); + } else { + check_value(yylval); + p->start_col_ = yylloc->first_column; + char **tmp_literal = &p->tmp_literal_; + if (*tmp_literal == NULL) + { + *tmp_literal = (char*) parse_malloc(p->input_sql_len_ + 1, p->malloc_pool_); + check_value(*tmp_literal); + } + malloc_new_node(yylval->node, p->malloc_pool_, T_IDENT, 0); + yylval->node->str_len_ = 0; + } + } else { /* ANSI_QUOTES not set: keep scanning a T_VARCHAR string literal in state dq */ + BEGIN(dq); + p->start_col_ = yylloc->first_column; + char **tmp_literal = &p->tmp_literal_; + if (*tmp_literal == NULL) + { + *tmp_literal = (char*) parse_malloc(p->input_sql_len_ + 1, p->malloc_pool_); + check_value(*tmp_literal); + } + malloc_new_node(yylval->node, p->malloc_pool_, T_VARCHAR, 0); + yylval->node->str_len_ = 0; + p->last_well_formed_len_ = 0; + if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { + return OUTLINE_DEFAULT_TOKEN; + } } } @@ -745,6 +769,56 @@ FALSE { return PARSER_SYNTAX_ERROR; } +{dqdouble} { /* doubled quote inside the identifier: append a single double-quote character */ + if (IS_FAST_PARAMETERIZE) { + COPY_WRITE(); + } else { + check_value(yylval); + ((ParseResult *)yyextra)->tmp_literal_[yylval->node->str_len_++] = '"'; + } +} + +{adqcontent} { /* run of non-quote characters: append it verbatim to tmp_literal_ */ + if (IS_FAST_PARAMETERIZE) { + COPY_WRITE(); + } else { + check_value(yylval); + memmove(((ParseResult *)yyextra)->tmp_literal_ + yylval->node->str_len_, yytext, yyleng); 
yylval->node->str_len_ += yyleng; + } +} + +{dqend} { /* closing quote: return to INITIAL and hand the collected text back as NAME_OB */ + BEGIN(INITIAL); + if (IS_FAST_PARAMETERIZE) { + COPY_WRITE(); + } else { + ParseResult *p = (ParseResult *)yyextra; + check_value(yylval); + yylloc->first_column = p->start_col_; + char *tmp_literal = p->tmp_literal_; + tmp_literal[yylval->node->str_len_] = '\0'; + + char *dup_value = NULL; + if (p->is_not_utf8_connection_) { + dup_value = parse_str_convert_utf8(p->charset_info_, tmp_literal, p->malloc_pool_, &(yylval->node->str_len_), &(p->extra_errno_)); + check_identifier_convert_result(p->extra_errno_); + //fprintf(stderr, "parse identifier result, str=%s, str_len=%ld\n", node->str_value_, node->str_len_); + } else { + dup_value = parse_strndup(tmp_literal, yylval->node->str_len_ + 1, p->malloc_pool_); + } + check_value(dup_value); + yylval->node->str_value_ = dup_value; + setup_token_pos_info(yylval->node, yylloc->first_column, yylval->node->str_len_); + return NAME_OB; + } +} + +<> { /* NOTE(review): the rule pattern looks stripped in this view; it sits with the double-quote rules, so it reports an unterminated double-quoted identifier - confirm */ + yyerror(yylloc, yyextra, "unterminated doublequote string\n"); + return PARSER_SYNTAX_ERROR; +} + {btbegin} { BEGIN(bt); /*fast parameterize don't handle connent in ``*/ if (IS_FAST_PARAMETERIZE) {