diff --git a/src/sql/parser/ob_fast_parser.cpp b/src/sql/parser/ob_fast_parser.cpp index 118c5d3bf7..12a7b24a02 100644 --- a/src/sql/parser/ob_fast_parser.cpp +++ b/src/sql/parser/ob_fast_parser.cpp @@ -2025,7 +2025,12 @@ int ObFastParserMysql::process_string(const char quote) ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", K(ret), K(need_mem_size)); } else { - ParseNode *node = new_node(buf, T_VARCHAR); + ObItemType param_type = T_VARCHAR; /* default node type for a string token */ + if ('n' == raw_sql_.char_at(cur_token_begin_pos_) || + 'N' == raw_sql_.char_at(cur_token_begin_pos_)) { /* token begins with n or N: tag the node as T_NCHAR instead */ + param_type = T_NCHAR; + } + ParseNode *node = new_node(buf, param_type); if (NULL != child_node) { node->num_child_ = 1; node->children_ = child_node; @@ -2056,6 +2061,8 @@ int ObFastParserMysql::process_identifier_begin_with_n() cur_token_type_ = PARAM_TOKEN; OZ (add_null_type_node()); } + } else if ('\'' == raw_sql_.char_at(raw_sql_.cur_pos_)) { /* a single quote at cur_pos_: process the token as a quoted string */ + OZ (process_string('\'')); } else { } return ret; diff --git a/src/sql/parser/sql_parser_mysql_mode.l b/src/sql/parser/sql_parser_mysql_mode.l index 5825220a1d..7693d56d4b 100755 --- a/src/sql/parser/sql_parser_mysql_mode.l +++ b/src/sql/parser/sql_parser_mysql_mode.l @@ -285,6 +285,26 @@ FALSE { return BOOL_VALUE; } +("N"|"n"){sqbegin} { + BEGIN(sq); /* N or n followed by sqbegin: enter the sq start condition, as the plain sqbegin rule does */ + ParseResult *p = (ParseResult *)yyextra; + p->start_col_ = yylloc->first_column; + char **tmp_literal = &(p->tmp_literal_); + if (NULL == *tmp_literal) + { + *tmp_literal = (char*) parse_malloc(p->input_sql_len_ + 1, p->malloc_pool_); + check_malloc(*tmp_literal); + } + check_value(yylval); + malloc_new_node(yylval->node, p->malloc_pool_, T_NCHAR, 0); /* the T_NCHAR type is checked later to return NATIONAL_LITERAL instead of STRING_VALUE */ + + yylval->node->str_len_ = 0; + if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { + COPY_WRITE(); + } +} + + {sqbegin} { BEGIN(sq); ParseResult *p = (ParseResult *)yyextra; @@ -305,6 +325,7 @@ FALSE { check_value(yylval); yylloc->first_column = p->start_col_; FORMAT_STR_NODE(yylval->node); + int32_t token_ret = yylval->node->type_ == T_NCHAR ? 
NATIONAL_LITERAL : STRING_VALUE; /* token to return: NATIONAL_LITERAL for T_NCHAR nodes, STRING_VALUE otherwise */ yylval->node->text_len_ = yylloc->last_column - p->start_col_ + 1; COPY_STRING(p->input_sql_ + p->start_col_ - 1, yylval->node->text_len_, yylval->node->raw_text_); if (IS_FAST_PARAMETERIZE) { @@ -312,7 +333,7 @@ FALSE { } else { yylval->node->sql_str_off_ = yylloc->first_column; setup_token_pos_info(yylval->node, yylloc->first_column, yylval->node->text_len_ - 2); - return STRING_VALUE; + return token_ret; } } diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y index 56f9a4f6e4..c6b2017a15 100755 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -54,6 +54,7 @@ extern void obsql_oracle_parse_fatal_error(int32_t errcode, yyscan_t yyscanner, %token NAME_OB %token STRING_VALUE +%token NATIONAL_LITERAL /* string literal carrying an N or n prefix; returned by the lexer for T_NCHAR nodes */ %token INTNUM %token DATE_VALUE %token TIMESTAMP_VALUE @@ -939,6 +940,10 @@ STRING_VALUE %prec LOWER_THAN_COMP make_name_node(concat_node, result->malloc_pool_, "concat"); malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, concat_node, string_list_node); } +| NATIONAL_LITERAL +{ + $$ = $1; /* pass the node built by the lexer through unchanged */ +} ; charset_introducer: @@ -5222,6 +5227,52 @@ int_type_i opt_int_length_i opt_unsigned_i opt_zerofill_i $$->int32_values_[1] = 0; /* is char */ $$->sql_str_off_ = @1.first_column; } +| NCHAR opt_string_length_i opt_binary +{ /* NCHAR: builds a T_CHAR node whose charset child is fixed to utf8mb4. NOTE(review): the same charset-node construction is repeated in every NCHAR, NVARCHAR and NATIONAL alternative added by this patch; a shared helper would remove the duplication */ + ParseNode *charset_node = NULL; + ParseNode *charset_name = NULL; + malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); + malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); + charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); + if (OB_UNLIKELY(NULL == charset_name->str_value_)) { + yyerror(NULL, result, "No more space for mallocing string\n"); + YYABORT_NO_MEMORY; + } + charset_name->type_ = T_CHAR_CHARSET; + charset_name->param_num_ = 0; + charset_name->is_hidden_const_ = 1; + charset_node->str_value_ = charset_name->str_value_; /* charset_name only supplies str_value_, str_len_ and sql_str_off_; charset_node is the child attached to the result */ + 
charset_node->str_len_ = charset_name->str_len_; + charset_node->sql_str_off_ = charset_name->sql_str_off_; + + malloc_non_terminal_node($$, result->malloc_pool_, T_CHAR, 3, charset_node, NULL, $3); + $$->int32_values_[0] = $2[0]; + $$->int32_values_[1] = 0; /* is char */ + $$->sql_str_off_ = @1.first_column; +} +| NATIONAL CHARACTER opt_string_length_i opt_binary +{ /* NATIONAL CHARACTER: same T_CHAR node with a utf8mb4 charset child; length comes from $3 and opt_binary from $4 */ + ParseNode *charset_node = NULL; + ParseNode *charset_name = NULL; + malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); + malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); + charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); + if (OB_UNLIKELY(NULL == charset_name->str_value_)) { + yyerror(NULL, result, "No more space for mallocing string\n"); + YYABORT_NO_MEMORY; + } + charset_name->type_ = T_CHAR_CHARSET; + charset_name->param_num_ = 0; + charset_name->is_hidden_const_ = 1; + charset_node->str_value_ = charset_name->str_value_; + charset_node->str_len_ = charset_name->str_len_; + charset_node->sql_str_off_ = charset_name->sql_str_off_; + malloc_non_terminal_node($$, result->malloc_pool_, T_CHAR, 3, charset_node, NULL, $4); + $$->int32_values_[0] = $3[0]; + $$->int32_values_[1] = 0; /* is char */ + $$->sql_str_off_ = @1.first_column; +} + /* | TEXT opt_binary opt_charset opt_collation // { // (void)($2); @@ -5235,12 +5286,98 @@ int_type_i opt_int_length_i opt_unsigned_i opt_zerofill_i $$->int32_values_[0] = $2[0]; $$->int32_values_[1] = 0; /* is char */ } +| NCHAR VARCHAR string_length_i opt_binary +{ /* NCHAR VARCHAR: T_VARCHAR node with a utf8mb4 charset child; length comes from $3 and opt_binary from $4 */ + ParseNode *charset_node = NULL; + ParseNode *charset_name = NULL; + malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); + malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); + charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); + if (OB_UNLIKELY(NULL == charset_name->str_value_)) { + yyerror(NULL, result, "No more space for mallocing 
string\n"); + YYABORT_NO_MEMORY; + } + charset_name->type_ = T_CHAR_CHARSET; + charset_name->param_num_ = 0; + charset_name->is_hidden_const_ = 1; + charset_node->str_value_ = charset_name->str_value_; + charset_node->str_len_ = charset_name->str_len_; + charset_node->sql_str_off_ = charset_name->sql_str_off_; + + malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $4); + $$->int32_values_[0] = $3[0]; + $$->int32_values_[1] = 0; /* is char */ +} +| NVARCHAR string_length_i opt_binary +{ + ParseNode *charset_node = NULL; + ParseNode *charset_name = NULL; + malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); + malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); + charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); + if (OB_UNLIKELY(NULL == charset_name->str_value_)) { + yyerror(NULL, result, "No more space for mallocing string\n"); + YYABORT_NO_MEMORY; + } + charset_name->type_ = T_CHAR_CHARSET; + charset_name->param_num_ = 0; + charset_name->is_hidden_const_ = 1; + charset_node->str_value_ = charset_name->str_value_; + charset_node->str_len_ = charset_name->str_len_; + charset_node->sql_str_off_ = charset_name->sql_str_off_; + + malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $3); + $$->int32_values_[0] = $2[0]; + $$->int32_values_[1] = 0; /* is char */ +} +| NATIONAL VARCHAR string_length_i opt_binary +{ + ParseNode *charset_node = NULL; + ParseNode *charset_name = NULL; + malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); + malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); + charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); + if (OB_UNLIKELY(NULL == charset_name->str_value_)) { + yyerror(NULL, result, "No more space for mallocing string\n"); + YYABORT_NO_MEMORY; + } + charset_name->type_ = T_CHAR_CHARSET; + 
charset_name->param_num_ = 0; + charset_name->is_hidden_const_ = 1; + charset_node->str_value_ = charset_name->str_value_; + charset_node->str_len_ = charset_name->str_len_; + charset_node->sql_str_off_ = charset_name->sql_str_off_; + malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $4); + $$->int32_values_[0] = $3[0]; + $$->int32_values_[1] = 0; /* is char */ +} | CHARACTER VARYING string_length_i opt_binary opt_charset opt_collation { malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, $5, $6, $4); $$->int32_values_[0] = $3[0]; $$->int32_values_[1] = 0; /* is char */ } +| NATIONAL CHARACTER VARYING string_length_i opt_binary +{ /* NATIONAL CHARACTER VARYING: T_VARCHAR node with a utf8mb4 charset child; length comes from $4 and opt_binary from $5 */ + ParseNode *charset_node = NULL; + ParseNode *charset_name = NULL; + malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); + malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); + charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); + if (OB_UNLIKELY(NULL == charset_name->str_value_)) { + yyerror(NULL, result, "No more space for mallocing string\n"); + YYABORT_NO_MEMORY; + } + charset_name->type_ = T_CHAR_CHARSET; + charset_name->param_num_ = 0; + charset_name->is_hidden_const_ = 1; + charset_node->str_value_ = charset_name->str_value_; + charset_node->str_len_ = charset_name->str_len_; + charset_node->sql_str_off_ = charset_name->sql_str_off_; + malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $5); + $$->int32_values_[0] = $4[0]; + $$->int32_values_[1] = 0; /* is char */ +} | blob_type_i opt_string_length_i_v2 { malloc_terminal_node($$, result->malloc_pool_, $1[0]); diff --git a/src/sql/resolver/ob_resolver_utils.cpp b/src/sql/resolver/ob_resolver_utils.cpp index 25e48266b9..c2a619b7c2 100644 --- a/src/sql/resolver/ob_resolver_utils.cpp +++ b/src/sql/resolver/ob_resolver_utils.cpp @@ -1982,6 +1982,34 @@ int ObResolverUtils::resolve_stmt_type(const ParseResult &result, 
stmt::StmtType return ret; } +int ObResolverUtils::set_string_val_charset(ObObjParam &val, ObString &charset, ObObj &result_val, + bool is_strict_mode, + bool return_ret) +{ /* Looks up charset, sets its default collation on val, sets the length of val from ObCharset::strlen_char, then validates the string with ObSQLUtils::check_well_formed_str. Returns OB_ERR_UNKNOWN_CHARSET when the charset name is not recognised, otherwise the result of the well-formedness check. */ + int ret = OB_SUCCESS; + ObCharsetType charset_type = CHARSET_INVALID; + if (CHARSET_INVALID == (charset_type = ObCharset::charset_type(charset.trim()))) { + ret = OB_ERR_UNKNOWN_CHARSET; + LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr()); + } else { + // use the default collation of the specified charset + ObCollationType collation_type = ObCharset::get_default_collation(charset_type); + val.set_collation_type(collation_type); + LOG_DEBUG("use default collation", K(charset_type), K(collation_type)); + ObLength length = static_cast(ObCharset::strlen_char(val.get_collation_type(), + val.get_string_ptr(), + val.get_string_len())); + val.set_length(length); + + // To report the same error as MySQL, check here whether the string is well formed; this is only a check: an invalid string raises an error and nothing else is done. + // Per the note carried over from the inlined code, when the ret_error argument of check_well_formed_str is true its is_strict_mode argument has no effect; this helper forwards both flags from its caller instead of hard-coding true. + if (OB_SUCC(ret) && OB_FAIL(ObSQLUtils::check_well_formed_str(val, result_val, is_strict_mode, return_ret))) { /* NOTE(review): ret is only assigned on the other branch, so the OB_SUCC(ret) guard looks redundant here */ + LOG_WARN("invalid str", K(ret), K(val), K(is_strict_mode), K(return_ret)); + } + } + return ret; +} + int ObResolverUtils::resolve_const(const ParseNode *node, const stmt::StmtType stmt_type, ObIAllocator &allocator, @@ -2040,7 +2068,8 @@ int ObResolverUtils::resolve_const(const ParseNode *node, ObString str_val; ObObj result_val; str_val.assign_ptr(const_cast(node->str_value_), static_cast(node->str_len_)); - val.set_string(static_cast(node->type_), str_val); + val.set_string(lib::is_mysql_mode() && is_nchar ? + ObVarcharType : static_cast(node->type_), str_val); /* in MySQL mode an nchar literal is stored as ObVarcharType instead of the parse node type */ // decide collation /* MySQL determines a literal's character set and collation in the following manner: @@ -2061,7 +2090,14 @@ int ObResolverUtils::resolve_const(const ParseNode *node, // } else if (0 == node->num_child_) { if (0 == node->num_child_) { // for STRING without collation, e.g. 
show tables like STRING; - val.set_collation_type(connection_collation); + if (lib::is_mysql_mode() && is_nchar) { /* nchar literal in MySQL mode: use the default collation of utf8mb4 instead of the connection collation */ + ObString charset(strlen("utf8mb4"), "utf8mb4"); + if (OB_FAIL(set_string_val_charset(val, charset, result_val, false, false))) { + LOG_WARN("set string val charset failed", K(ret)); + } + } else { + val.set_collation_type(connection_collation); + } } else { // STRING in SQL expression ParseNode *charset_node = NULL; @@ -2076,31 +2112,15 @@ int ObResolverUtils::resolve_const(const ParseNode *node, ObCollationType collation_type = CS_TYPE_INVALID; if (charset_node != NULL) { ObString charset(charset_node->str_len_, charset_node->str_value_); - if (CHARSET_INVALID == (charset_type = ObCharset::charset_type(charset.trim()))) { - ret = OB_ERR_UNKNOWN_CHARSET; - LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr()); - } else { - // use the default collation of the specified charset - collation_type = ObCharset::get_default_collation(charset_type); - val.set_collation_type(collation_type); - LOG_DEBUG("use default collation", K(charset_type), K(collation_type)); - ObLength length = static_cast(ObCharset::strlen_char(val.get_collation_type(), - val.get_string_ptr(), - val.get_string_len())); - val.set_length(length); - - // 为了跟mysql报错一样,这里检查一下字符串是否合法,仅仅是检查,不合法则报错,不做其他操作 - // check_well_formed_str的ret_error参数为true的时候,is_strict_mode参数失效,因此这里is_strict_mode直接传入true - if (OB_SUCC(ret) && OB_FAIL(ObSQLUtils::check_well_formed_str(val, result_val, true, true))) { - LOG_WARN("invalid str", K(ret), K(val)); - } + if (OB_FAIL(set_string_val_charset(val, charset, result_val, false, false))) { /* NOTE(review): the removed inline code called check_well_formed_str(val, result_val, true, true); this call forwards false, false -- confirm the change in checking behaviour is intended */ + LOG_WARN("set string val charset failed", K(ret)); } } } } ObLengthSemantics length_semantics = LS_DEFAULT; if (OB_SUCC(ret)) { - if (T_NVARCHAR2 == node->type_ || T_NCHAR == node->type_) { + if (lib::is_oracle_mode() && (T_NVARCHAR2 == node->type_ || T_NCHAR == node->type_)) { /* LS_CHAR is now applied to these node types in Oracle mode only */ length_semantics = LS_CHAR; } else { length_semantics = default_length_semantics; 
diff --git a/src/sql/resolver/ob_resolver_utils.h b/src/sql/resolver/ob_resolver_utils.h index 766f2986b9..10d924d6a3 100644 --- a/src/sql/resolver/ob_resolver_utils.h +++ b/src/sql/resolver/ob_resolver_utils.h @@ -344,6 +344,12 @@ public: const ObSQLMode mode, bool is_from_pl = false); + static int set_string_val_charset(ObObjParam &val, /* string value whose collation type and length are set */ + ObString &charset, /* charset name, resolved with ObCharset::charset_type */ + ObObj &result_val, /* passed through to ObSQLUtils::check_well_formed_str */ + bool is_strict_mode, /* forwarded to check_well_formed_str */ + bool return_ret); /* forwarded to check_well_formed_str; the function returns OB_ERR_UNKNOWN_CHARSET for an unrecognised charset name */ + static int resolve_data_type(const ParseNode &type_node, const common::ObString &ident_name, common::ObDataType &data_type,