Support some charset grammar
This commit is contained in:
		| @ -2025,7 +2025,12 @@ int ObFastParserMysql::process_string(const char quote) | ||||
|           ret = OB_ALLOCATE_MEMORY_FAILED; | ||||
|           LOG_WARN("fail to alloc memory", K(ret), K(need_mem_size)); | ||||
|         } else { | ||||
|           ParseNode *node = new_node(buf, T_VARCHAR); | ||||
|           ObItemType param_type = T_VARCHAR; | ||||
|           if ('n' == raw_sql_.char_at(cur_token_begin_pos_) || | ||||
|               'N' == raw_sql_.char_at(cur_token_begin_pos_)) { | ||||
|             param_type = T_NCHAR; | ||||
|           } | ||||
|           ParseNode *node = new_node(buf, param_type); | ||||
|           if (NULL != child_node) { | ||||
|             node->num_child_ = 1; | ||||
|             node->children_ = child_node; | ||||
| @ -2056,6 +2061,8 @@ int ObFastParserMysql::process_identifier_begin_with_n() | ||||
|       cur_token_type_ = PARAM_TOKEN; | ||||
|       OZ (add_null_type_node()); | ||||
|     } | ||||
|   } else if ('\'' == raw_sql_.char_at(raw_sql_.cur_pos_)) { | ||||
|     OZ (process_string('\'')); | ||||
|   } else { | ||||
|   } | ||||
|   return ret; | ||||
|  | ||||
| @ -285,6 +285,26 @@ FALSE { | ||||
|   return BOOL_VALUE; | ||||
| } | ||||
|  | ||||
| ("N"|"n"){sqbegin} { | ||||
|   BEGIN(sq); | ||||
|   ParseResult *p = (ParseResult *)yyextra; | ||||
|   p->start_col_ = yylloc->first_column; | ||||
|   char **tmp_literal = &(p->tmp_literal_); | ||||
|   if (NULL == *tmp_literal) | ||||
|   { | ||||
|     *tmp_literal = (char*) parse_malloc(p->input_sql_len_ + 1, p->malloc_pool_); | ||||
|     check_malloc(*tmp_literal); | ||||
|   } | ||||
|   check_value(yylval); | ||||
|   malloc_new_node(yylval->node, p->malloc_pool_, T_NCHAR, 0); | ||||
|   /* | ||||
|    * This rule matches an N/n prefix followed by the string-opening pattern and switches | ||||
|    * the scanner to the sq start condition. The tmp_literal_ buffer is allocated only if | ||||
|    * it is still NULL. The node is created as T_NCHAR; the rule that finishes the string | ||||
|    * checks that type to return NATIONAL_LITERAL instead of STRING_VALUE. The node's | ||||
|    * string length starts at 0 (presumably the sq rules append the contents; TODO confirm). | ||||
|    */ | ||||
|   yylval->node->str_len_ = 0; | ||||
|   if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) { | ||||
|     COPY_WRITE(); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| {sqbegin} { | ||||
|   BEGIN(sq); | ||||
|   ParseResult *p = (ParseResult *)yyextra; | ||||
| @ -305,6 +325,7 @@ FALSE { | ||||
|   check_value(yylval); | ||||
|   yylloc->first_column = p->start_col_; | ||||
|   FORMAT_STR_NODE(yylval->node); | ||||
|   int32_t token_ret = yylval->node->type_ == T_NCHAR ? NATIONAL_LITERAL : STRING_VALUE; | ||||
|   yylval->node->text_len_ = yylloc->last_column - p->start_col_ + 1; | ||||
|   COPY_STRING(p->input_sql_ + p->start_col_ - 1, yylval->node->text_len_, yylval->node->raw_text_); | ||||
|   if (IS_FAST_PARAMETERIZE) { | ||||
| @ -312,7 +333,7 @@ FALSE { | ||||
|   } else { | ||||
|     yylval->node->sql_str_off_ = yylloc->first_column; | ||||
|     setup_token_pos_info(yylval->node, yylloc->first_column, yylval->node->text_len_ - 2); | ||||
|     return STRING_VALUE; | ||||
|     return token_ret; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -54,6 +54,7 @@ extern void obsql_oracle_parse_fatal_error(int32_t errcode, yyscan_t yyscanner, | ||||
|  | ||||
| %token <node> NAME_OB | ||||
| %token <node> STRING_VALUE | ||||
| %token <node> NATIONAL_LITERAL | ||||
| %token <node> INTNUM | ||||
| %token <node> DATE_VALUE | ||||
| %token <node> TIMESTAMP_VALUE | ||||
| @ -939,6 +940,10 @@ STRING_VALUE %prec LOWER_THAN_COMP | ||||
|   make_name_node(concat_node, result->malloc_pool_, "concat"); | ||||
|   malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, concat_node, string_list_node); | ||||
| } | ||||
| | NATIONAL_LITERAL | ||||
| { | ||||
|    $$ = $1; | ||||
| } | ||||
| ; | ||||
|  | ||||
| charset_introducer: | ||||
| @ -5222,6 +5227,52 @@ int_type_i opt_int_length_i opt_unsigned_i opt_zerofill_i | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
|   $$->sql_str_off_ = @1.first_column; | ||||
| } | ||||
| | NCHAR opt_string_length_i opt_binary | ||||
| { | ||||
|   ParseNode *charset_node = NULL; | ||||
|   ParseNode *charset_name = NULL; | ||||
|   malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); | ||||
|   malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); | ||||
|   charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); | ||||
|   if (OB_UNLIKELY(NULL == charset_name->str_value_)) { | ||||
|     yyerror(NULL, result, "No more space for mallocing string\n"); | ||||
|     YYABORT_NO_MEMORY; | ||||
|   } | ||||
|   charset_name->type_ = T_CHAR_CHARSET; | ||||
|   charset_name->param_num_ = 0; | ||||
|   charset_name->is_hidden_const_ = 1; | ||||
|   charset_node->str_value_ = charset_name->str_value_; | ||||
|   charset_node->str_len_ = charset_name->str_len_; | ||||
|   charset_node->sql_str_off_ = charset_name->sql_str_off_; | ||||
|  | ||||
|   malloc_non_terminal_node($$, result->malloc_pool_, T_CHAR, 3, charset_node, NULL, $3); | ||||
|   $$->int32_values_[0] = $2[0]; | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
|   $$->sql_str_off_ = @1.first_column; | ||||
| } | ||||
| | NATIONAL CHARACTER opt_string_length_i opt_binary | ||||
| { | ||||
|   ParseNode *charset_node = NULL; | ||||
|   ParseNode *charset_name = NULL; | ||||
|   malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); | ||||
|   malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); | ||||
|   charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); | ||||
|   if (OB_UNLIKELY(NULL == charset_name->str_value_)) { | ||||
|     yyerror(NULL, result, "No more space for mallocing string\n"); | ||||
|     YYABORT_NO_MEMORY; | ||||
|   } | ||||
|   charset_name->type_ = T_CHAR_CHARSET; | ||||
|   charset_name->param_num_ = 0; | ||||
|   charset_name->is_hidden_const_ = 1; | ||||
|   charset_node->str_value_ = charset_name->str_value_; | ||||
|   charset_node->str_len_ = charset_name->str_len_; | ||||
|   charset_node->sql_str_off_ = charset_name->sql_str_off_; | ||||
|   malloc_non_terminal_node($$, result->malloc_pool_, T_CHAR, 3, charset_node, NULL, $4); | ||||
|   $$->int32_values_[0] = $3[0]; | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
|   $$->sql_str_off_ = @1.first_column; | ||||
| } | ||||
|  | ||||
| /*  | TEXT opt_binary opt_charset opt_collation | ||||
| //  { | ||||
| //    (void)($2); | ||||
| @ -5235,12 +5286,98 @@ int_type_i opt_int_length_i opt_unsigned_i opt_zerofill_i | ||||
|   $$->int32_values_[0] = $2[0]; | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
| } | ||||
| | NCHAR VARCHAR string_length_i opt_binary | ||||
| { | ||||
|   ParseNode *charset_node = NULL; | ||||
|   ParseNode *charset_name = NULL; | ||||
|   malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); | ||||
|   malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); | ||||
|   charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); | ||||
|   if (OB_UNLIKELY(NULL == charset_name->str_value_)) { | ||||
|     yyerror(NULL, result, "No more space for mallocing string\n"); | ||||
|     YYABORT_NO_MEMORY; | ||||
|   } | ||||
|   charset_name->type_ = T_CHAR_CHARSET; | ||||
|   charset_name->param_num_ = 0; | ||||
|   charset_name->is_hidden_const_ = 1; | ||||
|   charset_node->str_value_ = charset_name->str_value_; | ||||
|   charset_node->str_len_ = charset_name->str_len_; | ||||
|   charset_node->sql_str_off_ = charset_name->sql_str_off_; | ||||
|  | ||||
|   malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $4); | ||||
|   $$->int32_values_[0] = $3[0]; | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
| } | ||||
| | NVARCHAR string_length_i opt_binary | ||||
| { | ||||
|   ParseNode *charset_node = NULL; | ||||
|   ParseNode *charset_name = NULL; | ||||
|   malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); | ||||
|   malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); | ||||
|   charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); | ||||
|   if (OB_UNLIKELY(NULL == charset_name->str_value_)) { | ||||
|     yyerror(NULL, result, "No more space for mallocing string\n"); | ||||
|     YYABORT_NO_MEMORY; | ||||
|   } | ||||
|   charset_name->type_ = T_CHAR_CHARSET; | ||||
|   charset_name->param_num_ = 0; | ||||
|   charset_name->is_hidden_const_ = 1; | ||||
|   charset_node->str_value_ = charset_name->str_value_; | ||||
|   charset_node->str_len_ = charset_name->str_len_; | ||||
|   charset_node->sql_str_off_ = charset_name->sql_str_off_; | ||||
|  | ||||
|   malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $3); | ||||
|   $$->int32_values_[0] = $2[0]; | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
| } | ||||
| | NATIONAL VARCHAR string_length_i opt_binary | ||||
| { | ||||
|   ParseNode *charset_node = NULL; | ||||
|   ParseNode *charset_name = NULL; | ||||
|   malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); | ||||
|   malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); | ||||
|   charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); | ||||
|   if (OB_UNLIKELY(NULL == charset_name->str_value_)) { | ||||
|     yyerror(NULL, result, "No more space for mallocing string\n"); | ||||
|     YYABORT_NO_MEMORY; | ||||
|   } | ||||
|   charset_name->type_ = T_CHAR_CHARSET; | ||||
|   charset_name->param_num_ = 0; | ||||
|   charset_name->is_hidden_const_ = 1; | ||||
|   charset_node->str_value_ = charset_name->str_value_; | ||||
|   charset_node->str_len_ = charset_name->str_len_; | ||||
|   charset_node->sql_str_off_ = charset_name->sql_str_off_; | ||||
|   malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $4); | ||||
|   $$->int32_values_[0] = $3[0]; | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
| } | ||||
| | CHARACTER VARYING string_length_i opt_binary opt_charset opt_collation | ||||
| { | ||||
|   malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, $5, $6, $4); | ||||
|   $$->int32_values_[0] = $3[0]; | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
| } | ||||
| | NATIONAL CHARACTER VARYING string_length_i opt_binary | ||||
| { | ||||
|   ParseNode *charset_node = NULL; | ||||
|   ParseNode *charset_name = NULL; | ||||
|   malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR); | ||||
|   malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET); | ||||
|   charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_)); | ||||
|   if (OB_UNLIKELY(NULL == charset_name->str_value_)) { | ||||
|     yyerror(NULL, result, "No more space for mallocing string\n"); | ||||
|     YYABORT_NO_MEMORY; | ||||
|   } | ||||
|   charset_name->type_ = T_CHAR_CHARSET; | ||||
|   charset_name->param_num_ = 0; | ||||
|   charset_name->is_hidden_const_ = 1; | ||||
|   charset_node->str_value_ = charset_name->str_value_; | ||||
|   charset_node->str_len_ = charset_name->str_len_; | ||||
|   charset_node->sql_str_off_ = charset_name->sql_str_off_; | ||||
|   malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $5); | ||||
|   $$->int32_values_[0] = $4[0]; | ||||
|   $$->int32_values_[1] = 0; /* is char */ | ||||
| } | ||||
| | blob_type_i opt_string_length_i_v2 | ||||
| { | ||||
|   malloc_terminal_node($$, result->malloc_pool_, $1[0]); | ||||
|  | ||||
| @ -1982,6 +1982,34 @@ int ObResolverUtils::resolve_stmt_type(const ParseResult &result, stmt::StmtType | ||||
|   return ret; | ||||
| } | ||||
| /* | ||||
|  * Applies the named charset to a string constant: sets val's collation to the | ||||
|  * charset's default collation, sets val's length to the value ObCharset::strlen_char | ||||
|  * returns under that collation, then calls ObSQLUtils::check_well_formed_str with the | ||||
|  * caller-supplied is_strict_mode / return_ret flags. | ||||
|  * Returns OB_ERR_UNKNOWN_CHARSET (and logs a user error) if the charset name is unknown. | ||||
|  */ | ||||
| int ObResolverUtils::set_string_val_charset(ObObjParam &val, ObString &charset, ObObj &result_val, | ||||
|                                             bool is_strict_mode, | ||||
|                                             bool return_ret) | ||||
| { | ||||
|   int ret = OB_SUCCESS; | ||||
|   ObCharsetType charset_type = CHARSET_INVALID; | ||||
|   if (CHARSET_INVALID == (charset_type = ObCharset::charset_type(charset.trim()))) { | ||||
|     ret = OB_ERR_UNKNOWN_CHARSET; | ||||
|     LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr()); | ||||
|   } else { | ||||
|     // use the default collation of the specified charset | ||||
|     ObCollationType collation_type = ObCharset::get_default_collation(charset_type); | ||||
|     val.set_collation_type(collation_type); | ||||
|     LOG_DEBUG("use default collation", K(charset_type), K(collation_type)); | ||||
|     ObLength length = static_cast<ObLength>(ObCharset::strlen_char(val.get_collation_type(), | ||||
|           val.get_string_ptr(), | ||||
|           val.get_string_len())); | ||||
|     val.set_length(length); | ||||
|  | ||||
|     // To report the same error as MySQL, check here whether the string is well-formed (per the original note: this is only a check; an ill-formed string is reported as an error and nothing else is done) | ||||
|     // NOTE(review): the original note said is_strict_mode has no effect when check_well_formed_str's ret_error argument is true, which is why true used to be passed directly; both flags are now forwarded from the caller, so confirm callers pass the values they intend | ||||
|     if (OB_SUCC(ret) && OB_FAIL(ObSQLUtils::check_well_formed_str(val, result_val, is_strict_mode, return_ret))) { | ||||
|       LOG_WARN("invalid str", K(ret), K(val), K(is_strict_mode), K(return_ret)); | ||||
|     } | ||||
|   } | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| int ObResolverUtils::resolve_const(const ParseNode *node, | ||||
|                                    const stmt::StmtType stmt_type, | ||||
|                                    ObIAllocator &allocator, | ||||
| @ -2040,7 +2068,8 @@ int ObResolverUtils::resolve_const(const ParseNode *node, | ||||
|         ObString str_val; | ||||
|         ObObj result_val; | ||||
|         str_val.assign_ptr(const_cast<char *>(node->str_value_), static_cast<int32_t>(node->str_len_)); | ||||
|         val.set_string(static_cast<ObObjType>(node->type_), str_val); | ||||
|         val.set_string(lib::is_mysql_mode() && is_nchar ? | ||||
|                               ObVarcharType : static_cast<ObObjType>(node->type_), str_val); | ||||
|         // decide collation | ||||
|         /* | ||||
|          MySQL determines a literal's character set and collation in the following manner: | ||||
| @ -2061,7 +2090,14 @@ int ObResolverUtils::resolve_const(const ParseNode *node, | ||||
| //        } else if (0 == node->num_child_) { | ||||
|         if (0 == node->num_child_) { | ||||
|           // for STRING without collation, e.g. show tables like STRING; | ||||
|           if (lib::is_mysql_mode() && is_nchar) { | ||||
|             ObString charset(strlen("utf8mb4"), "utf8mb4"); | ||||
|             if (OB_FAIL(set_string_val_charset(val, charset, result_val, false, false))) { | ||||
|               LOG_WARN("set string val charset failed", K(ret)); | ||||
|             } | ||||
|           } else { | ||||
|             val.set_collation_type(connection_collation); | ||||
|           } | ||||
|         } else { | ||||
|           // STRING in SQL expression | ||||
|           ParseNode *charset_node = NULL; | ||||
| @ -2076,31 +2112,15 @@ int ObResolverUtils::resolve_const(const ParseNode *node, | ||||
|             ObCollationType collation_type = CS_TYPE_INVALID; | ||||
|             if (charset_node != NULL) { | ||||
|               ObString charset(charset_node->str_len_, charset_node->str_value_); | ||||
|               if (CHARSET_INVALID == (charset_type = ObCharset::charset_type(charset.trim()))) { | ||||
|                 ret = OB_ERR_UNKNOWN_CHARSET; | ||||
|                 LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr()); | ||||
|               } else { | ||||
|                 // use the default collation of the specified charset | ||||
|                 collation_type = ObCharset::get_default_collation(charset_type); | ||||
|                 val.set_collation_type(collation_type); | ||||
|                 LOG_DEBUG("use default collation", K(charset_type), K(collation_type)); | ||||
|                 ObLength length = static_cast<ObLength>(ObCharset::strlen_char(val.get_collation_type(), | ||||
|                       val.get_string_ptr(), | ||||
|                       val.get_string_len())); | ||||
|                 val.set_length(length); | ||||
|  | ||||
|                 // 为了跟mysql报错一样,这里检查一下字符串是否合法,仅仅是检查,不合法则报错,不做其他操作 | ||||
|                 // check_well_formed_str的ret_error参数为true的时候,is_strict_mode参数失效,因此这里is_strict_mode直接传入true | ||||
|                 if (OB_SUCC(ret) && OB_FAIL(ObSQLUtils::check_well_formed_str(val, result_val, true, true))) { | ||||
|                   LOG_WARN("invalid str", K(ret), K(val)); | ||||
|                 } | ||||
|               if (OB_FAIL(set_string_val_charset(val, charset, result_val, false, false))) { | ||||
|                 LOG_WARN("set string val charset failed", K(ret)); | ||||
|               } | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         ObLengthSemantics length_semantics = LS_DEFAULT; | ||||
|         if (OB_SUCC(ret)) { | ||||
|           if (T_NVARCHAR2 == node->type_ || T_NCHAR == node->type_) { | ||||
|           if (lib::is_oracle_mode() && (T_NVARCHAR2 == node->type_ || T_NCHAR == node->type_)) { | ||||
|             length_semantics = LS_CHAR; | ||||
|           } else { | ||||
|             length_semantics = default_length_semantics; | ||||
|  | ||||
| @ -344,6 +344,12 @@ public: | ||||
|                            const ObSQLMode mode, | ||||
|                            bool is_from_pl = false); | ||||
|  | ||||
|   static int set_string_val_charset(ObObjParam &val, | ||||
|                                     ObString &charset, | ||||
|                                     ObObj &result_val, | ||||
|                                     bool is_strict_mode, | ||||
|                                     bool return_ret); | ||||
|  | ||||
|   static int resolve_data_type(const ParseNode &type_node, | ||||
|                                const common::ObString &ident_name, | ||||
|                                common::ObDataType &data_type, | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 SevenJ-swj
					SevenJ-swj