Support some charset grammar

This commit is contained in:
SevenJ-swj
2023-05-24 03:41:43 +00:00
committed by ob-robot
parent e530610cc6
commit 6bfb591de3
5 changed files with 214 additions and 23 deletions

View File

@ -2025,7 +2025,12 @@ int ObFastParserMysql::process_string(const char quote)
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc memory", K(ret), K(need_mem_size));
} else {
ParseNode *node = new_node(buf, T_VARCHAR);
ObItemType param_type = T_VARCHAR;
if ('n' == raw_sql_.char_at(cur_token_begin_pos_) ||
'N' == raw_sql_.char_at(cur_token_begin_pos_)) {
param_type = T_NCHAR;
}
ParseNode *node = new_node(buf, param_type);
if (NULL != child_node) {
node->num_child_ = 1;
node->children_ = child_node;
@ -2056,6 +2061,8 @@ int ObFastParserMysql::process_identifier_begin_with_n()
cur_token_type_ = PARAM_TOKEN;
OZ (add_null_type_node());
}
} else if ('\'' == raw_sql_.char_at(raw_sql_.cur_pos_)) {
OZ (process_string('\''));
} else {
}
return ret;

View File

@ -285,6 +285,26 @@ FALSE {
return BOOL_VALUE;
}
("N"|"n"){sqbegin} {
/* Start of a national string literal: an N/n prefix directly followed by
 * {sqbegin}. Enters the `sq` start condition and prepares a T_NCHAR node
 * (the sibling {sqbegin} rule handles the unprefixed case). */
BEGIN(sq);
ParseResult *p = (ParseResult *)yyextra;
/* Remember where the literal starts (the column of the N prefix). */
p->start_col_ = yylloc->first_column;
char **tmp_literal = &(p->tmp_literal_);
/* Allocate the scratch literal buffer only if it does not exist yet,
 * sized input_sql_len_ + 1. */
if (NULL == *tmp_literal)
{
*tmp_literal = (char*) parse_malloc(p->input_sql_len_ + 1, p->malloc_pool_);
/* NOTE(review): check_malloc presumably bails out when the allocation failed — confirm. */
check_malloc(*tmp_literal);
}
check_value(yylval);
/* The node is created as T_NCHAR with an empty string value. */
malloc_new_node(yylval->node, p->malloc_pool_, T_NCHAR, 0);
yylval->node->str_len_ = 0;
/* NOTE(review): COPY_WRITE is opaque here; it runs only when fast
 * parameterization is on and this statement need not be parameterized. */
if (IS_FAST_PARAMETERIZE && !IS_NEED_PARAMETERIZE) {
COPY_WRITE();
}
}
{sqbegin} {
BEGIN(sq);
ParseResult *p = (ParseResult *)yyextra;
@ -305,6 +325,7 @@ FALSE {
check_value(yylval);
yylloc->first_column = p->start_col_;
FORMAT_STR_NODE(yylval->node);
int32_t token_ret = yylval->node->type_ == T_NCHAR ? NATIONAL_LITERAL : STRING_VALUE;
yylval->node->text_len_ = yylloc->last_column - p->start_col_ + 1;
COPY_STRING(p->input_sql_ + p->start_col_ - 1, yylval->node->text_len_, yylval->node->raw_text_);
if (IS_FAST_PARAMETERIZE) {
@ -312,7 +333,7 @@ FALSE {
} else {
yylval->node->sql_str_off_ = yylloc->first_column;
setup_token_pos_info(yylval->node, yylloc->first_column, yylval->node->text_len_ - 2);
return STRING_VALUE;
return token_ret;
}
}

View File

@ -54,6 +54,7 @@ extern void obsql_oracle_parse_fatal_error(int32_t errcode, yyscan_t yyscanner,
%token <node> NAME_OB
%token <node> STRING_VALUE
%token <node> NATIONAL_LITERAL
%token <node> INTNUM
%token <node> DATE_VALUE
%token <node> TIMESTAMP_VALUE
@ -939,6 +940,10 @@ STRING_VALUE %prec LOWER_THAN_COMP
make_name_node(concat_node, result->malloc_pool_, "concat");
malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, concat_node, string_list_node);
}
| NATIONAL_LITERAL
{
/* A NATIONAL_LITERAL token is passed through unchanged: the rule's value is
 * the token's own node. */
$$ = $1;
}
;
charset_introducer:
@ -5222,6 +5227,52 @@ int_type_i opt_int_length_i opt_unsigned_i opt_zerofill_i
$$->int32_values_[1] = 0; /* is char */
$$->sql_str_off_ = @1.first_column;
}
| NCHAR opt_string_length_i opt_binary
{
/* NCHAR [length] [binary]: produces a T_CHAR node whose charset child is
 * fixed to "utf8mb4"; this alternative accepts no charset/collation clause. */
ParseNode *charset_node = NULL;
ParseNode *charset_name = NULL;
malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR);
malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET);
charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_));
if (OB_UNLIKELY(NULL == charset_name->str_value_)) {
yyerror(NULL, result, "No more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
/* charset_name is retyped to T_CHAR_CHARSET and flagged as a hidden const.
 * It is not attached to $$; only its string and offset fields are copied
 * onto charset_node below. */
charset_name->type_ = T_CHAR_CHARSET;
charset_name->param_num_ = 0;
charset_name->is_hidden_const_ = 1;
charset_node->str_value_ = charset_name->str_value_;
charset_node->str_len_ = charset_name->str_len_;
charset_node->sql_str_off_ = charset_name->sql_str_off_;
/* Children: charset_node, NULL, opt_binary ($3). The sibling CHARACTER VARYING
 * alternative fills the same three slots with opt_charset, opt_collation,
 * opt_binary. */
malloc_non_terminal_node($$, result->malloc_pool_, T_CHAR, 3, charset_node, NULL, $3);
/* First element of opt_string_length_i — presumably the declared length; confirm. */
$$->int32_values_[0] = $2[0];
$$->int32_values_[1] = 0; /* is char */
$$->sql_str_off_ = @1.first_column;
}
| NATIONAL CHARACTER opt_string_length_i opt_binary
{
/* NATIONAL CHARACTER [length] [binary]: produces a T_CHAR node whose charset
 * child is fixed to "utf8mb4"; this alternative accepts no charset/collation
 * clause. */
ParseNode *charset_node = NULL;
ParseNode *charset_name = NULL;
malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR);
malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET);
charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_));
if (OB_UNLIKELY(NULL == charset_name->str_value_)) {
yyerror(NULL, result, "No more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
/* charset_name is retyped to T_CHAR_CHARSET and flagged as a hidden const.
 * It is not attached to $$; only its string and offset fields are copied
 * onto charset_node below. */
charset_name->type_ = T_CHAR_CHARSET;
charset_name->param_num_ = 0;
charset_name->is_hidden_const_ = 1;
charset_node->str_value_ = charset_name->str_value_;
charset_node->str_len_ = charset_name->str_len_;
charset_node->sql_str_off_ = charset_name->sql_str_off_;
/* Children: charset_node, NULL, opt_binary ($4). */
malloc_non_terminal_node($$, result->malloc_pool_, T_CHAR, 3, charset_node, NULL, $4);
/* First element of opt_string_length_i — presumably the declared length; confirm. */
$$->int32_values_[0] = $3[0];
$$->int32_values_[1] = 0; /* is char */
$$->sql_str_off_ = @1.first_column;
}
/* | TEXT opt_binary opt_charset opt_collation
// {
// (void)($2);
@ -5235,12 +5286,98 @@ int_type_i opt_int_length_i opt_unsigned_i opt_zerofill_i
$$->int32_values_[0] = $2[0];
$$->int32_values_[1] = 0; /* is char */
}
| NCHAR VARCHAR string_length_i opt_binary
{
/* NCHAR VARCHAR length [binary]: produces a T_VARCHAR node whose charset child
 * is fixed to "utf8mb4"; this alternative accepts no charset/collation clause. */
ParseNode *charset_node = NULL;
ParseNode *charset_name = NULL;
malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR);
malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET);
charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_));
if (OB_UNLIKELY(NULL == charset_name->str_value_)) {
yyerror(NULL, result, "No more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
/* charset_name is retyped to T_CHAR_CHARSET and flagged as a hidden const.
 * It is not attached to $$; only its string and offset fields are copied
 * onto charset_node below. */
charset_name->type_ = T_CHAR_CHARSET;
charset_name->param_num_ = 0;
charset_name->is_hidden_const_ = 1;
charset_node->str_value_ = charset_name->str_value_;
charset_node->str_len_ = charset_name->str_len_;
charset_node->sql_str_off_ = charset_name->sql_str_off_;
/* Children: charset_node, NULL, opt_binary ($4). */
malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $4);
/* First element of string_length_i — presumably the declared length; confirm. */
$$->int32_values_[0] = $3[0];
$$->int32_values_[1] = 0; /* is char */
}
| NVARCHAR string_length_i opt_binary
{
/* NVARCHAR length [binary]: produces a T_VARCHAR node whose charset child is
 * fixed to "utf8mb4"; this alternative accepts no charset/collation clause. */
ParseNode *charset_node = NULL;
ParseNode *charset_name = NULL;
malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR);
malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET);
charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_));
if (OB_UNLIKELY(NULL == charset_name->str_value_)) {
yyerror(NULL, result, "No more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
/* charset_name is retyped to T_CHAR_CHARSET and flagged as a hidden const.
 * It is not attached to $$; only its string and offset fields are copied
 * onto charset_node below. */
charset_name->type_ = T_CHAR_CHARSET;
charset_name->param_num_ = 0;
charset_name->is_hidden_const_ = 1;
charset_node->str_value_ = charset_name->str_value_;
charset_node->str_len_ = charset_name->str_len_;
charset_node->sql_str_off_ = charset_name->sql_str_off_;
/* Children: charset_node, NULL, opt_binary ($3). */
malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $3);
/* First element of string_length_i — presumably the declared length; confirm. */
$$->int32_values_[0] = $2[0];
$$->int32_values_[1] = 0; /* is char */
}
| NATIONAL VARCHAR string_length_i opt_binary
{
/* NATIONAL VARCHAR length [binary]: produces a T_VARCHAR node whose charset
 * child is fixed to "utf8mb4"; this alternative accepts no charset/collation
 * clause. */
ParseNode *charset_node = NULL;
ParseNode *charset_name = NULL;
malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR);
malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET);
charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_));
if (OB_UNLIKELY(NULL == charset_name->str_value_)) {
yyerror(NULL, result, "No more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
/* charset_name is retyped to T_CHAR_CHARSET and flagged as a hidden const.
 * It is not attached to $$; only its string and offset fields are copied
 * onto charset_node below. */
charset_name->type_ = T_CHAR_CHARSET;
charset_name->param_num_ = 0;
charset_name->is_hidden_const_ = 1;
charset_node->str_value_ = charset_name->str_value_;
charset_node->str_len_ = charset_name->str_len_;
charset_node->sql_str_off_ = charset_name->sql_str_off_;
/* Children: charset_node, NULL, opt_binary ($4). */
malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $4);
/* First element of string_length_i — presumably the declared length; confirm. */
$$->int32_values_[0] = $3[0];
$$->int32_values_[1] = 0; /* is char */
}
| CHARACTER VARYING string_length_i opt_binary opt_charset opt_collation
{
/* CHARACTER VARYING length [binary] [charset] [collation]: produces a
 * T_VARCHAR node with children (opt_charset, opt_collation, opt_binary). */
malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, $5, $6, $4);
/* First element of string_length_i — presumably the declared length; confirm. */
$$->int32_values_[0] = $3[0];
$$->int32_values_[1] = 0; /* is char */
}
| NATIONAL CHARACTER VARYING string_length_i opt_binary
{
/* NATIONAL CHARACTER VARYING length [binary]: produces a T_VARCHAR node whose
 * charset child is fixed to "utf8mb4"; this alternative accepts no
 * charset/collation clause. */
ParseNode *charset_node = NULL;
ParseNode *charset_name = NULL;
malloc_terminal_node(charset_name, result->malloc_pool_, T_VARCHAR);
malloc_terminal_node(charset_node, result->malloc_pool_, T_CHARSET);
charset_name->str_value_ = parse_strdup("utf8mb4", result->malloc_pool_, &(charset_name->str_len_));
if (OB_UNLIKELY(NULL == charset_name->str_value_)) {
yyerror(NULL, result, "No more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
/* charset_name is retyped to T_CHAR_CHARSET and flagged as a hidden const.
 * It is not attached to $$; only its string and offset fields are copied
 * onto charset_node below. */
charset_name->type_ = T_CHAR_CHARSET;
charset_name->param_num_ = 0;
charset_name->is_hidden_const_ = 1;
charset_node->str_value_ = charset_name->str_value_;
charset_node->str_len_ = charset_name->str_len_;
charset_node->sql_str_off_ = charset_name->sql_str_off_;
/* Children: charset_node, NULL, opt_binary ($5). */
malloc_non_terminal_node($$, result->malloc_pool_, T_VARCHAR, 3, charset_node, NULL, $5);
/* First element of string_length_i — presumably the declared length; confirm. */
$$->int32_values_[0] = $4[0];
$$->int32_values_[1] = 0; /* is char */
}
| blob_type_i opt_string_length_i_v2
{
malloc_terminal_node($$, result->malloc_pool_, $1[0]);

View File

@ -1982,6 +1982,34 @@ int ObResolverUtils::resolve_stmt_type(const ParseResult &result, stmt::StmtType
return ret;
}
// Stamps the string value `val` with the default collation of the charset named
// by `charset`, refreshes its length in characters, and runs a well-formedness
// check on it.
//
// @param val            string value to update (collation and length are written)
// @param charset        charset name; it is trimmed before the lookup
// @param result_val     forwarded to ObSQLUtils::check_well_formed_str
// @param is_strict_mode forwarded to ObSQLUtils::check_well_formed_str
// @param return_ret     forwarded to ObSQLUtils::check_well_formed_str
// @return OB_SUCCESS, OB_ERR_UNKNOWN_CHARSET when the name maps to no charset,
//         or the error code produced by check_well_formed_str.
int ObResolverUtils::set_string_val_charset(ObObjParam &val, ObString &charset, ObObj &result_val,
                                            bool is_strict_mode,
                                            bool return_ret)
{
  int ret = OB_SUCCESS;
  ObCharsetType charset_type = ObCharset::charset_type(charset.trim());
  if (CHARSET_INVALID != charset_type) {
    // The charset carries no explicit collation here, so its default one is used.
    ObCollationType collation_type = ObCharset::get_default_collation(charset_type);
    val.set_collation_type(collation_type);
    LOG_DEBUG("use default collation", K(charset_type), K(collation_type));
    // Length is counted in characters under the collation that was just set.
    val.set_length(static_cast<ObLength>(ObCharset::strlen_char(val.get_collation_type(),
                                                                val.get_string_ptr(),
                                                                val.get_string_len())));
    // The string is validated so that errors match MySQL's; this is a check only,
    // an ill-formed string is reported and nothing else is done with it.
    // NOTE(review): an earlier comment stated that check_well_formed_str ignores
    // is_strict_mode when its error-returning flag is true — confirm against
    // ObSQLUtils now that both flags come from the caller.
    if (OB_FAIL(ObSQLUtils::check_well_formed_str(val, result_val, is_strict_mode, return_ret))) {
      LOG_WARN("invalid str", K(ret), K(val), K(is_strict_mode), K(return_ret));
    }
  } else {
    ret = OB_ERR_UNKNOWN_CHARSET;
    LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr());
  }
  return ret;
}
int ObResolverUtils::resolve_const(const ParseNode *node,
const stmt::StmtType stmt_type,
ObIAllocator &allocator,
@ -2040,7 +2068,8 @@ int ObResolverUtils::resolve_const(const ParseNode *node,
ObString str_val;
ObObj result_val;
str_val.assign_ptr(const_cast<char *>(node->str_value_), static_cast<int32_t>(node->str_len_));
val.set_string(static_cast<ObObjType>(node->type_), str_val);
val.set_string(lib::is_mysql_mode() && is_nchar ?
ObVarcharType : static_cast<ObObjType>(node->type_), str_val);
// decide collation
/*
MySQL determines a literal's character set and collation in the following manner:
@ -2061,7 +2090,14 @@ int ObResolverUtils::resolve_const(const ParseNode *node,
// } else if (0 == node->num_child_) {
if (0 == node->num_child_) {
// for STRING without collation, e.g. show tables like STRING;
val.set_collation_type(connection_collation);
if (lib::is_mysql_mode() && is_nchar) {
ObString charset(strlen("utf8mb4"), "utf8mb4");
if (OB_FAIL(set_string_val_charset(val, charset, result_val, false, false))) {
LOG_WARN("set string val charset failed", K(ret));
}
} else {
val.set_collation_type(connection_collation);
}
} else {
// STRING in SQL expression
ParseNode *charset_node = NULL;
@ -2076,31 +2112,15 @@ int ObResolverUtils::resolve_const(const ParseNode *node,
ObCollationType collation_type = CS_TYPE_INVALID;
if (charset_node != NULL) {
ObString charset(charset_node->str_len_, charset_node->str_value_);
if (CHARSET_INVALID == (charset_type = ObCharset::charset_type(charset.trim()))) {
ret = OB_ERR_UNKNOWN_CHARSET;
LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr());
} else {
// use the default collation of the specified charset
collation_type = ObCharset::get_default_collation(charset_type);
val.set_collation_type(collation_type);
LOG_DEBUG("use default collation", K(charset_type), K(collation_type));
ObLength length = static_cast<ObLength>(ObCharset::strlen_char(val.get_collation_type(),
val.get_string_ptr(),
val.get_string_len()));
val.set_length(length);
// 为了跟mysql报错一样,这里检查一下字符串是否合法,仅仅是检查,不合法则报错,不做其他操作
// check_well_formed_str的ret_error参数为true的时候,is_strict_mode参数失效,因此这里is_strict_mode直接传入true
if (OB_SUCC(ret) && OB_FAIL(ObSQLUtils::check_well_formed_str(val, result_val, true, true))) {
LOG_WARN("invalid str", K(ret), K(val));
}
if (OB_FAIL(set_string_val_charset(val, charset, result_val, false, false))) {
LOG_WARN("set string val charset failed", K(ret));
}
}
}
}
ObLengthSemantics length_semantics = LS_DEFAULT;
if (OB_SUCC(ret)) {
if (T_NVARCHAR2 == node->type_ || T_NCHAR == node->type_) {
if (lib::is_oracle_mode() && (T_NVARCHAR2 == node->type_ || T_NCHAR == node->type_)) {
length_semantics = LS_CHAR;
} else {
length_semantics = default_length_semantics;

View File

@ -344,6 +344,12 @@ public:
const ObSQLMode mode,
bool is_from_pl = false);
// Sets the collation of `val` to the default collation of the charset named by
// `charset`, updates its character length, and checks that the string is
// well-formed. `result_val`, `is_strict_mode` and `return_ret` are forwarded to
// ObSQLUtils::check_well_formed_str. Returns OB_ERR_UNKNOWN_CHARSET when the
// charset name is not recognized.
static int set_string_val_charset(ObObjParam &val,
ObString &charset,
ObObj &result_val,
bool is_strict_mode,
bool return_ret);
static int resolve_data_type(const ParseNode &type_node,
const common::ObString &ident_name,
common::ObDataType &data_type,