[FEAT MERGE] 424 SQL compatibility patch 433

Co-authored-by: wjhh2008 <wjh2006-1@163.com>
Co-authored-by: GongYusen <986957406@qq.com>
Co-authored-by: hy-guo <fqboyg@gmail.com>
This commit is contained in:
jingtaoye35
2024-08-23 03:32:01 +00:00
committed by ob-robot
parent cb61323917
commit e537a0c479
296 changed files with 21588 additions and 4122 deletions

View File

@ -8,6 +8,8 @@ set(ob_sql_parser_charset_object_list
ob_ctype.cc
ob_ctype_gbk.cc
ob_ctype_latin1.cc
ob_ctype_ascii.cc
ob_ctype_tis620.cc
ob_ctype_mb.cc
ob_ctype_simple.cc
ob_ctype_utf8.cc
@ -50,17 +52,17 @@ if (OB_BUILD_ORACLE_PARSER)
sql_parser_oracle_gbk_mode_lex.h
sql_parser_oracle_gbk_mode_tab.c
sql_parser_oracle_gbk_mode_tab.h
sql_parser_oracle_latin1_mode_lex.c
sql_parser_oracle_latin1_mode_lex.h
sql_parser_oracle_latin1_mode_tab.c
sql_parser_oracle_latin1_mode_tab.h
sql_parser_oracle_single_byte_mode_lex.c
sql_parser_oracle_single_byte_mode_lex.h
sql_parser_oracle_single_byte_mode_tab.c
sql_parser_oracle_single_byte_mode_tab.h
)
set(ob_inner_sql_parser_object_list
${ob_inner_sql_parser_object_list}
non_reserved_keywords_oracle_utf8_mode.c
non_reserved_keywords_oracle_gbk_mode.c
non_reserved_keywords_oracle_latin1_mode.c
non_reserved_keywords_oracle_single_byte_mode.c
)
endif()

View File

@ -50,47 +50,47 @@ ln -sf ../../../close_modules/oracle_parser/sql/parser/sql_parser_oracle_mode.l
# generate oracle single-byte sql_parser, shared by latin1/ascii/tis620 (does not support multi_byte_space、multi_byte_comma、multi_byte_left_parenthesis、multi_byte_right_parenthesis)
##1.copy lex and yacc files
cat ../../../src/sql/parser/sql_parser_oracle_mode.y > ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y
cat ../../../src/sql/parser/sql_parser_oracle_mode.l > ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
cat ../../../src/sql/parser/sql_parser_oracle_mode.y > ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y
cat ../../../src/sql/parser/sql_parser_oracle_mode.l > ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
##2.replace name
sed "s/obsql_oracle_yy/obsql_oracle_latin1_yy/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y
sed "s/obsql_oracle_yy/obsql_oracle_latin1_yy/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed "s/sql_parser_oracle_mode/sql_parser_oracle_latin1_mode/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y
sed "s/sql_parser_oracle_mode/sql_parser_oracle_latin1_mode/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed "s/obsql_oracle_parser_fatal_error/obsql_oracle_latin1_parser_fatal_error/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y
sed "s/obsql_oracle_parser_fatal_error/obsql_oracle_latin1_parser_fatal_error/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed "s/obsql_oracle_fast_parse/obsql_oracle_latin1_fast_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y
sed "s/obsql_oracle_multi_fast_parse/obsql_oracle_latin1_multi_fast_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y
sed "s/obsql_oracle_multi_values_parse/obsql_oracle_latin1_multi_values_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y
sed "s/obsql_oracle_yy/obsql_oracle_single_byte_yy/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y
sed "s/obsql_oracle_yy/obsql_oracle_single_byte_yy/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed "s/sql_parser_oracle_mode/sql_parser_oracle_single_byte_mode/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y
sed "s/sql_parser_oracle_mode/sql_parser_oracle_single_byte_mode/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed "s/obsql_oracle_parser_fatal_error/obsql_oracle_single_byte_parser_fatal_error/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y
sed "s/obsql_oracle_parser_fatal_error/obsql_oracle_single_byte_parser_fatal_error/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed "s/obsql_oracle_fast_parse/obsql_oracle_single_byte_fast_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y
sed "s/obsql_oracle_multi_fast_parse/obsql_oracle_single_byte_multi_fast_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y
sed "s/obsql_oracle_multi_values_parse/obsql_oracle_single_byte_multi_values_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y
##3.do not need to replace multi_byte_space、multi_byte_comma、multi_byte_left_parenthesis、multi_byte_right_parenthesis code
sed "s/multi_byte_space \[\\\u3000\]/multi_byte_space \[\\\x20]/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed "s/multi_byte_comma \[\\\uff0c\]/multi_byte_comma \[\\\x2c]/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed "s/multi_byte_left_parenthesis \[\\\uff08\]/multi_byte_left_parenthesis \[\\\x28]/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed "s/multi_byte_right_parenthesis \[\\\uff09\]/multi_byte_right_parenthesis \[\\\x29]/g" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed "s/multi_byte_space \[\\\u3000\]/multi_byte_space \[\\\x20]/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed "s/multi_byte_comma \[\\\uff0c\]/multi_byte_comma \[\\\x2c]/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed "s/multi_byte_left_parenthesis \[\\\uff08\]/multi_byte_left_parenthesis \[\\\x28]/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed "s/multi_byte_right_parenthesis \[\\\uff09\]/multi_byte_right_parenthesis \[\\\x29]/g" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
echo "LATIN1_CHAR [\x80-\xFF]" > ../../../src/sql/parser/latin1.txt
sed '/following character status will be rewrite by gen_parse.sh according to connection character/d' -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed '/multi_byte_connect_char \/\*According to connection character to set by gen_parse.sh\*\//r ../../../src/sql/parser/latin1.txt' -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed '/multi_byte_connect_char \/\*According to connection character to set by gen_parse.sh\*\//d' -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed 's/multi_byte_connect_char/LATIN1_CHAR/g' -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
sed -i '/<hint>{multi_byte_space}/,+5d' sql_parser_oracle_latin1_mode.l
sed -i '/<hint>{multi_byte_comma}/,+35d' sql_parser_oracle_latin1_mode.l
sed -i '/{multi_byte_comma}/,+23d' sql_parser_oracle_latin1_mode.l
sed -i '/{multi_byte_space}/,+4d' sql_parser_oracle_latin1_mode.l
sed '/following character status will be rewrite by gen_parse.sh according to connection character/d' -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed '/multi_byte_connect_char \/\*According to connection character to set by gen_parse.sh\*\//r ../../../src/sql/parser/latin1.txt' -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed '/multi_byte_connect_char \/\*According to connection character to set by gen_parse.sh\*\//d' -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed 's/multi_byte_connect_char/LATIN1_CHAR/g' -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
sed -i '/<hint>{multi_byte_space}/,+5d' sql_parser_oracle_single_byte_mode.l
sed -i '/<hint>{multi_byte_comma}/,+35d' sql_parser_oracle_single_byte_mode.l
sed -i '/{multi_byte_comma}/,+23d' sql_parser_oracle_single_byte_mode.l
sed -i '/{multi_byte_space}/,+4d' sql_parser_oracle_single_byte_mode.l
##4.generate oracle single-byte parser files
bison_parser ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y ../../../src/sql/parser/sql_parser_oracle_latin1_mode_tab.c
flex -o ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l ../../../src/sql/parser/sql_parser_oracle_latin1_mode_tab.h
bison_parser ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y ../../../src/sql/parser/sql_parser_oracle_single_byte_mode_tab.c
flex -o ../../../src/sql/parser/sql_parser_oracle_single_byte_mode_lex.c ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l ../../../src/sql/parser/sql_parser_oracle_single_byte_mode_tab.h
##5.replace other info
sed "/Setup the input buffer state to scan the given bytes/,/}/{/int i/d}" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c
sed "/Setup the input buffer state to scan the given bytes/,/}/{/for ( i = 0; i < _yybytes_len; ++i )/d}" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c
sed "/Setup the input buffer state to scan the given bytes/,/}/{s/\tbuf\[i\] = yybytes\[i\]/memcpy(buf, yybytes, _yybytes_len)/g}" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c
sed "/obsql_oracle_latin1_yylex_init is special because it creates the scanner itself/,/Initialization is the same as for the non-reentrant scanner/{s/return 1/return errno/g}" -i ../../../src/sql/parser/sql_parser_oracle_latin1_mode_lex.c
cat ../../../src/sql/parser/non_reserved_keywords_oracle_mode.c > ../../../src/sql/parser/non_reserved_keywords_oracle_latin1_mode.c
sed '/#include "ob_non_reserved_keywords.h"/a\#include "sql/parser/sql_parser_oracle_latin1_mode_tab.h\"' -i ../../../src/sql/parser/non_reserved_keywords_oracle_latin1_mode.c
sed "s/non_reserved_keywords_oracle_mode.c is for …/non_reserved_keywords_oracle_latin1_mode.c is auto generated by gen_parser.sh/g" -i ../../../src/sql/parser/non_reserved_keywords_oracle_latin1_mode.c
sed "/Setup the input buffer state to scan the given bytes/,/}/{/int i/d}" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode_lex.c
sed "/Setup the input buffer state to scan the given bytes/,/}/{/for ( i = 0; i < _yybytes_len; ++i )/d}" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode_lex.c
sed "/Setup the input buffer state to scan the given bytes/,/}/{s/\tbuf\[i\] = yybytes\[i\]/memcpy(buf, yybytes, _yybytes_len)/g}" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode_lex.c
sed "/obsql_oracle_single_byte_yylex_init is special because it creates the scanner itself/,/Initialization is the same as for the non-reentrant scanner/{s/return 1/return errno/g}" -i ../../../src/sql/parser/sql_parser_oracle_single_byte_mode_lex.c
cat ../../../src/sql/parser/non_reserved_keywords_oracle_mode.c > ../../../src/sql/parser/non_reserved_keywords_oracle_single_byte_mode.c
sed '/#include "ob_non_reserved_keywords.h"/a\#include "sql/parser/sql_parser_oracle_single_byte_mode_tab.h\"' -i ../../../src/sql/parser/non_reserved_keywords_oracle_single_byte_mode.c
sed "s/non_reserved_keywords_oracle_mode.c is for …/non_reserved_keywords_oracle_single_byte_mode.c is auto generated by gen_parser.sh/g" -i ../../../src/sql/parser/non_reserved_keywords_oracle_single_byte_mode.c
##6.clean useless files
rm -f ../../../src/sql/parser/latin1.txt
rm -f ../../../src/sql/parser/sql_parser_oracle_latin1_mode.l
rm -f ../../../src/sql/parser/sql_parser_oracle_latin1_mode.y
rm -f ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.l
rm -f ../../../src/sql/parser/sql_parser_oracle_single_byte_mode.y
# generate oracle utf8 sql_parser(support multi_byte_space、multi_byte_comma、multi_byte_left_parenthesis、multi_byte_right_parenthesis)
##1.copy lex and yacc files

View File

@ -1057,6 +1057,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] =
{"rb_build_agg", RB_BUILD_AGG},
{"rb_or_agg", RB_OR_AGG},
{"rb_and_agg", RB_AND_AGG},
{"optimizer_costs", OPTIMIZER_COSTS}
};
/** https://dev.mysql.com/doc/refman/5.7/en/sql-syntax-prepared-statements.html

View File

@ -130,6 +130,9 @@ int ObFastParserBase::parse(const ObString &stmt,
static_cast<char *>(allocator_.alloc((len + 1))))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc memory", K(ret), K(len));
} else if (OB_ISNULL(charset_info_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error", K(ret), K(charset_info_));
} else {
no_param_sql_[0] = '\0';
while (len > 0 && is_space(stmt[len - 1])) {
@ -457,8 +460,8 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
idf_pos = is_utf8_char(pos);
} else if (ObCharset::is_gb_charset(charset_type_)) {
idf_pos = is_gbk_char(pos);
} else if (CHARSET_LATIN1 == charset_type_) {
idf_pos = is_latin1_char(pos);
} else if (charset_info_->mbmaxlen == 1) {
idf_pos = is_single_byte_char(pos);
}
return idf_pos;
}
@ -930,18 +933,18 @@ int ObFastParserBase::get_one_insert_row_str(ObRawSql &raw_sql,
inline int64_t ObFastParserBase::notascii_gb_char(const int64_t pos)
{
int64_t idf_pos = -1;
if (notascii(raw_sql_.char_at(pos))) {
if ((idf_pos = is_gbk_char(pos)) != -1) {
//do nothing
} else if (notascii(raw_sql_.char_at(pos))) {
idf_pos = pos + 1;
} else {
idf_pos = is_gbk_char(pos);
}
return idf_pos;
}
inline int64_t ObFastParserBase::is_latin1_char(const int64_t pos)
inline int64_t ObFastParserBase::is_single_byte_char(const int64_t pos)
{
int64_t idf_pos = -1;
if (is_latin1(raw_sql_.char_at(pos))) {
if (is_single_byte(raw_sql_.char_at(pos))) {
idf_pos = pos + 1;
}
return idf_pos;
@ -1622,8 +1625,8 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
idf_pos = is_utf8_char(pos);
} else if (ObCharset::is_gb_charset(charset_type_)) {
idf_pos = is_gbk_char(pos);
} else if (CHARSET_LATIN1 == charset_type_) {
idf_pos = is_latin1_char(pos);
} else if (charset_info_->mbmaxlen == 1) {
idf_pos = is_single_byte_char(pos);
}
return idf_pos;
}

View File

@ -383,7 +383,7 @@ protected:
void reset_parser_node(ParseNode *node);
int64_t notascii_gb_char(const int64_t pos);
//{U}
int64_t is_latin1_char(const int64_t pos);
int64_t is_single_byte_char(const int64_t pos);
// ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U}
int64_t is_utf8_char(const int64_t pos);
// NOTES: No boundary check, the caller guarantees safety!!!
@ -446,7 +446,7 @@ protected:
return is_valid_char(ch) &&
(static_cast<uint8_t>(ch) >= 0x80 && static_cast<uint8_t>(ch) <= 0xFF);
}
inline bool is_latin1(char ch)
inline bool is_single_byte(char ch)
{
return is_valid_char(ch) &&
static_cast<uint8_t>(ch) >= 0x80 && static_cast<uint8_t>(ch) <= 0xFF;

View File

@ -120,11 +120,13 @@ char *replace_invalid_character(const struct ObCharsetInfo* src_cs, const struct
const char *str, int64_t *out_len, void *malloc_pool, int *extra_errno)
{
char *out_str = NULL;
if (OB_ISNULL(str) || OB_ISNULL(extra_errno) || OB_ISNULL(out_len)) {
if (OB_ISNULL(str) || OB_ISNULL(extra_errno) || OB_ISNULL(out_len) || OB_ISNULL(src_cs)) {
} else if (NULL == oracle_db_cs) {
out_str = const_cast<char *>(str);
} else {
ob_wc_t replace_char = !!(oracle_db_cs->state & OB_CS_UNICODE) ? 0xFFFD : '?';
ob_wc_t replace_char = (!!(oracle_db_cs->state & OB_CS_UNICODE)) &&
(!!(src_cs->state & OB_CS_UNICODE))
? 0xFFFD : '?';
uint errors = 0;
size_t str_len = STRLEN(str);
char *temp_str = NULL;
@ -269,8 +271,10 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
//case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
//case 47/*CS_TYPE_LATIN1_BIN*/:
case 245/*CS_TYPE_UTF8MB4_CROATIAN_CI*/:
case 246/*CS_TYPE_UTF8MB4_UNICODE_520_CI*/:
case 234/*CS_TYPE_UTF8MB4_CZECH_CI*/:
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
{
if (i + 2 < dup_len) {
if (str[i] == (char)0xe3 && str[i+1] == (char)0x80 && str[i+2] == (char)0x80) {

View File

@ -384,6 +384,28 @@ ParseNode *new_non_terminal_node(void *malloc_pool, ObItemType node_tag, int num
return ret_node;
}
/*
 * Create a list-style parse node that can hold up to `capacity` children and
 * is initially filled with the `num` ParseNode pointers passed as variadic
 * arguments.
 *
 * The node is obtained from new_node(malloc_pool, node_tag, capacity); on
 * success its value_ field is set to `capacity` and num_child_ to `num`.
 *
 * Returns NULL when the arguments are invalid (capacity <= 0, num <= 0 or
 * num > capacity; a diagnostic is written to stderr) or when new_node()
 * returns NULL.
 */
ParseNode *new_list_node(void *malloc_pool, ObItemType node_tag, int capacity, int num, ...)
{
  ParseNode *list_node = NULL;
  const int args_valid = (capacity > 0) && (num > 0) && (num <= capacity);
  if (OB_LIKELY(args_valid)) {
    list_node = new_node(malloc_pool, node_tag, capacity);
    if (OB_LIKELY(NULL != list_node)) {
      va_list child_args;
      int32_t idx = 0;
      /* value_ carries the reserved slot count, num_child_ the used slots. */
      list_node->value_ = capacity;
      list_node->num_child_ = num;
      va_start(child_args, num);
      while (idx < num) {
        list_node->children_[idx] = va_arg(child_args, ParseNode *);
        ++idx;
      }
      va_end(child_args);
    }
  } else {
    (void)fprintf(stderr, "ERROR invalid num:%d capacity:%d\n", num, capacity);
  }
  return list_node;
}
char *copy_expr_string(ParseResult *p, int expr_start, int expr_end)
{
char *expr_string = NULL;
@ -826,6 +848,129 @@ extern bool nodename_is_sdo_geometry_type(const ParseNode *node)
return result;
}
/*
 * Compute how many child slots to reserve for a list node that must hold
 * `n` children.
 *
 *   - n <= 2                      -> 2
 *   - n is a power of two         -> n
 *   - n > chunk (not a power of 2)-> chunk * (n / chunk + 1)
 *   - otherwise                   -> the smallest power of two >= n (min 4)
 *
 * `chunk` is the number of ParseNode pointers that fit in 2MB; per the
 * original note, 2MB equals OB_MALLOC_BIG_BLOCK_SIZE in ob_define.h.
 * Note that an exact multiple of `chunk` that is not a power of two is
 * still bumped to the next multiple.
 */
int64_t get_need_reserve_capacity(int64_t n)
{
  const int64_t chunk = (1LL << 21) / sizeof(ParseNode*); // pointers per 2MB
  int64_t reserve = 2;
  if (n > 2) {
    if (0 == (n & (n - 1))) {
      /* already a power of two: reserve exactly n */
      reserve = n;
    } else if (n > chunk) {
      /* beyond one chunk: grow linearly in whole chunks */
      reserve = chunk * ((n / chunk) + 1);
    } else {
      /* below one chunk: grow geometrically */
      for (reserve = 4; reserve < n; reserve <<= 1) {
      }
    }
  }
  return reserve;
}
/*
 * Append `node` as the last child of the list node `left_node`:
 *   (A OR B) OR C --> OR (A, B, C)
 *
 * left_node must be of type T_OP_OR, T_OP_AND or T_EXPR_LIST and its value_
 * (used here as the reserved child capacity) must not be INT64_MAX;
 * otherwise *error_code is set to OB_PARSER_ERR_UNEXPECTED.
 *
 * If left_node's capacity is smaller than what
 * get_need_reserve_capacity(num_child_ + 1) asks for, a new node is
 * allocated, the existing children are copied into it and left_node itself is
 * left untouched; otherwise left_node is extended in place.
 *
 * Returns the node holding the extended list, or NULL when:
 *   - malloc_pool or error_code is NULL (a message is written to stderr),
 *   - left_node or node is NULL (*error_code is NOT modified),
 *   - validation fails (OB_PARSER_ERR_UNEXPECTED),
 *   - allocation fails (OB_PARSER_ERR_NO_MEMORY).
 */
ParseNode *push_back_child(void *malloc_pool, int *error_code, ParseNode *left_node, ParseNode *node)
{
  ParseNode *list_node = NULL;
  if (OB_ISNULL(malloc_pool) || OB_ISNULL(error_code)) {
    (void)fprintf(stderr, "ERROR parser result is NULL\n");
  } else if (NULL != left_node && NULL != node) {
    const int is_list_type = (T_OP_OR == left_node->type_
                              || T_OP_AND == left_node->type_
                              || T_EXPR_LIST == left_node->type_);
    if (!is_list_type || INT64_MAX == left_node->value_) {
      *error_code = OB_PARSER_ERR_UNEXPECTED;
    } else {
      const int64_t need = get_need_reserve_capacity(left_node->num_child_ + 1);
      ParseNode *target = left_node;
      if (left_node->value_ < need) {
        /* not enough reserved slots: move the children to a bigger node */
        target = new_node(malloc_pool, left_node->type_, need);
        if (OB_ISNULL(target)) {
          *error_code = OB_PARSER_ERR_NO_MEMORY;
        } else {
          MEMCPY(target->children_, left_node->children_, sizeof(ParseNode*) * left_node->num_child_);
          target->value_ = need;
        }
      }
      if (NULL != target) {
        /* write the new child before bumping the count (target may be left_node) */
        target->children_[left_node->num_child_] = node;
        target->num_child_ = left_node->num_child_ + 1;
        list_node = target;
      }
    }
  }
  return list_node;
}
/*
 * Prepend `node` as the first child of the list node `right_node`:
 *   A OR (B OR C) --> OR (A, B, C)
 *
 * right_node must be of type T_OP_OR, T_OP_AND or T_EXPR_LIST and its value_
 * must not be INT64_MAX; otherwise *error_code is set to
 * OB_PARSER_ERR_UNEXPECTED.
 *
 * A new node is always allocated, sized by
 * get_need_reserve_capacity(num_child_ + 1); right_node is not modified.
 *
 * Returns the new list node, or NULL when:
 *   - malloc_pool or error_code is NULL (a message is written to stderr),
 *   - right_node or node is NULL (*error_code is NOT modified),
 *   - validation fails (OB_PARSER_ERR_UNEXPECTED),
 *   - allocation fails (OB_PARSER_ERR_NO_MEMORY).
 */
ParseNode *push_front_child(void *malloc_pool, int *error_code, ParseNode *right_node, ParseNode *node)
{
  ParseNode *merged = NULL;
  if (OB_ISNULL(malloc_pool) || OB_ISNULL(error_code)) {
    (void)fprintf(stderr, "ERROR parser result is NULL\n");
  } else if (NULL != right_node && NULL != node) {
    const int is_list_type = (T_OP_OR == right_node->type_
                              || T_OP_AND == right_node->type_
                              || T_EXPR_LIST == right_node->type_);
    if (!is_list_type || INT64_MAX == right_node->value_) {
      *error_code = OB_PARSER_ERR_UNEXPECTED;
    } else {
      const int64_t need = get_need_reserve_capacity(right_node->num_child_ + 1);
      merged = new_node(malloc_pool, right_node->type_, need);
      if (OB_ISNULL(merged)) {
        *error_code = OB_PARSER_ERR_NO_MEMORY;
      } else {
        /* slot 0 gets the new child, the old children follow from slot 1 */
        merged->value_ = need;
        merged->num_child_ = right_node->num_child_ + 1;
        MEMCPY(merged->children_ + 1, right_node->children_, sizeof(ParseNode*) * right_node->num_child_);
        merged->children_[0] = node;
      }
    }
  }
  return merged;
}
/*
 * Concatenate the children of two list nodes of the same type:
 *   (A OR B) OR (C OR D) --> OR (A, B, C, D)
 *
 * Both nodes must share one type, that type must be T_OP_OR, T_OP_AND or
 * T_EXPR_LIST, and neither value_ may be INT64_MAX; otherwise *error_code is
 * set to OB_PARSER_ERR_UNEXPECTED.
 *
 * If left_node's value_ (its reserved capacity) is smaller than
 * get_need_reserve_capacity(total children), a new node is allocated and both
 * child arrays are copied into it; otherwise right_node's children are copied
 * onto the tail of left_node in place. right_node is never modified.
 *
 * Returns the node holding the merged list, or NULL when:
 *   - malloc_pool or error_code is NULL (a message is written to stderr),
 *   - left_node or right_node is NULL (*error_code is NOT modified),
 *   - validation fails (OB_PARSER_ERR_UNEXPECTED),
 *   - allocation fails (OB_PARSER_ERR_NO_MEMORY).
 */
ParseNode *append_child(void *malloc_pool, int *error_code, ParseNode *left_node, ParseNode *right_node)
{
  ParseNode *merged = NULL;
  if (OB_ISNULL(malloc_pool) || OB_ISNULL(error_code)) {
    (void)fprintf(stderr, "ERROR parser result is NULL\n");
  } else if (NULL != left_node && NULL != right_node) {
    const int same_type = (left_node->type_ == right_node->type_);
    const int is_list_type = (T_OP_OR == left_node->type_
                              || T_OP_AND == left_node->type_
                              || T_EXPR_LIST == left_node->type_);
    if (!same_type
        || !is_list_type
        || INT64_MAX == left_node->value_
        || INT64_MAX == right_node->value_) {
      *error_code = OB_PARSER_ERR_UNEXPECTED;
    } else {
      const int64_t total = left_node->num_child_ + right_node->num_child_;
      const int64_t need = get_need_reserve_capacity(total);
      ParseNode *target = left_node;
      if (left_node->value_ < need) {
        /* left side cannot hold everything: start a bigger node with left's children */
        target = new_node(malloc_pool, left_node->type_, need);
        if (OB_ISNULL(target)) {
          *error_code = OB_PARSER_ERR_NO_MEMORY;
        } else {
          MEMCPY(target->children_, left_node->children_, sizeof(ParseNode*) * left_node->num_child_);
          target->value_ = need;
        }
      }
      if (NULL != target) {
        /* copy right's children behind left's, then publish the new count */
        MEMCPY(target->children_ + left_node->num_child_, right_node->children_, sizeof(ParseNode*) * right_node->num_child_);
        target->num_child_ = total;
        merged = target;
      }
    }
  }
  return merged;
}
ParseNode *adjust_inner_join_inner(int *error_code, ParseNode *inner_join, ParseNode *table_node)
{
ParseNode *ret_node = NULL;

View File

@ -56,7 +56,7 @@ enum SelectParserOffset
PARSE_SELECT_LIMIT,
PARSE_SELECT_FOR_UPD,
PARSE_SELECT_HINTS,
PARSE_SELECT_WHEN,
PARSE_SELECT_WHEN, // I find that it is no longer used.
PARSE_SELECT_FETCH,
PARSE_SELECT_FETCH_TEMP, //use to temporary store fetch clause in parser
PARSE_SELECT_WITH_CHECK_OPTION,
@ -386,6 +386,7 @@ extern int64_t str_remove_space(char *buff, int64_t len);
extern ParseNode *new_node(void *malloc_pool, ObItemType type, int num);
extern ParseNode *new_non_terminal_node(void *malloc_pool, ObItemType node_tag, int num, ...);
extern ParseNode *new_terminal_node(void *malloc_pool, ObItemType type);
extern ParseNode *new_list_node(void *malloc_pool, ObItemType node_tag, int capacity, int num, ...);
extern int obpl_parser_check_stack_overflow();
@ -412,6 +413,10 @@ extern bool parsenode_equal(const ParseNode *node1, const ParseNode *node2, int
extern int64_t get_question_mark(ObQuestionMarkCtx *ctx, void *malloc_pool, const char *name);
extern int64_t get_question_mark_by_defined_name(ObQuestionMarkCtx *ctx, const char *name);
extern int64_t get_need_reserve_capacity(int64_t n);
extern ParseNode *push_back_child(void *malloc_pool, int *error_code, ParseNode *left_node, ParseNode *node);
extern ParseNode *push_front_child(void *malloc_pool, int *error_code, ParseNode *right_node, ParseNode *node);
extern ParseNode *append_child(void *malloc_pool, int *error_code, ParseNode *left_node, ParseNode *right_node);
extern ParseNode *adjust_inner_join_inner(int *error_code, ParseNode *inner_join, ParseNode *table_node);
// compare ParseNode str_value_ to pattern

View File

@ -28,15 +28,15 @@ extern YY_BUFFER_STATE obsql_mysql_yy_scan_bytes (yyconst char *bytes,int len ,y
extern void obsql_mysql_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
extern void obsql_mysql_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
#ifdef OB_BUILD_ORACLE_PARSER
extern int obsql_oracle_latin1_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
extern int obsql_oracle_latin1_yyparse(ParseResult *result);
extern int obsql_oracle_latin1_multi_fast_parse(ParseResult *p);
extern int obsql_oracle_latin1_multi_values_parse(ParseResult *p);
extern int obsql_oracle_latin1_fast_parse(ParseResult *p);
extern int obsql_oracle_latin1_yylex_destroy (yyscan_t yyscanner );
extern YY_BUFFER_STATE obsql_oracle_latin1_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
extern void obsql_oracle_latin1_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
extern void obsql_oracle_latin1_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
extern int obsql_oracle_single_byte_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
extern int obsql_oracle_single_byte_yyparse(ParseResult *result);
extern int obsql_oracle_single_byte_multi_fast_parse(ParseResult *p);
extern int obsql_oracle_single_byte_multi_values_parse(ParseResult *p);
extern int obsql_oracle_single_byte_fast_parse(ParseResult *p);
extern int obsql_oracle_single_byte_yylex_destroy (yyscan_t yyscanner );
extern YY_BUFFER_STATE obsql_oracle_single_byte_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
extern void obsql_oracle_single_byte_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
extern void obsql_oracle_single_byte_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
extern int obsql_oracle_utf8_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
extern int obsql_oracle_utf8_yyparse(ParseResult *result);
extern int obsql_oracle_utf8_multi_fast_parse(ParseResult *p);
@ -89,11 +89,16 @@ int parse_init(ParseResult *p)
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
ret = obsql_oracle_utf8_yylex_init_extra(p, &(p->yyscan_info_));
break;
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
case 47/*CS_TYPE_LATIN1_BIN*/:
ret = obsql_oracle_latin1_yylex_init_extra(p, &(p->yyscan_info_));
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
case 65/*CS_TYPE_ASCII_BIN*/:
case 18/*CS_TYPE_TIS620_THAI_CI*/:
case 89/*CS_TYPE_TIS620_BIN*/:
ret = obsql_oracle_single_byte_yylex_init_extra(p, &(p->yyscan_info_));
break;
default: {
ret = -1;
@ -145,11 +150,16 @@ int parse_terminate(ParseResult *p)
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
ret = obsql_oracle_utf8_yylex_destroy(p->yyscan_info_);
break;
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
case 47/*CS_TYPE_LATIN1_BIN*/:
ret = obsql_oracle_latin1_yylex_destroy(p->yyscan_info_);
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
case 65/*CS_TYPE_ASCII_BIN*/:
case 18/*CS_TYPE_TIS620_THAI_CI*/:
case 89/*CS_TYPE_TIS620_BIN*/:
ret = obsql_oracle_single_byte_yylex_destroy(p->yyscan_info_);
break;
default: {
ret = -1;
@ -264,7 +274,9 @@ int parse_sql(ParseResult *p, const char *buf, size_t input_len)
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:{
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
{
YY_BUFFER_STATE bp = obsql_oracle_utf8_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_utf8_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
@ -291,19 +303,23 @@ int parse_sql(ParseResult *p, const char *buf, size_t input_len)
obsql_oracle_utf8_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
case 65/*CS_TYPE_ASCII_BIN*/:
case 18/*CS_TYPE_TIS620_THAI_CI*/:
case 89/*CS_TYPE_TIS620_BIN*/:
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
case 47/*CS_TYPE_LATIN1_BIN*/:{
YY_BUFFER_STATE bp = obsql_oracle_latin1_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_latin1_yy_switch_to_buffer(bp, p->yyscan_info_);
YY_BUFFER_STATE bp = obsql_oracle_single_byte_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_single_byte_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
if (p->is_fp_) {
tmp_ret = obsql_oracle_latin1_fast_parse(p);
tmp_ret = obsql_oracle_single_byte_fast_parse(p);
} else if (p->is_multi_query_) {
tmp_ret = obsql_oracle_latin1_multi_fast_parse(p);
tmp_ret = obsql_oracle_single_byte_multi_fast_parse(p);
} else if (p->is_multi_values_parser_) {
tmp_ret = obsql_oracle_latin1_multi_values_parse(p);
tmp_ret = obsql_oracle_single_byte_multi_values_parse(p);
} else {
tmp_ret = obsql_oracle_latin1_yyparse(p);
tmp_ret = obsql_oracle_single_byte_yyparse(p);
}
if (0 == tmp_ret) {
ret = OB_PARSER_SUCCESS;
@ -316,7 +332,7 @@ int parse_sql(ParseResult *p, const char *buf, size_t input_len)
ret = OB_PARSER_ERR_PARSE_SQL;
}
}
obsql_oracle_latin1_yy_delete_buffer(bp, p->yyscan_info_);
obsql_oracle_single_byte_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
default: {

View File

@ -74,7 +74,7 @@ int add_alias_name(ParseNode *node, ParseResult *result, int end);
do { \
if (OB_UNLIKELY(NULL == result)) { \
(void)fprintf(stderr, "ERROR : result is NULL\n"); \
} else if (0 == result->extra_errno_) { \
} else if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_NO_MEMORY; \
} else {/*do nothing*/} \
YYABORT; \
@ -84,7 +84,7 @@ int add_alias_name(ParseNode *node, ParseResult *result, int end);
do { \
if (OB_UNLIKELY(NULL == result)) { \
(void)fprintf(stderr, "ERROR : result is NULL\n"); \
} else if (0 == result->extra_errno_) { \
} else if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_UNEXPECTED; \
} else {/*do nothing*/} \
YYABORT; \
@ -94,7 +94,7 @@ int add_alias_name(ParseNode *node, ParseResult *result, int end);
do { \
if (OB_UNLIKELY(NULL == result)) { \
(void)fprintf(stderr, "ERROR : result is NULL\n"); \
} else if (0 == result->extra_errno_) { \
} else if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_TOO_BIG_DISPLAYWIDTH; \
} else {/*do nothing*/} \
YYABORT; \
@ -104,7 +104,7 @@ int add_alias_name(ParseNode *node, ParseResult *result, int end);
do { \
if (OB_UNLIKELY(NULL == result)) { \
(void)fprintf(stderr, "ERROR : result is NULL\n"); \
} else if (0 == result->extra_errno_) { \
} else if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_STR_LITERAL_TOO_LONG;\
} else {/*do nothing*/} \
yyerror(yylloc, yyextra, "string literal is too long\n", yytext); \
@ -115,7 +115,7 @@ int add_alias_name(ParseNode *node, ParseResult *result, int end);
do { \
if (OB_UNLIKELY(NULL == result)) { \
(void)fprintf(stderr, "ERROR : result is NULL\n"); \
} else if (0 == result->extra_errno_) { \
} else if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_UNDECLARED_VAR;\
} else {/*do nothing*/} \
YYABORT; \
@ -125,7 +125,7 @@ int add_alias_name(ParseNode *node, ParseResult *result, int end);
do { \
if (OB_UNLIKELY(NULL == result)) { \
(void)fprintf(stderr, "ERROR : result is NULL\n"); \
} else if (0 == result->extra_errno_) { \
} else if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_NOT_VALID_ROUTINE_NAME;\
} else {/*do nothing*/} \
YYABORT; \
@ -159,6 +159,14 @@ do {
} \
} while(0)
/*
 * malloc_list_node(node, malloc_pool, node_tag, capacity, num, children...)
 *
 * Assigns to `node` the result of new_list_node(malloc_pool, node_tag, ...).
 * If that result is NULL, reports "No more space for malloc" through yyerror
 * and executes YYABORT_NO_MEMORY.
 *
 * Relies on a variable named `result` being in scope at the expansion site
 * (it is not a macro parameter).
 * NOTE(review): new_list_node also returns NULL for invalid capacity/num, and
 * that case is reported here as an out-of-memory error as well.
 */
#define malloc_list_node(node, malloc_pool, node_tag, ...) \
do { \
if (OB_UNLIKELY(NULL == (node = new_list_node(malloc_pool, node_tag, ##__VA_ARGS__)))) {\
yyerror(NULL, result, "No more space for malloc\n"); \
YYABORT_NO_MEMORY; \
} \
} while(0)
#define merge_nodes(node, result, node_tag, source_tree) \
do { \
if (OB_UNLIKELY(NULL == source_tree)) { \
@ -1211,6 +1219,69 @@ do {\
}\
} while(0);\
/*
 * push_back_list(malloc_pool, result, ret_node, left_node, right_node)
 *
 * Sets ret_node to push_back_child(malloc_pool, &result->extra_errno_,
 * left_node, right_node), i.e. right_node is appended to the list left_node.
 * When the call returns NULL, result->extra_errno_ is set to
 * OB_PARSER_ERR_UNEXPECTED unless it already holds an error, the failure is
 * reported through yyerror and the parser is aborted with YYABORT.
 *
 * `result` is dereferenced without a NULL check.
 * NOTE(review): the definition ends with `while(0);`, so the expansion
 * already terminates the statement; do not use it as the unbraced body of an
 * if that has an else.
 */
#define push_back_list(malloc_pool, result, ret_node, left_node, right_node) \
do { \
ret_node = push_back_child(malloc_pool, &result->extra_errno_, left_node, right_node); \
if (OB_UNLIKELY(NULL == ret_node)) { \
if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_UNEXPECTED; \
} \
yyerror(NULL, result, "error happened\n"); \
YYABORT; \
} \
} while(0); \
/*
 * push_front_list(malloc_pool, result, ret_node, left_node, right_node)
 *
 * Sets ret_node to push_front_child(malloc_pool, &result->extra_errno_,
 * left_node, right_node). Despite the parameter names, the macro's
 * `left_node` is forwarded as the existing list and `right_node` as the node
 * to put in front of it.
 * When the call returns NULL, result->extra_errno_ is set to
 * OB_PARSER_ERR_UNEXPECTED unless it already holds an error, the failure is
 * reported through yyerror and the parser is aborted with YYABORT.
 *
 * `result` is dereferenced without a NULL check.
 * NOTE(review): the definition ends with `while(0);`, so the expansion
 * already terminates the statement; do not use it as the unbraced body of an
 * if that has an else.
 */
#define push_front_list(malloc_pool, result, ret_node, left_node, right_node) \
do { \
ret_node = push_front_child(malloc_pool, &result->extra_errno_, left_node, right_node); \
if (OB_UNLIKELY(NULL == ret_node)) { \
if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_UNEXPECTED; \
} \
yyerror(NULL, result, "error happened\n"); \
YYABORT; \
} \
} while(0); \
/*
 * append_list(malloc_pool, result, ret_node, left_node, right_node)
 *
 * Sets ret_node to append_child(malloc_pool, &result->extra_errno_,
 * left_node, right_node), i.e. the children of right_node are appended to
 * those of left_node.
 * When the call returns NULL, result->extra_errno_ is set to
 * OB_PARSER_ERR_UNEXPECTED unless it already holds an error, the failure is
 * reported through yyerror and the parser is aborted with YYABORT.
 *
 * `result` is dereferenced without a NULL check.
 * NOTE(review): the definition ends with `while(0);`, so the expansion
 * already terminates the statement; do not use it as the unbraced body of an
 * if that has an else.
 */
#define append_list(malloc_pool, result, ret_node, left_node, right_node) \
do { \
ret_node = append_child(malloc_pool, &result->extra_errno_, left_node, right_node); \
if (OB_UNLIKELY(NULL == ret_node)) { \
if (OB_PARSER_SUCCESS == result->extra_errno_) { \
result->extra_errno_ = OB_PARSER_ERR_UNEXPECTED; \
} \
yyerror(NULL, result, "error happened\n"); \
YYABORT; \
} \
} while(0); \
/*
 * flatten_and_or(malloc_pool, result, ret_node, left_node, right_node, type)
 *
 * Combines two operands of a binary AND/OR into one flat n-ary node of `type`
 * and stores it in ret_node:
 *   - both operands already of `type`  -> append_list (concatenate children)
 *   - only left_node of `type`         -> push_back_list (append right_node)
 *   - only right_node of `type`        -> push_front_list (prepend left_node)
 *   - neither                          -> new_list_node(type, capacity 2)
 *                                         holding left_node and right_node
 *
 * `type` must be T_OP_OR or T_OP_AND and both operands must be non-NULL;
 * otherwise result->extra_errno_ is set to OB_PARSER_ERR_UNEXPECTED and the
 * parser is aborted with YYABORT. A NULL from new_list_node is reported as
 * OB_PARSER_ERR_NO_MEMORY.
 *
 * The macro arguments are expanded several times, so they should be free of
 * side effects. `result` is dereferenced without a NULL check.
 */
#define flatten_and_or(malloc_pool, result, ret_node, left_node, right_node, type) \
do { \
ret_node = NULL; \
if (NULL == left_node || NULL == right_node || (T_OP_OR != type && T_OP_AND != type)) { \
result->extra_errno_ = OB_PARSER_ERR_UNEXPECTED; \
yyerror(NULL, result, "unexpected param\n"); \
YYABORT; \
} else if (left_node->type_ == type && right_node->type_ == type) { \
/* (A OR B) OR (C OR D) */ \
append_list(malloc_pool, result, ret_node, left_node, right_node); \
} else if (left_node->type_ == type && right_node->type_ != type) { \
/* (A OR B) OR C */ \
push_back_list(malloc_pool, result, ret_node, left_node, right_node); \
} else if (left_node->type_ != type && right_node->type_ == type) { \
/* A OR (B OR C) */ \
push_front_list(malloc_pool, result, ret_node, right_node, left_node); \
} else { \
ret_node = new_list_node(malloc_pool, type, 2, 2, left_node, right_node); \
if (OB_UNLIKELY(NULL == ret_node)) \
{ \
result->extra_errno_ = OB_PARSER_ERR_NO_MEMORY; \
yyerror(NULL, result, "No more space for malloc\n"); \
YYABORT; \
} \
} \
} while(0); \
#define adjust_inner_join(result, ret_node, inner_join, table_node) \
do { \
ret_node = NULL; \

View File

@ -112,10 +112,13 @@ INTERVAL {
_UTF8 { REPUT_TOKEN_NEG_SIGN(_UTF8); }
_UTF8MB4 { REPUT_TOKEN_NEG_SIGN(_UTF8MB4); }
_UTF8MB3 { REPUT_TOKEN_NEG_SIGN(_UTF8MB3); }
_GBK { REPUT_TOKEN_NEG_SIGN(_GBK); }
_GB18030 { REPUT_TOKEN_NEG_SIGN(_GB18030); }
_GB18030_2022 { REPUT_TOKEN_NEG_SIGN(_GB18030_2022); }
_LATIN1 { REPUT_TOKEN_NEG_SIGN(_LATIN1); }
_ASCII { REPUT_TOKEN_NEG_SIGN(_ASCII); }
_TIS620 { REPUT_TOKEN_NEG_SIGN(_TIS620); }
_BINARY { REPUT_TOKEN_NEG_SIGN(_BINARY); }
_UTF16 { REPUT_TOKEN_NEG_SIGN(_UTF16); }
NOT {

File diff suppressed because it is too large Load Diff