467 lines
16 KiB
C
467 lines
16 KiB
C
/**
|
|
* Copyright (c) 2021 OceanBase
|
|
* OceanBase CE is licensed under Mulan PubL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
|
* You may obtain a copy of Mulan PubL v2 at:
|
|
* http://license.coscl.org.cn/MulanPubL-2.0
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PubL v2 for more details.
|
|
*/
|
|
|
|
#ifndef OCEANBASE_SQL_PARSER_PARSE_NODE_H_
|
|
#define OCEANBASE_SQL_PARSER_PARSE_NODE_H_
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdarg.h>
|
|
#include <stdint.h>
|
|
#include <stdbool.h>
|
|
#include <setjmp.h>
|
|
#ifdef SQL_PARSER_COMPILATION
|
|
#include "ob_sql_mode.h"
|
|
#include "ob_item_type.h"
|
|
#else
|
|
#include "common/sql_mode/ob_sql_mode.h"
|
|
#include "objit/common/ob_item_type.h"
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* Size, in bytes, of the err_msg_ buffer in ErrStat. */
#define MAX_ERROR_MSG 1024
|
|
|
|
/* Forward declaration only: this header uses struct ObCharsetInfo solely through
 * pointers (the charset_info_ members of ParseResult). */
struct ObCharsetInfo;
|
|
|
|
/*
 * Offsets naming the individual parts of a SELECT statement.
 * Enumerators are consecutive starting at 0, so PARSE_SELECT_MAX_IDX equals
 * the number of offsets.
 * NOTE(review): presumably these index the children of the SELECT parse node;
 * confirm against the grammar.
 */
enum SelectParserOffset
{
  PARSE_SELECT_WITH,
  PARSE_SELECT_DISTINCT,
  PARSE_SELECT_SELECT,
  PARSE_SELECT_INTO,            // into before from
  PARSE_SELECT_FROM,
  PARSE_SELECT_WHERE,
  PARSE_SELECT_DYNAMIC_SW_CBY,  // connect by node or start with node
  PARSE_SELECT_DYNAMIC_CBY_SW,  // connect by node or start with node
  PARSE_SELECT_DYNAMIC_GROUP,
  PARSE_SELECT_DYNAMIC_HAVING,
  PARSE_SELECT_NAMED_WINDOWS,
  PARSE_SELECT_SET,
  PARSE_SELECT_FORMER,
  PARSE_SELECT_LATER,
  PARSE_SELECT_ORDER,
  PARSE_SELECT_LIMIT,
  PARSE_SELECT_FOR_UPD,
  PARSE_SELECT_HINTS,
  PARSE_SELECT_WHEN,
  PARSE_SELECT_FETCH,
  PARSE_SELECT_FETCH_TEMP,      // used to temporarily store the fetch clause in the parser
  PARSE_SELECT_WITH_CHECK_OPTION,
  PARSE_SELECT_INTO_EXTRA,      // ATTENTION!! SELECT_INTO_EXTRA must be the last one
  PARSE_SELECT_MAX_IDX
};
|
|
|
|
/*
 * Offsets naming the parts of a GRANT <role> statement.
 * PARSE_GRANT_ROLE_MAX_IDX equals the number of offsets.
 */
enum GrantParseOffset
{
  PARSE_GRANT_ROLE_LIST,
  PARSE_GRANT_ROLE_GRANTEE,
  PARSE_GRANT_ROLE_OPT_WITH,
  PARSE_GRANT_ROLE_MAX_IDX
};
|
|
|
|
/*
 * Offsets naming the parts of an Oracle-mode GRANT <system privilege> statement.
 * PARSE_GRANT_SYS_PRIV_ORACLE_MAX_IDX equals the number of offsets.
 */
enum GrantParseSysOffset
{
  PARSE_GRANT_SYS_PRIV_ORACLE_LIST,
  PARSE_GRANT_SYS_PRIV_ORACLE_GRANTEE,
  PARSE_GRANT_SYS_PRIV_ORACLE_OPT_WITH,
  PARSE_GRANT_SYS_PRIV_ORACLE_MAX_IDX
};
|
|
|
|
/* Parser operating modes. */
enum ParseMode
{
  STD_MODE = 0,
  FP_MODE,                               /* fast parse: keep hints and parameterize */
  MULTI_MODE,                            /* multi query ultra-fast parse */
  FP_PARAMERIZE_AND_FILTER_HINT_MODE,    /* filter out hints and parameterize */
  FP_NO_PARAMERIZE_AND_FILTER_HINT_MODE, /* filter out hints and do not parameterize */
  TRIGGER_MODE,                          /* treat ':xxx' as identifier */
  DYNAMIC_SQL_MODE,                      /* while parsing dynamic SQL, whether the names of the
                                          * ':idx' and ':identifier' placeholders are checked
                                          * depends on the statement type */
  DBMS_SQL_MODE,
  UDR_SQL_MODE,
  INS_MULTI_VALUES,
};
|
|
|
|
typedef struct
|
|
{
|
|
int err_code_;
|
|
char err_msg_[MAX_ERROR_MSG];
|
|
} ErrStat;
|
|
|
|
/* Forward declaration; struct _ParseNode (typedef ParseNode) is defined further down in this header. */
struct _ParseNode;
|
|
|
|
/* Source location of a statement: first/last column and first/last line. */
typedef struct _ObStmtLoc
{
  int first_column_;
  int last_column_;
  int first_line_;
  int last_line_;
} ObStmtLoc;
|
|
|
|
/*
 * Kinds of UDT routine. The non-zero values are distinct powers of two,
 * so they can be combined as a bit mask.
 */
enum UdtUdfType
{
  UDT_UDF_UNKNOWN = 0,
  UDT_UDF_CONS = 1,
  UDT_UDF_MEMBER = 2,
  UDT_UDF_STATIC = 4,
  UDT_UDF_MAP = 8,
  UDT_UDF_ORDER = 16,
};
|
|
|
|
typedef struct _ParseNode
|
|
{
|
|
ObItemType type_;
|
|
int32_t num_child_; /* attributes for non-terninal node, which has children */
|
|
int16_t param_num_; //记录该node对应的原始text中常量的个数, 暂时仅T_CAST_ARGUMENT使用
|
|
union {
|
|
uint32_t flag_;
|
|
struct {
|
|
uint32_t is_neg_ : 1;// 记录常量节点的父节点是否为T_OP_NEG节点, 1表示是, 0 表示不是
|
|
uint32_t is_hidden_const_ : 1; //1 表示某常量正常parse能识别但fast parse不能识别, 0 表示都能识别。
|
|
uint32_t is_tree_not_param_ :1; //1 表示该节点及其子节点常量均不能参数化, 0表示没该限制
|
|
uint32_t length_semantics_ :2; //2 for oracle [char|varbinary] (n b [bytes|char])
|
|
uint32_t is_val_paramed_item_idx_ :1; // T_PROJECT_STRING的values是否是select_item_param_infos数组的下标
|
|
uint32_t is_copy_raw_text_ : 1; // 是否回填常量节点的raw_text_,用于select item常量参数化
|
|
uint32_t is_column_varchar_ : 1; // 投影列是否是一个常量字符串,用于select item常量参数化
|
|
uint32_t is_trans_from_minus_: 1; // 负数常量节点是否是从减号操作转换而来,比如1 - 2,词法阶段会生成一个-2
|
|
uint32_t is_assigned_from_child_: 1; // 常量节点是否由子节点赋值得到,用于处理int64_min
|
|
uint32_t is_num_must_be_pos_: 1; //
|
|
uint32_t is_date_unit_ : 1; //1 表示是date unit常量,在反拼的时候需要反拼为字符串
|
|
uint32_t is_literal_bool_ : 1; // indicate node is a literal TRUE/FALSE
|
|
uint32_t is_empty_ : 1; // 表示是否缺省该节点,1表示缺省,0表示没有缺省, opt_asc_desc节点中使用到
|
|
uint32_t is_multiset_ : 1; // for cast(multiset(...) as ...)
|
|
uint32_t is_forbid_anony_parameter_ : 1; // 1 表示禁止匿名块参数化
|
|
uint32_t is_input_quoted_ : 1; // indicate name_ob input whether with double quote
|
|
uint32_t is_forbid_parameter_ : 1; //1 indicate forbid parameter
|
|
uint32_t reserved_;
|
|
};
|
|
};
|
|
/* attributes for terminal node, it is real value */
|
|
/* 数值类型的node将用到value_来存放其值,但是对于字符串和decimal类型,用str_value来存字符串指针,
|
|
* str_len_表示字符串的长度,不要用strlen(str_value_)来获取str_value_的值,因为str_value_不保证以'\0'结尾.
|
|
* 此外,为什么不将value_和str_len_作为union呢,这是因为在parse
|
|
* 一个数值类型的时候,不仅需要存储其value,还需要存储其原始字符串,举例:select
|
|
* 1111;这种语句,我们不仅要存int value的值,还得存'1111' 字符串*/
|
|
union {
|
|
int64_t value_;
|
|
int32_t int32_values_[2];
|
|
int16_t int16_values_[4];
|
|
};
|
|
const char *str_value_;
|
|
int64_t str_len_;
|
|
union {
|
|
int64_t pl_str_off_; // pl层, 记录str在原始字符串中的起始偏移
|
|
int64_t sql_str_off_; // sql层, 记录str在原始字符串中的起始偏移
|
|
};
|
|
|
|
/* 用于存放在词法阶段被特殊处理后丢失的文本串 eg: NULL, Date '2010-10-11',
|
|
* 该文本串在fast parse参数化后,如果该参数作为plan cache中stmtkey的一部分,
|
|
* 则需要使用原始的文本串,而不是丢失文本串后的值,否则会导致plan cache误匹配
|
|
* */
|
|
const char *raw_text_;
|
|
int64_t text_len_;
|
|
int64_t pos_; //记录?在带?的sql中的偏移
|
|
|
|
struct _ParseNode **children_; /* attributes for non-terminal node, which has children */
|
|
ObStmtLoc stmt_loc_; //临时放在这里,后面要移到parse_stmt_node.h中去
|
|
union {
|
|
int64_t raw_param_idx_; // 常量节点在fp_result.raw_params_中的下标
|
|
int64_t raw_sql_offset_; // 常量节点在sql中的字符偏移
|
|
};
|
|
|
|
#ifdef SQL_PARSER_COMPILATION
|
|
int token_off_;
|
|
int token_len_;
|
|
#endif
|
|
} ParseNode;
|
|
|
|
struct _ParamList;
|
|
|
|
typedef struct _ParamList
|
|
{
|
|
ParseNode *node_;
|
|
struct _ParamList *next_;
|
|
} ParamList;
|
|
|
|
// Types of external dependency objects used by the parser
enum RefType
{
  REF_REL = 0,
  REF_PROC,
  REF_FUNC,
};
|
|
|
|
//外部依赖对象链表
|
|
typedef struct _RefObjList
|
|
{
|
|
enum RefType type_;
|
|
ParseNode *node_;
|
|
struct _RefObjList *next_;
|
|
} RefObjList;
|
|
|
|
//解析PL中sql语句时需要使用的属性集合
|
|
typedef struct _PLParseInfo
|
|
{
|
|
bool is_pl_parse_;//用于标识当前parser逻辑是否为PLParse调用
|
|
bool is_pl_parse_expr_; //用于标识当前parser逻辑是否在解析PLParser的expr
|
|
bool is_forbid_pl_fp_;
|
|
bool is_inner_parse_;
|
|
int last_pl_symbol_pos_; //上一个pl变量的结束位置
|
|
int plsql_line_;
|
|
/*for mysql pl*/
|
|
void *pl_ns_; //ObPLBlockNS
|
|
RefObjList *ref_object_nodes_; //依赖对象链表头
|
|
RefObjList *tail_ref_object_node_; //依赖对象链表尾
|
|
} PLParseInfo;
|
|
|
|
// Per an earlier team discussion this limit is to be made dynamic later; fixed at 128 for now
#define MAX_QUESTION_MARK 128
|
|
|
|
/*
 * Bookkeeping for question-mark placeholders: an array of names with its
 * count and capacity, plus three mode flags.
 * NOTE(review): the exact meaning of the by_* flags is not visible in this
 * header; see get_question_mark() / get_question_mark_by_defined_name().
 */
typedef struct _ObQuestionMarkCtx
{
  char **name_;
  int count_;
  int capacity_;
  bool by_ordinal_;
  bool by_name_;
  bool by_defined_name_;
} ObQuestionMarkCtx;
|
|
|
|
|
|
// record the minus status while parsing the sql
// for example, 'select - -1 from dual'
// when parser sees the first '-', pos_ = 7, raw_sql_offset_ = 7, has_minus_ = true, is_cur_numeric_ = false
// after seeing the second '-', members are reset, pos_ = 9, raw_sql_offset_ = 9, has_minus_ = true, is_cur_numeric_ = false
// after seeing '1', is_cur_numeric_ = true, then param node '-1' is returned
// (the struct tag spelling "_ObMinusStatuCtx" is historical and kept for compatibility)
typedef struct _ObMinusStatuCtx
{
  int pos_;             // position of the negative number in the parameterized sql
  int raw_sql_offset_;  // position of the minus sign in the original sql
  bool has_minus_;      // remembers the pending minus sign; when a numeric literal follows, the lexer returns a negative-number node
  bool is_cur_numeric_; // whether the current constant node is a numeric node
} ObMinusStatusCtx;
|
|
|
|
#ifdef SQL_PARSER_COMPILATION
|
|
// for comment_list_ in ParseResult: offset and length of one token
typedef struct TokenPosInfo
{
  int token_off_;
  int token_len_;
} TokenPosInfo;
|
|
#endif
|
|
|
|
// Singly linked list entry recording the positions of a left and a right parenthesis.
// NOTE(review): the previous comment here ("linked list of external dependency
// objects") was copied from RefObjList and did not describe this struct.
typedef struct _ParenthesesOffset
{
  int left_parentheses_;
  int right_parentheses_;
  struct _ParenthesesOffset *next_;
} ParenthesesOffset;
|
|
|
|
//dml base runtime context definition
|
|
typedef struct _InsMultiValuesResult
|
|
{
|
|
ParenthesesOffset *ref_parentheses_;
|
|
ParenthesesOffset *tail_parentheses_;
|
|
int values_col_;
|
|
int values_count_;
|
|
int on_duplicate_pos_; // the start position of on duplicate key in insert ... on duplicate key update statement
|
|
int ret_code_;
|
|
} InsMultiValuesResult;
|
|
|
|
|
|
typedef struct
|
|
{
|
|
void *yyscan_info_;
|
|
const char *input_sql_;
|
|
int input_sql_len_;
|
|
int param_node_num_;
|
|
int token_num_;
|
|
void *malloc_pool_; // ObIAllocator
|
|
ObQuestionMarkCtx question_mark_ctx_;
|
|
ObSQLMode sql_mode_;
|
|
const struct ObCharsetInfo *charset_info_; //client charset
|
|
const struct ObCharsetInfo *charset_info_oracle_db_; //oracle DB charset
|
|
ParamList *param_nodes_;
|
|
ParamList *tail_param_node_;
|
|
struct {
|
|
uint32_t has_encount_comment_ : 1;
|
|
uint32_t is_fp_ : 1;
|
|
uint32_t is_multi_query_ : 1;
|
|
uint32_t is_ignore_hint_ : 1;//used for outline
|
|
uint32_t is_ignore_token_ : 1;//used for outline
|
|
uint32_t need_parameterize_ : 1;//used for outline, to support signature of outline can contain hint
|
|
uint32_t in_q_quote_ : 1;
|
|
uint32_t is_for_trigger_ : 1;
|
|
uint32_t is_dynamic_sql_ : 1;
|
|
uint32_t is_dbms_sql_ : 1;
|
|
uint32_t is_batched_multi_enabled_split_ : 1;
|
|
uint32_t is_not_utf8_connection_ : 1;
|
|
uint32_t may_bool_value_ : 1; // used for true/false in sql parser
|
|
uint32_t is_include_old_new_in_trigger_ : 1;
|
|
uint32_t is_normal_ps_prepare_ : 1;
|
|
uint32_t is_multi_values_parser_ : 1;
|
|
uint32_t is_for_udr_ : 1;
|
|
uint32_t is_for_remap_ : 1;
|
|
uint32_t contain_sensitive_data_ : 1;
|
|
uint32_t may_contain_sensitive_data_ : 1;
|
|
uint32_t is_external_table_ : 1;
|
|
};
|
|
|
|
ParseNode *result_tree_;
|
|
jmp_buf *jmp_buf_;//handle fatal error
|
|
int extra_errno_;
|
|
char *error_msg_;
|
|
int start_col_;
|
|
int end_col_;
|
|
int line_;
|
|
int yycolumn_;
|
|
int yylineno_;
|
|
char *tmp_literal_;
|
|
/* for multi query fast parse (split queries) */
|
|
char *no_param_sql_;
|
|
int no_param_sql_len_;
|
|
int no_param_sql_buf_len_;
|
|
/*for pl*/
|
|
PLParseInfo pl_parse_info_;
|
|
/*for q-quote*/
|
|
ObMinusStatusCtx minus_ctx_; // for fast parser to parse negative value
|
|
int64_t last_escape_check_pos_; //解析quoted string%parse-param时的一个临时变量,处理连接gbk字符集时遇到的转义字符问题
|
|
int connection_collation_;//connection collation
|
|
bool mysql_compatible_comment_; //whether the parser is parsing "/*! xxxx */"
|
|
bool enable_compatible_comment_;
|
|
int semicolon_start_col_;
|
|
|
|
InsMultiValuesResult *ins_multi_value_res_;
|
|
|
|
|
|
#ifdef SQL_PARSER_COMPILATION
|
|
TokenPosInfo *comment_list_;
|
|
int comment_cnt_;
|
|
int comment_cap_;
|
|
int realloc_cnt_;
|
|
bool stop_add_comment_;
|
|
#endif
|
|
} ParseResult;
|
|
|
|
/* Fast-parse context; currently holds only the is_fp_ flag. */
typedef struct _ObFastParseCtx
{
  bool is_fp_;
} ObFastParseCtx;
|
|
|
|
/*
 * Size unit suffixes (K, M, G, T, P, E). SIZE_UNIT_TYPE_INVALID is -1, the
 * valid units are consecutive from 0, and SIZE_UNIT_TYPE_MAX is their count.
 */
typedef enum ObSizeUnitType
{
  SIZE_UNIT_TYPE_INVALID = -1,
  SIZE_UNIT_TYPE_K,
  SIZE_UNIT_TYPE_M,
  SIZE_UNIT_TYPE_G,
  SIZE_UNIT_TYPE_T,
  SIZE_UNIT_TYPE_P,
  SIZE_UNIT_TYPE_E,
  SIZE_UNIT_TYPE_MAX
} ObSizeUnitType;
|
|
|
|
extern int parse_init(ParseResult *p);
|
|
extern int parse_terminate(ParseResult *p);
|
|
extern int parse_sql(ParseResult *p, const char *pszSql, size_t iLen);
|
|
extern void destroy_tree(ParseNode *pRoot);
|
|
/*
 * Character and string helpers. The buffers are passed together with an
 * explicit length.
 * NOTE(review): implementations are not visible here; e.g. whether
 * str_tolower/str_toupper modify buff in place and return it should be
 * confirmed at the definitions.
 */
extern unsigned char escaped_char(unsigned char c, int *with_back_slash);
extern char *str_tolower(char *buff, int64_t len);
extern char *str_toupper(char *buff, int64_t len);
extern int64_t str_remove_space(char *buff, int64_t len);
//extern int64_t ob_parse_string(const char *src, char *dest, int64_t len, int quote_type);
|
|
|
|
/*
 * ParseNode constructors. Each takes the allocator as an opaque malloc_pool
 * pointer and the node's ObItemType; new_non_terminal_node additionally takes
 * a count followed by variadic arguments.
 * NOTE(review): presumably the variadic arguments are the num child ParseNode
 * pointers; confirm at the definition.
 */
extern ParseNode *new_node(void *malloc_pool, ObItemType type, int num);
extern ParseNode *new_non_terminal_node(void *malloc_pool, ObItemType node_tag, int num, ...);
extern ParseNode *new_terminal_node(void *malloc_pool, ObItemType type);
|
|
|
|
/* Takes no arguments. Declared with (void) so that C compilers see a real
 * prototype; an empty "()" list leaves the parameters unspecified in C. */
extern int obpl_parser_check_stack_overflow(void);
|
|
|
|
/*
 * Deep-copy support for parse trees: get_deep_copy_size reports a size through
 * the size out-parameter, deep_copy_parse_node copies src into dst using
 * malloc_pool.
 * NOTE(review): return-code conventions are not visible in this header.
 */
int get_deep_copy_size(const ParseNode *node, int64_t *size);
int deep_copy_parse_node(void *malloc_pool, const ParseNode *src, ParseNode *dst);
|
|
|
|
/// convert x'42ab' to binary string
/// NOTE(review): presumably ob_parse_binary_len gives the dest size needed for
/// a source of the given length; confirm at the definition.
void ob_parse_binary(const char *src, int64_t len, char* dest);
int64_t ob_parse_binary_len(int64_t len);
|
|
|
|
// convert b'10010110' to binary string
// @pre dest buffer is enough
void ob_parse_bit_string(const char* src, int64_t len, char* dest);
int64_t ob_parse_bit_string_len(int64_t len);
|
|
|
|
// calculate hash value of syntax tree recursively
|
|
// @param [in] node syntax tree root
|
|
// @return hash value of syntax tree
|
|
extern uint64_t parsenode_hash(const ParseNode *node, int *ret);
|
|
// compare syntax tree recursively
|
|
// @param [in] node1 first syntax tree
|
|
// @param [in] node2 second syntax tree
|
|
extern bool parsenode_equal(const ParseNode *node1, const ParseNode *node2, int *ret);
|
|
|
|
extern int64_t get_question_mark(ObQuestionMarkCtx *ctx, void *malloc_pool, const char *name);
|
|
extern int64_t get_question_mark_by_defined_name(ObQuestionMarkCtx *ctx, const char *name);
|
|
extern ParseNode *adjust_inner_join_inner(int *error_code, ParseNode *inner_join, ParseNode *table_node);
|
|
|
|
// compare ParseNode str_value_ to pattern
|
|
// @param [in] node ParseNode
|
|
// @param [in] pattern pattern_str
|
|
// @param [in] pat_len length of pattern
|
|
extern bool nodename_equal(const ParseNode *node, const char *pattern, int64_t pat_len);
|
|
|
|
extern bool nodename_is_sdo_geometry_type(const ParseNode *node);
|
|
/*
 * Index constants for cast nodes. Several names share the value 1.
 * NOTE(review): presumably these index the int16_values_ slots of a cast-type
 * ParseNode, with slot 1 interpreted per target type; confirm at the users.
 */
#define OB_NODE_CAST_TYPE_IDX 0
#define OB_NODE_CAST_COLL_IDX 1
#define OB_NODE_CAST_N_PREC_IDX 2
#define OB_NODE_CAST_N_SCALE_IDX 3
#define OB_NODE_CAST_NUMBER_TYPE_IDX 1
#define OB_NODE_CAST_C_LEN_IDX 1
#define OB_NODE_CAST_GEO_TYPE_IDX 1
|
|
|
|
/*
 * Forms in which a number type's precision/scale can be written.
 * NOTE(review): presumably PERC = precision, STAR = '*', EMPTY = neither
 * given; confirm against the grammar.
 */
typedef enum ObNumberParseType
{
  NPT_PERC_SCALE = 0,
  NPT_STAR_SCALE,
  NPT_STAR,
  NPT_PERC,
  NPT_EMPTY,
} ObNumberParseType;
|
|
|
|
#ifndef SQL_PARSER_COMPILATION
|
|
/* Takes no arguments. Declared with (void) so that C compilers see a real
 * prototype; an empty "()" list leaves the parameters unspecified in C. */
bool check_stack_overflow_c(void);
// Lookup interface for external pl variables: obtains the index of the variable
// in the external symbol table (written to find_idx). Defined in ob_pl_stmt.cpp.
int lookup_pl_symbol(const void *pl_ns, const char *symbol, size_t len, int64_t *find_idx);
|
|
#endif
|
|
|
|
/* Doubly linked list node (next_/prev_) carrying an untyped payload pointer val_. */
typedef struct _ParserLinkNode
{
  struct _ParserLinkNode *next_;
  struct _ParserLinkNode *prev_;
  void *val_;
} ParserLinkNode;
|
|
|
|
/* Creates a ParserLinkNode using the given allocator handle. The parameter is
 * named malloc_pool (as in the other allocator-taking declarations of this
 * header) so that it does not shadow the <stdlib.h> function malloc. */
ParserLinkNode *new_link_node(void *malloc_pool);
|
|
|
|
/*
 * Charset selector with two values, CHAR_CS and NCHAR_CS.
 * NOTE(review): presumably the target of Oracle TRANSLATE ... USING; confirm.
 */
typedef enum ObTranslateCharset
{
  TRANSLATE_CHAR_CS = 0,
  TRANSLATE_NCHAR_CS = 1,
} ObTranslateCharset;
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif //OCEANBASE_SQL_PARSER_PARSE_NODE_H_
|