openGauss-server/src/common/backend/parser/scan.l

%{
/* -------------------------------------------------------------------------
 *
 * scan.l
 *	  lexical scanner for PostgreSQL
 *
 * NOTE NOTE NOTE:
 *
 * The rules in this file must be kept in sync with psql's lexer!!!
 *
 * The rules are designed so that the scanner never has to backtrack,
 * in the sense that there is always a rule that can match the input
 * consumed so far (the rule action may internally throw back some input
 * with yyless(), however).  As explained in the flex manual, this makes
 * for a useful speed increase --- about a third faster than a plain -CF
 * lexer, in simple testing.  The extra complexity is mostly in the rules
 * for handling float numbers and continued string literals.  If you change
 * the lexical rules, verify that you haven't broken the no-backtrack
 * property by running flex with the "-b" option and checking that the
 * resulting "lex.backup" file says that no backing up is needed.  (As of
 * Postgres 9.2, this check is made automatically by the Makefile.)
 *
 *
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/parser/scan.l
 *
 * -------------------------------------------------------------------------
 */
#include "postgres.h"
#include "knl/knl_variable.h"

#include <ctype.h>
#include <unistd.h>

#include "parser/parser.h"				/* only needed for GUC variables */
#include "parser/scanner.h"
#include "parser/scansup.h"
#include "mb/pg_wchar.h"
#include "utils/pl_package.h"
#include "utils/plpgsql.h"

/*
 * Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error).
 * Every three-argument fprintf() call that follows is rewritten into
 * ereport(ERROR, ...); the file and fmt arguments are dropped and only msg
 * is reported.
 */
#undef fprintf
#define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))

/*
 * Set the type of YYSTYPE.
 */
#define YYSTYPE core_YYSTYPE

/*
 * define core_yylex for flex >= 2.6
 *
 * NOTE(review): the test checks the major and minor numbers independently,
 * so a version such as 3.0 would not satisfy it -- confirm whether that
 * matters for the supported flex releases.
 */
#if FLEX_MAJOR_VERSION >= 2 && FLEX_MINOR_VERSION >= 6
#define YY_DECL int core_yylex \
               (YYSTYPE * yylval_param, YYLTYPE * yylloc_param , yyscan_t yyscanner)
#endif

/*
 * Set the type of yyextra.  All state variables used by the scanner should
 * be in yyextra, *not* statically allocated.
 */
#define YY_EXTRA_TYPE core_yy_extra_type *

/*
 * Not defined in the visible part of this file.  The {param} and {newParam}
 * rules call it with the text that follows the leading '$' or ':' and store
 * the result as the PARAM token's value.
 */
long getDynaParamSeq(const char *string, bool initflag, bool placeholder, core_yyscan_t yyscanner);

/*
 * Each call to yylex must set yylloc to the location of the found token
 * (expressed as a byte offset from the start of the input text).
 * When we parse a token that requires multiple lexer rules to process,
 * this should be done in the first such rule, else yylloc will point
 * into the middle of the token.
 */
#define SET_YYLLOC()  (*(yylloc) = yytext - yyextra->scanbuf)

/*
 * True when comment text has to be collected instead of being thrown away:
 * either yyextra->is_hint_str or yyextra->include_ora_comment is set.
 */
#define COMMENT_NOT_IGNORED()  (yyextra->is_hint_str || yyextra->include_ora_comment)
/*
 * Advance yylloc by the given number of bytes.
 */
#define ADVANCE_YYLLOC(delta)  ( *(yylloc) += (delta) )

/* Begin a new literal: reset the accumulated literal length to zero. */
#define startlit()  ( yyextra->literallen = 0 )
/* File-local helpers used by the rule actions below. */
static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
static char *litbufdup(core_yyscan_t yyscanner);
static char *litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner);
static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
static int	process_integer_literal(const char *token, YYSTYPE *lval);
static bool is_utf16_surrogate_first(pg_wchar c);
static bool is_utf16_surrogate_second(pg_wchar c);
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
static void addunicode(pg_wchar c, yyscan_t yyscanner);
static void set_is_delimiter_name(char* text, core_yyscan_t yyscanner );

/* Lets rule actions report an error without naming the scanner handle. */
#define yyerror(msg)  scanner_yyerror(msg, yyscanner)

/* Error position for the current token, taken from *yylloc. */
#define lexer_errposition()  scanner_errposition(*(yylloc), yyscanner)

static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner);
static void check_escape_warning(core_yyscan_t yyscanner);
static bool is_trans_stmt(const char *haystack, int haystack_len);
/*
 * Work around a bug in flex 2.5.35: it emits a couple of functions that
 * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
 * this would cause warnings.  Providing our own declarations should be
 * harmless even when the bug gets fixed.
 */
extern int	core_yyget_column(yyscan_t yyscanner);
extern void core_yyset_column(int column_no, yyscan_t yyscanner);

%}

%option reentrant
%option bison-bridge
%option bison-locations
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option noyyalloc
%option noyyrealloc
%option noyyfree
%option warn
%option prefix="core_yy"

/*
 * OK, here is a short description of lex/flex rules behavior.
 * The longest pattern which matches an input string is always chosen.
 * For equal-length patterns, the first occurring in the rules list is chosen.
 * INITIAL is the starting state, to which all non-conditional rules apply.
 * Exclusive states change parsing rules while the state is active.  When in
 * an exclusive state, only those rules defined for that state apply.
 *
 * We use exclusive states for quoted strings, extended comments,
 * and to eliminate parsing troubles for numeric strings.
 * Exclusive states:
 *  <xb> bit string literal
 *  <xc> extended C-style comments
 *  <xd> delimited identifiers (double-quoted identifiers)
 *  <xh> hexadecimal numeric string
 *  <xq> standard quoted strings
 *  <xe> extended quoted strings (support backslash escape sequences)
 *  <xdolq> $foo$ quoted strings
 *  <xui> quoted identifier with Unicode escapes
 *  <xus> quoted string with Unicode escapes
 *  <xeu> Unicode surrogate pair in extended quoted string
 */

%x xb
%x xc
%x xd
%x xh
%x xe
%x xq
%x xdolq
%x xui
%x xus
%x xeu

/*
 * In order to make the world safe for Windows and Mac clients as well as
 * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
 * sequence will be seen as two successive newlines, but that doesn't cause
 * any problems.  Comments that start with -- and extend to the next
 * newline are treated as equivalent to a single whitespace character.
 *
 * NOTE a fine point: if there is no newline following --, we will absorb
 * everything to the end of the input as a comment.  This is correct.  Older
 * versions of Postgres failed to recognize -- as a comment if the input
 * did not end with a newline.
 *
 * XXX perhaps \f (formfeed) should be treated as a newline as well?
 *
 * XXX if you change the set of whitespace characters, fix scanner_isspace()
 * to agree, and see also the plpgsql lexer.
 */

/* Any single whitespace character, newlines and form feed included. */
space			[ \t\n\r\f]
/* Whitespace characters that do not end a line. */
horiz_space		[ \t\f]
newline			[\n\r]
non_newline		[^\n\r]
/* A "--" comment, running up to but not including the next newline. */
comment			("--"{non_newline}*)

/* {whitespace} also accepts a "--" comment; {whitespace_only} does not. */
whitespace		({space}+|{comment})
whitespace_only	({space}+)

/*
 * SQL requires at least one newline in the whitespace separating
 * string literals that are to be concatenated.  Silly, but who are we
 * to argue?  Note that {whitespace_with_newline} should not have * after
 * it, whereas {whitespace} should generally have a * after it...
 */

special_whitespace		({space}+|{comment}{newline})
horiz_whitespace		({horiz_space}|{comment})
whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)

/*
 * To ensure that {quotecontinue} can be scanned without having to back up
 * if the full pattern isn't matched, we include trailing whitespace in
 * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
 * except for {quote} followed by whitespace and just one "-" (not two,
 * which would start a {comment}).  To cover that we have {quotefail}.
 * The actions for {quotestop} and {quotefail} must throw back characters
 * beyond the quote proper.
 */
quote			'
quotestop		{quote}{whitespace}*
quotecontinue	{quote}{whitespace_with_newline}{quote}
quotefail		{quote}{whitespace}*"-"

/* Bit string
 * It is tempting to scan the string for only those characters
 * which are allowed. However, this leads to silently swallowed
 * characters if illegal characters are included in the string.
 * For example, if xbinside is [01] then B'ABCD' is interpreted
 * as a zero-length string, and the ABCD' is lost!
 * Better to pass the string forward and let the input routines
 * validate the contents.
 */
xbstart			[bB]{quote}
xbinside		[^']*

/* Hexadecimal number */
xhstart			[xX]{quote}
xhinside		[^']*

/* National character */
xnstart			[nN]{quote}

/* Quoted string that allows backslash escapes
 * xeinside       run of characters containing neither backslash nor quote
 * xeescape       backslash followed by any character that is not an octal digit
 * xeoctesc       backslash followed by one to three octal digits
 * xehexesc       backslash, 'x', then one or two hexadecimal digits
 * xeunicode      backslash, then u + 4 hex digits or U + 8 hex digits
 * xeunicodefail  the same prefixes with too few hex digits; matched so the
 *                rule action can raise "invalid Unicode escape"
 */
xestart			[eE]{quote}
xeinside		[^\\']+
xeescape		[\\][^0-7]
xeoctesc		[\\][0-7]{1,3}
xehexesc		[\\]x[0-9A-Fa-f]{1,2}
xeunicode		[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
xeunicodefail	[\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})

/* Standard quoted string
 * xqdouble implements an embedded quote, written as two adjacent quotes
 * xqinside is any run of characters that contains no quote
 */
xqstart			{quote}
xqdouble		{quote}{quote}
xqinside		[^']+

/* $foo$ style quotes ("dollar quoting")
 * The quoted string starts with $foo$ where "foo" is an optional string
 * in the form of an identifier, except that it may not contain "$",
 * and extends to the first occurrence of an identical string.
 * There is *no* processing of the quoted text.
 *
 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
 * fails to match its trailing "$".
 */
dolq_start		[A-Za-z\200-\377_]
dolq_cont		[A-Za-z\200-\377_0-9]
dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
dolqfailed		\${dolq_start}{dolq_cont}*
dolqinside		[^$]+

/* Double quote
 * Allows embedded spaces and other special characters into identifiers.
 * xddouble is an embedded double quote, written as two adjacent double
 * quotes; xdinside is any run of characters without a double quote.
 */
dquote			\"
xdstart			{dquote}
xdstop			{dquote}
xddouble		{dquote}{dquote}
xdinside		[^"]+

/* Unicode escapes
 * uescape is the keyword UESCAPE (any letter case), optional whitespace,
 * then exactly one non-quote character enclosed in single quotes.
 */
uescape			[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
/* error rule to avoid backup: every proper prefix of {uescape}, plus a lone "-" */
uescapefail		("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])

/* Quoted identifier with Unicode escapes
 * xuistop1 ends the identifier without a complete UESCAPE clause,
 * xuistop2 ends it with one.
 */
xuistart		[uU]&{dquote}
xuistop1		{dquote}{whitespace}*{uescapefail}?
xuistop2		{dquote}{whitespace}*{uescape}

/* Quoted string with Unicode escapes
 * xusstop1 / xusstop2 mirror xuistop1 / xuistop2 with single quotes.
 */
xusstart		[uU]&{quote}
xusstop1		{quote}{whitespace}*{uescapefail}?
xusstop2		{quote}{whitespace}*{uescape}

/* error rule to avoid backup: "u&" not followed by a quote character */
xufailed		[uU]&


/* C-style comments
 *
 * The "extended comment" syntax closely resembles allowable operator syntax.
 * The tricky part here is to get lex to recognize a string starting with
 * slash-star as a comment, when interpreting it as an operator would produce
 * a longer match --- remember lex will prefer a longer match!  Also, if we
 * have something like plus-slash-star, lex will think this is a 3-character
 * operator whereas we want to see it as a + operator and a comment start.
 * The solution is two-fold:
 * 1. append {op_chars}* to xcstart so that it matches as much text as
 *    {operator} would. Then the tie-breaker (first matching rule of same
 *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
 *    in case it contains a star-slash that should terminate the comment.
 * 2. In the operator rule, check for slash-star within the operator, and
 *    if found throw it back with yyless().  This handles the plus-slash-star
 *    problem.
 * Dash-dash comments have similar interactions with the operator rule.
 */
xcstart			\/\*{op_chars}*
xcstop			\*+\/
xcinside		[^*/]+

/* Identifiers start with a letter, underscore or high-bit byte; later
 * characters may also be digits, '$' or '#'.
 */
digit			[0-9]
ident_start		[A-Za-z\200-\377_]
ident_cont		[A-Za-z\200-\377_0-9\$\#]

identifier		{ident_start}{ident_cont}*

/* Fixed multi-character tokens, each returned by its own rule. */
typecast		"::"
plus_join		"(+)"
dot_dot			\.\.
colon_equals	":="
para_equals	"=>"

/* "@@" followed by zero or more identifier-continuation characters. */
set_ident_start	"@@"
set_ident_cont	[A-Za-z\200-\377_0-9\$\#]
set_identifier	{set_ident_start}{set_ident_cont}*

/*
 * "self" is the set of chars that should be returned as single-character
 * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
 * which can be one or more characters long (but if a single-char token
 * appears in the "self" set, it is not to be returned as an Op).  Note
 * that the sets overlap, but each has some chars that are not in the other.
 *
 * If you change either set, adjust the character lists appearing in the
 * rule for "operator"!
 */
self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=\@]
op_chars		[\~\!\#\^\&\|\`\?\+\-\*\/\%\<\>\=\@]
operator		{op_chars}+

/* we no longer allow unary minus in numbers.
 * instead we pass it separately to parser. there it gets
 * coerced via doNegate() -- Leon aug 20 1999
 *
 * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
 *
 * {realfail1} and {realfail2} are added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */

integer			{digit}+
decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
decimalfail		{digit}+\.\.
real			({integer}|{decimal})[Ee][-+]?{digit}+
realfail1		({integer}|{decimal})[Ee]
realfail2		({integer}|{decimal})[Ee][-+]
/* One or more digit groups separated by single dots, e.g. 1.2.3 */
snapvers		({digit}+(\.{digit}+)*)

/* '$' followed by digits */
param			\${integer}

/* ':' followed by an identifier or by digits */
newParam		:({identifier}|{integer})

/* Same as {newParam}, then optional whitespace and a closing ']' */
newArray		:({identifier}|{integer}){space}*\]

/* '@' followed by either a run of set_user_cont characters or a non-empty
 * name wrapped in single quotes, double quotes or backquotes.
 *
 * NOTE(review): set_user_cont lists only byte \377, whereas ident_cont and
 * set_ident_cont use the range \200-\377 -- confirm whether that is intended.
 */
set_user_cont			[A-Za-z\377_0-9\$\.]
set_user_escape_quote		[^']
set_user_escape_dquote		[^"]
set_user_escape_bquote		[^`]
setUserIdentifier		@(({set_user_cont}+)|(\'{set_user_escape_quote}+\')|(\"{set_user_escape_dquote}+\")|(\`{set_user_escape_bquote}+\`))

/* Any single character except newline (flex "." semantics). */
other			.

/*
 * Dollar quoted strings are totally opaque, and no escaping is done on them.
 * Other quoted strings must allow some special characters such as single-quote
 *  and newline.
 * Embedded single-quotes are implemented both in the SQL standard
 *  style of two adjacent single quotes "''" and in the Postgres/Java style
 *  of escaped-quote "\'".
 * Other embedded escaped characters are matched explicitly and the leading
 *  backslash is dropped from the string.
 * Note that xcstart must appear before operator, as explained above!
 *  Also whitespace (comment) must appear before operator.
 */

%%

{whitespace_only}	{
					/* ignore: plain whitespace produces no token */
				}

{comment}		{
					/*
					 * A "--" comment.  It is normally discarded; when
					 * include_ora_comment is set, its text is returned to the
					 * caller as a COMMENTSTRING token instead.
					 */
					if (yyextra->include_ora_comment)
					{
						SET_YYLLOC();
						/*
						 * Begin with an empty literal.  Without this reset the
						 * comment text would be added after whatever the
						 * previously scanned literal left in the buffer, because
						 * literallen is only zeroed by startlit().
						 */
						startlit();
						addlit(yytext, yyleng, yyscanner);
						yylval->str = litbufdup(yyscanner);
						return COMMENTSTRING;
					}
					/* ignore */
				}

{xcstart}	{
					/*
					 * Opening slash-star of a C-style comment: reset the nesting
					 * depth and enter the exclusive <xc> state.
					 */
					/* Set location in case of syntax error in comment */
					SET_YYLLOC();
					yyextra->xcdepth = 0;
					BEGIN(xc);
					/* Put back any characters past slash-star; see above */
					yyless(2);
					/*
					 * When comment text is being collected, start a fresh literal
					 * holding the two-character opener left after yyless().
					 */
					if (COMMENT_NOT_IGNORED())
					{
						startlit();
						addlit(yytext, yyleng, yyscanner);
					}
		}

<xc>{xcstart}	{
					/* A nested opener: one more level that must be closed. */
					(yyextra->xcdepth)++;
					/* Put back any characters past slash-star; see above */
					yyless(2);
					if (COMMENT_NOT_IGNORED())
					{
						addlit(yytext, yyleng, yyscanner);
					}
		}

<xc>{xcstop}	{
					/*
					 * A star-slash closes one comment level.  At depth zero it
					 * closes the outermost comment and the scanner returns to
					 * INITIAL; otherwise only the nesting depth is reduced.
					 *
					 * The collected text is handed back as COMMENTSTRING only
					 * when the outermost comment ends.  Returning at an inner
					 * star-slash would emit a token while still in <xc> and cut
					 * the comment text short.
					 */
					bool	comment_done = (yyextra->xcdepth <= 0);

					if (comment_done)
						BEGIN(INITIAL);
					else
						(yyextra->xcdepth)--;

					if (COMMENT_NOT_IGNORED())
					{
						addlit(yytext, yyleng, yyscanner);
						if (comment_done)
						{
							yylval->str = litbufdup(yyscanner);
							yyextra->is_hint_str = false;
							return COMMENTSTRING;
						}
					}
		}

<xc>{xcinside}	{
					/* Comment body without '*' or '/': keep it only if collecting. */
					if (COMMENT_NOT_IGNORED())
					{
						addlit(yytext, yyleng, yyscanner);
					}
		}

<xc>{op_chars}	{
					/* A single operator character inside the comment (e.g. a lone '/'). */
					if (COMMENT_NOT_IGNORED())
					{
						addlit(yytext, yyleng, yyscanner);
					}
		}

<xc>\*+		{
					/* A run of stars that is not followed by a slash. */
					if (COMMENT_NOT_IGNORED())
					{
						addlit(yytext, yyleng, yyscanner);
					}
		}

<xc><<EOF>>		{ yyerror("unterminated /* comment"); return 0;}

{xbstart}		{
					/* Binary bit type.
					 * At some point we should simply pass the string
					 * forward to the parser and label it there.
					 * In the meantime, place a leading "b" on the string
					 * to mark it for the input routine as a binary string.
					 */
					SET_YYLLOC();
					BEGIN(xb);
					startlit();
					addlitchar('b', yyscanner);
				}
<xb>{quotestop}	|
<xb>{quotefail} {
					/*
					 * Closing quote.  Keep only the quote itself; anything matched
					 * after it is pushed back for rescanning.
					 */
					yyless(1);
					BEGIN(INITIAL);
					yylval->str = litbufdup(yyscanner);
					yyextra->is_hint_str = false;
					return BCONST;
		}
<xh>{xhinside}	|
<xb>{xbinside}	{
					/* Literal contents are appended unchecked. */
					addlit(yytext, yyleng, yyscanner);
				}
<xh>{quotecontinue}	|
<xb>{quotecontinue}	{
					/* ignore */
				}
<xb><<EOF>>		{ yyerror("unterminated bit string literal"); return 0;}

{xhstart}		{
					/* Hexadecimal bit type.
					 * At some point we should simply pass the string
					 * forward to the parser and label it there.
					 * In the meantime, place a leading "x" on the string
					 * to mark it for the input routine as a hex string.
					 */
					SET_YYLLOC();
					BEGIN(xh);
					startlit();
					addlitchar('x', yyscanner);
				}
<xh>{quotestop}	|
<xh>{quotefail} {
					/* Same handling as the bit-string terminator, but yields XCONST. */
					yyless(1);
					BEGIN(INITIAL);
					yylval->str = litbufdup(yyscanner);
					yyextra->is_hint_str = false;
					return XCONST;
		}
<xh><<EOF>>		{ yyerror("unterminated hexadecimal string literal"); return 0;}

{xnstart}		{
					/*
					 * National character literal (n'...').  Only the leading 'n' is
					 * consumed here and reported as the NCHAR keyword; the quoted
					 * part is rescanned as an ordinary string.
					 */
					const ScanKeyword *nchar_kw;

					SET_YYLLOC();
					/* consume just the 'n'; the quote goes back to the input */
					yyless(1);

					nchar_kw = ScanKeywordLookup("nchar",
												 yyextra->keywords,
												 yyextra->num_keywords);
					if (nchar_kw == NULL)
					{
						/* NCHAR is not in the keyword table: hand back the identifier "n" */
						yylval->str = pstrdup("n");
						yyextra->ident_quoted = false;
						yyextra->is_hint_str = false;
						return IDENT;
					}

					yylval->keyword = nchar_kw->name;
					yyextra->is_hint_str = false;
					return nchar_kw->value;
				}

{xqstart}		{
					/*
					 * Plain quoted string.  It is scanned in <xq> when
					 * standard_conforming_strings is on, otherwise in <xe> with
					 * escape warnings enabled.
					 */
					yyextra->warn_on_first_escape = true;
					yyextra->saw_non_ascii = false;
					SET_YYLLOC();
					if (u_sess->attr.attr_sql.standard_conforming_strings)
						BEGIN(xq);
					else
						BEGIN(xe);
					startlit();
				}
{xestart}		{
					/* E'...' string: always scanned in <xe>, no escape warning. */
					yyextra->warn_on_first_escape = false;
					yyextra->saw_non_ascii = false;
					SET_YYLLOC();
					BEGIN(xe);
					startlit();
				}
{xusstart}		{
					/* U&'...' string: rejected unless standard_conforming_strings is on. */
					SET_YYLLOC();
					if (!u_sess->attr.attr_sql.standard_conforming_strings)
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("unsafe use of string constant with Unicode escapes"),
								 errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off."),
								 lexer_errposition()));
					BEGIN(xus);
					startlit();
				}
<xq,xe>{quotestop}	|
<xq,xe>{quotefail} {
					/* Closing quote: keep only the quote, rescan what followed it. */
					yyless(1);
					BEGIN(INITIAL);
					/*
					 * check that the data remains valid if it might have been
					 * made invalid by unescaping any chars.
					 */
					if (yyextra->saw_non_ascii)
						pg_verifymbstr(yyextra->literalbuf,
									   yyextra->literallen,
									   false);
					yylval->str = litbufdup(yyscanner);
					yyextra->is_hint_str = false;
					return SCONST;
				}
<xus>{xusstop1} {
					/*
					 * End of a U&'...' string without a complete UESCAPE clause:
					 * the escape character passed on is a backslash.
					 */
					/* throw back all but the quote */
					yyless(1);
					BEGIN(INITIAL);
					yylval->str = litbuf_udeescape('\\', yyscanner);
					yyextra->is_hint_str = false;
					/* NOTE(review): after yyless(1) yytext is just the closing quote -- confirm this argument is intended */
					set_is_delimiter_name(yytext,yyscanner);
					return SCONST;
		}
<xus>{xusstop2} {
					/*
					 * End of a U&'...' string with a UESCAPE clause.  The match
					 * ends with quote, escape character, quote, so the escape
					 * character is the second-to-last byte of yytext.
					 */
					BEGIN(INITIAL);
					yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner);
					yyextra->is_hint_str = false;
					return SCONST;
		}
<xq,xe,xus>{xqdouble} {
					/* Two adjacent quotes stand for one literal quote. */
					addlitchar('\'', yyscanner);
				}
<xq,xus>{xqinside}  {
					/* Ordinary string contents, copied as-is. */
					addlit(yytext, yyleng, yyscanner);
				}
<xe>{xeinside}  {
					/* Contents without backslash or quote, copied as-is. */
					addlit(yytext, yyleng, yyscanner);
				}
<xe>{xeunicode} {
					/* yytext+2 skips the backslash and the u/U marker. */
					pg_wchar c = strtoul(yytext+2, NULL, 16);

					check_escape_warning(yyscanner);

					/*
					 * A first surrogate is remembered and must be completed by
					 * the next escape (state <xeu>); a second surrogate on its
					 * own is an error; anything else is added directly.
					 */
					if (is_utf16_surrogate_first(c))
					{
						yyextra->utf16_first_part = c;
						BEGIN(xeu);
					}
					else if (is_utf16_surrogate_second(c))
						yyerror("invalid Unicode surrogate pair");
					else
						addunicode(c, yyscanner);
				}
<xeu>{xeunicode} {
					/* Second half of a surrogate pair; combine it with the saved first half. */
					pg_wchar c = strtoul(yytext+2, NULL, 16);

					if (!is_utf16_surrogate_second(c))
						yyerror("invalid Unicode surrogate pair");

					c = surrogate_pair_to_codepoint(yyextra->utf16_first_part, c);

					addunicode(c, yyscanner);

					BEGIN(xe);
				}
<xeu>.			{ yyerror("invalid Unicode surrogate pair"); }
<xeu>\n			{ yyerror("invalid Unicode surrogate pair"); }
<xeu><<EOF>>	{ yyerror("invalid Unicode surrogate pair"); }
<xe,xeu>{xeunicodefail}	{
						/* \u or \U followed by too few hexadecimal digits. */
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
								 errmsg("invalid Unicode escape"),
								 errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
								 lexer_errposition()));
				}
<xe>{xeescape}  {
					/*
					 * Backslash followed by a non-octal character.  A
					 * backslash-quote is rejected when backslash_quote is off,
					 * or is "safe encoding" and the client encoding is
					 * client-only.
					 */
					if (yytext[1] == '\'')
					{
						if (u_sess->attr.attr_sql.backslash_quote == BACKSLASH_QUOTE_OFF ||
							(u_sess->attr.attr_sql.backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
							 PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
							ereport(ERROR,
									(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
									 errmsg("unsafe use of \\' in a string literal"),
									 errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
									 lexer_errposition()));
					}
					check_string_escape_warning(yytext[1], yyscanner);
					addlitchar(unescape_single_char(yytext[1], yyscanner),
							   yyscanner);
				}
<xe>{xeoctesc}  {
					/* Octal escape: the digits start right after the backslash. */
					unsigned char c = strtoul(yytext+1, NULL, 8);

					check_escape_warning(yyscanner);
					addlitchar(c, yyscanner);
					/* NUL or high-bit bytes make the terminator rule re-verify the string */
					if (c == '\0' || IS_HIGHBIT_SET(c))
						yyextra->saw_non_ascii = true;
				}
<xe>{xehexesc}  {
					/* Hex escape: the digits start after the backslash and 'x'. */
					unsigned char c = strtoul(yytext+2, NULL, 16);

					check_escape_warning(yyscanner);
					addlitchar(c, yyscanner);
					if (c == '\0' || IS_HIGHBIT_SET(c))
						yyextra->saw_non_ascii = true;
				}
<xq,xe,xus>{quotecontinue} {
					/* ignore */
				}
<xe>.			{
					/* This is only needed for \ just before EOF */
					addlitchar(yytext[0], yyscanner);
				}
<xq,xe,xus><<EOF>>		{ yyerror("unterminated quoted string"); return 0;}

{dolqdelim}		{
					/*
					 * Opening $tag$ delimiter: remember a copy of it so the
					 * matching closing delimiter can be recognised.
					 */
					SET_YYLLOC();
					yyextra->dolqstart = pstrdup(yytext);
					BEGIN(xdolq);
					startlit();
				}
{dolqfailed}	{
					SET_YYLLOC();
					/* throw back all but the initial "$" */
					yyless(1);
					/* and treat it as {other} */
					yyextra->is_hint_str = false;
					return yytext[0];
				}
<xdolq>{dolqdelim} {
					/* Only a delimiter identical to the opening one ends the string. */
					if (strcmp(yytext, yyextra->dolqstart) == 0)
					{
						FREE_POINTER(yyextra->dolqstart);
						yyextra->dolqstart = NULL;
						BEGIN(INITIAL);
						yylval->str = litbufdup(yyscanner);
						yyextra->is_hint_str = false;
						return SCONST;
					}
					else
					{
						/*
						 * When we fail to match $...$ to dolqstart, transfer
						 * the $... part to the output, but put back the final
						 * $ for rescanning.  Consider $delim$...$junk$delim$
						 */
						addlit(yytext, yyleng-1, yyscanner);
						yyless(yyleng-1);
					}
				}
<xdolq>{dolqinside} {
					/* Text without any '$', copied as-is. */
					addlit(yytext, yyleng, yyscanner);
				}
<xdolq>{dolqfailed} {
					/* '$' plus identifier characters but no closing '$': plain text. */
					addlit(yytext, yyleng, yyscanner);
				}
<xdolq>.		{
					/* This is only needed for $ inside the quoted text */
					addlitchar(yytext[0], yyscanner);
				}
<xdolq><<EOF>>	{ yyerror("unterminated dollar-quoted string"); return 0;}

{xdstart}		{
					/* Opening double quote of a delimited identifier. */
					SET_YYLLOC();
					BEGIN(xd);
					startlit();
				}
{xuistart}		{
					/* Opening U&" of a delimited identifier with Unicode escapes. */
					SET_YYLLOC();
					BEGIN(xui);
					startlit();
				}
<xd>{xdstop}	{
					char		   *ident;

					BEGIN(INITIAL);
					if (yyextra->literallen == 0)
						yyerror("zero-length delimited identifier");
					ident = litbufdup(yyscanner);
					/* Truncation is skipped when is_load_copy or isForbidTruncate is set. */
					if (yyextra->literallen >= NAMEDATALEN && u_sess->parser_cxt.is_load_copy == false && u_sess->parser_cxt.isForbidTruncate == false)
						truncate_identifier(ident, yyextra->literallen, yyextra->warnOnTruncateIdent);
					set_is_delimiter_name(ident,yyscanner);
					yylval->str = ident;
					yyextra->ident_quoted = true;
					yyextra->is_hint_str = false;
					return IDENT;
				}
<xui>{xuistop1}	{
					/*
					 * End of a U&"..." identifier without a complete UESCAPE
					 * clause: the escape character passed on is a backslash.
					 */
					char		   *ident;
					int             identlen;

					BEGIN(INITIAL);
					if (yyextra->literallen == 0)
						yyerror("zero-length delimited identifier");
					ident = litbuf_udeescape('\\', yyscanner);
					identlen = strlen(ident);
					if (identlen >= NAMEDATALEN && u_sess->parser_cxt.is_load_copy == false && u_sess->parser_cxt.isForbidTruncate == false)
						truncate_identifier(ident, identlen, yyextra->warnOnTruncateIdent);
					yylval->str = ident;
					/* throw back all but the quote */
					yyless(1);
					/* NOTE(review): the plain "..." rule sets ident_quoted to true; confirm false is intended here */
					yyextra->ident_quoted = false;
					yyextra->is_hint_str = false;
					return IDENT;
				}
<xui>{xuistop2}	{
					/*
					 * End of a U&"..." identifier with a UESCAPE clause; the
					 * escape character is the second-to-last byte of the match.
					 */
					char		   *ident;
					int             identlen;

					BEGIN(INITIAL);
					if (yyextra->literallen == 0)
						yyerror("zero-length delimited identifier");
					ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
					identlen = strlen(ident);
					if (identlen >= NAMEDATALEN && u_sess->parser_cxt.is_load_copy == false && u_sess->parser_cxt.isForbidTruncate == false)
						truncate_identifier(ident, identlen, yyextra->warnOnTruncateIdent);
					yylval->str = ident;
					yyextra->ident_quoted = false;
					yyextra->is_hint_str = false;
					return IDENT;
				}
<xd,xui>{xddouble}	{
					/* Two adjacent double quotes stand for one literal double quote. */
					addlitchar('"', yyscanner);
				}
<xd,xui>{xdinside}	{
					/* Identifier text, copied as-is. */
					addlit(yytext, yyleng, yyscanner);
				}
<xd,xui><<EOF>>		{ yyerror("unterminated quoted identifier"); return 0;}

{xufailed}	{
					/*
					 * "u&" that does not open a Unicode-escaped string or
					 * identifier: only the letter is kept and returned as an
					 * ordinary identifier; the '&' is rescanned.
					 */
					char		   *ident;

					SET_YYLLOC();
					/* throw back all but the initial u/U */
					yyless(1);
					/* and treat it as {identifier} */
					ident = downcase_truncate_identifier(yytext, yyleng, yyextra->warnOnTruncateIdent);
					yylval->str = ident;
					yyextra->ident_quoted = false;
					yyextra->is_hint_str = false;
					return IDENT;
				}

{typecast}		{
					/* "::" */
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return TYPECAST;
				}

{plus_join}	{
					/* "(+)" */
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return ORA_JOINOP;
				}

{dot_dot}		{
					/* ".." */
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return DOT_DOT;
				}

{colon_equals}	{
					/* ":=" */
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return COLON_EQUALS;
				}

{para_equals}	{
					/* "=>" */
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return PARA_EQUALS;
				}

{self}			{
					/*
					 * Single-character token, returned as its own character code.
					 *
					 * While no dollar-quote delimiter is pending, parentheses are
					 * counted, and a semicolon seen at depth zero outside a slash
					 * proc body is recorded in query_string_locationlist as the
					 * end of one query.
					 */
					SET_YYLLOC();
					if (yyextra->dolqstart == NULL)
					{
						switch (yytext[0])
						{
							case '(':
								yyextra->paren_depth++;
								break;
							case ')':
								/* never let the depth go negative */
								if (yyextra->paren_depth > 0)
									yyextra->paren_depth--;
								break;
							case ';':
								if (yyextra->paren_depth == 0 && !yyextra->in_slash_proc_body)
								{
									yyextra->query_string_locationlist = lappend_int(yyextra->query_string_locationlist, *yylloc);
									/* reset is_createstmt to parse next sql */
									yyextra->is_createstmt = false;
								}
								break;
							default:
								break;
						}
						set_is_delimiter_name(yytext,yyscanner);
					}
					yyextra->is_hint_str = false;
					return yytext[0];
				}

{operator}		{
					/*
					 * Multi-character operator.  The raw match can run past the
					 * real operator: an embedded slash-star or dash-dash begins
					 * a comment, so the operator must end just before it.  (A
					 * comment start at the very first character is matched by an
					 * earlier rule, never by this one.)
					 */
					int		oplen = yyleng;
					char   *star_cmt = strstr(yytext, "/*");
					char   *dash_cmt = strstr(yytext, "--");
					char   *cmt_start;

					/*
					 * B-format user variable written as @`...`: accepted when
					 * the match is longer than three characters, starts with
					 * at-sign plus backquote, ends with a backquote, and has no
					 * other backquote from the third character onwards.
					 */
					if (u_sess->attr.attr_sql.sql_compatibility == B_FORMAT &&
						u_sess->attr.attr_common.enable_set_variable_b_format &&
						oplen > 3 &&
						yytext[0] == '@' &&
						yytext[1] == '`' &&
						yytext[oplen-1] == '`')
					{
						/* cannot be NULL: the final character is a backquote */
						char   *first_tick = strchr(yytext + 2, '`');

						if (first_tick[1] == '\0')
						{
							SET_YYLLOC();
							yylval->str = pstrdup(yytext + 1);
							yyextra->is_hint_str = false;
							return SET_USER_IDENT;
						}
					}

					/* The earliest comment start, if any, bounds the operator. */
					if (star_cmt != NULL && dash_cmt != NULL)
						cmt_start = (star_cmt > dash_cmt) ? dash_cmt : star_cmt;
					else
						cmt_start = (star_cmt != NULL) ? star_cmt : dash_cmt;
					if (cmt_start != NULL)
						oplen = cmt_start - yytext;

					/*
					 * SQL compatibility: a trailing '+' or '-' is split off a
					 * multi-character operator unless some earlier character is
					 * one that never occurs in SQL operators.  Thus '=-' lexes as
					 * two operators while a name such as '?-' stays whole.
					 */
					while (oplen > 1 &&
						   (yytext[oplen-1] == '+' ||
							yytext[oplen-1] == '-'))
					{
						bool	has_nonsql_char = false;

						for (int pos = 0; pos < oplen - 1; pos++)
						{
							if (strchr("~!#^&|`?%", yytext[pos]) != NULL)
							{
								has_nonsql_char = true;
								break;
							}
						}
						if (has_nonsql_char)
							break;		/* the trailing sign may stay */
						oplen--;		/* drop the sign and look again */
					}

					SET_YYLLOC();
					set_is_delimiter_name(yytext,yyscanner);

					if (oplen < (int)yyleng)
					{
						/* Give the surplus characters back to the input */
						yyless(oplen);
						/*
						 * A single remaining character that belongs to the
						 * "self" set is returned exactly as the "self" rule
						 * would return it.
						 */
						if (oplen == 1 &&
							strchr(",()[].;:+-*/%^<>=@", yytext[0]))
						{
							yyextra->is_hint_str = false;
							return yytext[0];
						}
					}

					/*
					 * Over-long operators are an error rather than a
					 * notice-and-truncate as for identifiers, since they most
					 * likely indicate a syntactic mistake.
					 */
					if (oplen >= NAMEDATALEN)
						yyerror("operator too long");

					{
						int			token = Op;
						const char *opname = yytext;

						if (strcmp(yytext, "!=") == 0 || strcmp(yytext, "^=") == 0)
						{
							/* Convert "!=" operator to "<>" for compatibility */
							opname = "<>";
							token = CmpOp;
						}
						else if (strcmp(yytext, ">=") == 0 ||
								 strcmp(yytext, "<=") == 0 ||
								 strcmp(yytext, "<>") == 0)
						{
							token = CmpOp;
						}
						else if (strcmp(yytext, "<=>") == 0 &&
								 (u_sess->attr.attr_sql.sql_compatibility == B_FORMAT))
						{
							token = CmpNullOp;
						}

						yylval->str = pstrdup(opname);
						yyextra->is_hint_str = false;
						return token;
					}
				}
{newArray}		{
					/*
					 * ':' + name/number followed by ']': only the ':' is kept
					 * and returned as a single-character token; everything
					 * after it is rescanned.
					 */
					yyless(1);
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return yytext[0];
				}
{param}			{
					/* '$' + digits: the text after '$' is passed to getDynaParamSeq(). */
					SET_YYLLOC();
					yylval->ival = getDynaParamSeq(yytext + 1, false, false, yyscanner);
					yyextra->is_hint_str = false;
					return PARAM;
				}
{newParam}		{
					/* ':' + name/number: as above, with the placeholder argument set to true. */
					SET_YYLLOC();
					yylval->ival = getDynaParamSeq(yytext + 1, false, true, yyscanner);
					yyextra->is_hint_str = false;
					return PARAM;
				}
{integer}		{
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return process_integer_literal(yytext, yylval);
				}
{decimal}		{
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					yyextra->is_hint_str = false;
					return FCONST;
				}
{decimalfail}	{
					/* throw back the .., and treat as integer */
					yyless(yyleng-2);
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return process_integer_literal(yytext, yylval);
				}
{real}			{
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					yyextra->is_hint_str = false;
					return FCONST;
				}
{realfail1}		{
					/*
					 * throw back the [Ee], and treat as {decimal}.  Note
					 * that it is possible the input is actually {integer},
					 * but since this case will almost certainly lead to a
					 * syntax error anyway, we don't bother to distinguish.
					 *
					 * yyless(yyleng-1) drops exactly one trailing character
					 * from the match before the text is copied.
					 */
					yyless(yyleng-1);
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					yyextra->is_hint_str = false;
					return FCONST;
				}
{realfail2}		{
					/*
					 * throw back the [Ee][+-], and proceed as above: the last
					 * two matched characters are returned to the input and the
					 * remaining text is reported as FCONST.
					 */
					yyless(yyleng-2);
					SET_YYLLOC();
					yylval->str = pstrdup(yytext);
					yyextra->is_hint_str = false;
					return FCONST;
				}
{snapvers}		{
					SET_YYLLOC();

					/*
					 * Work on a private copy of the token: every '.' except
					 * one in the very first position is replaced by the
					 * snapshot version separator.
					 */
					char	   *version = pstrdup(yytext);
					const size_t version_len = strlen(version);

					for (size_t pos = 1; pos < version_len; pos++)
					{
						if (version[pos] == '.')
							version[pos] = DB4AI_SNAPSHOT_VERSION_SEPARATOR;
					}
					yylval->str = version;

					yyextra->is_hint_str = false;
					return VCONST;
				}

{set_identifier} {
					/*
					 * System-variable style identifier.  It is only treated
					 * as such in B-format compatibility mode with
					 * enable_set_variable_b_format switched on; otherwise just
					 * the first two characters of the match are returned as an
					 * operator and the rest is rescanned.
					 *
					 * Every return path clears is_hint_str, as all other
					 * non-DML token rules do.
					 */
					if (u_sess->attr.attr_sql.sql_compatibility == B_FORMAT && u_sess->attr.attr_common.enable_set_variable_b_format) {
						char		   *set_ident;
						SET_YYLLOC();

						/*
						 * Convert the whole token to lower case, and truncate
						 * if necessary, so the scope prefixes can be compared
						 * case-insensitively.
						 */
						set_ident = downcase_truncate_identifier(yytext, yyleng, yyextra->warnOnTruncateIdent);
						if (strcmp(set_ident, "@@session") == 0) {
							yyextra->is_hint_str = false;
							return SET_IDENT_SESSION;
						} else if (strcmp(set_ident, "@@global") == 0) {
							yyextra->is_hint_str = false;
							return SET_IDENT_GLOBAL;
						} else {
							yylval->str = set_ident;
							yyextra->ident_quoted = false;
							/* previously left untouched on this path only */
							yyextra->is_hint_str = false;
							return SET_IDENT;
						}
					} else {
						SET_YYLLOC();
						/* keep the two leading characters, push the rest back */
						yyless(2);
						yylval->str = pstrdup(yytext);
						yyextra->is_hint_str = false;
						return Op;
					}
				}

{identifier}	{
					/*
					 * Unquoted word: either a keyword from yyextra->keywords
					 * or an ordinary identifier.  Besides choosing the token
					 * code, this rule maintains several scanner flags
					 * (is_createstmt, dolqstart, is_hint_str, ident_quoted)
					 * and decides whether BEGIN starts a transaction command.
					 */
					const ScanKeyword *keyword;
					char		   *ident;

					SET_YYLLOC();

					/* Is it a keyword? */
					keyword = ScanKeywordLookup(yytext,
												yyextra->keywords,
												yyextra->num_keywords);

					/* cleared by default; re-armed below for the DML keywords only */
					yyextra->is_hint_str = false;
					/*
					 * When set, keyword values are compared against the
					 * codes stored in yyextra->plKeywordValue instead of the
					 * token names used otherwise; plKeywordValue is only
					 * dereferenced in that case.
					 */
					bool isPlpgsqlKeyword = yyextra->isPlpgsqlKeyWord;

					if (keyword != NULL)
					{
						yylval->keyword = keyword->name;

						/* Find the CREATE PROCEDURE syntax and set dolqstart. */
						if (keyword->value == CREATE)
						{
							yyextra->is_createstmt = true;
						}
						else if (keyword->value == TRIGGER && yyextra->is_createstmt)
						{
							/* Create trigger don't need set dolqstart */
							yyextra->is_createstmt = false;
						}
						else if ((keyword->value == (isPlpgsqlKeyword? yyextra->plKeywordValue->procedure : PROCEDURE) ||
						    keyword->value == (isPlpgsqlKeyword? yyextra->plKeywordValue->function : FUNCTION))
							&& (yyextra->is_createstmt))
						{
							/* Make yyextra->dolqstart not NULL means its in a proc with $$. */
							yyextra->dolqstart = "";
						}
						else if (keyword->value == (isPlpgsqlKeyword? yyextra->plKeywordValue->begin : BEGIN_P))
						{
                            /*
                             * Outside function/procedure creation and PL/SQL
                             * compilation, BEGIN may open a transaction.  Note
                             * that both early returns below skip the
                             * set_is_delimiter_name() call made for every
                             * other keyword.
                             */
                            if (!(u_sess->parser_cxt.isCreateFuncOrProc || u_sess->plsql_cxt.curr_compile_context != NULL)) {
                                /* cases that have to be a trans stmt and fall quickly */
                                if (yyg->yy_hold_char == ';' || /* found ';' after 'begin' */
                                    yyg->yy_hold_char == '\0')  /* found '\0' after 'begin' */
                                    return BEGIN_NON_ANOYBLOCK;
                                /* look for other transaction stmt */
                                if (is_trans_stmt(yyextra->scanbuf, yyextra->scanbuflen))
                                    return BEGIN_NON_ANOYBLOCK;
                            }
						}
						else if (keyword->value == (isPlpgsqlKeyword? yyextra->plKeywordValue->select : SELECT) ||
								 keyword->value == (isPlpgsqlKeyword? yyextra->plKeywordValue->update : UPDATE) ||
								 keyword->value == (isPlpgsqlKeyword? yyextra->plKeywordValue->insert : INSERT) ||
								 keyword->value == (isPlpgsqlKeyword? yyextra->plKeywordValue->Delete : DELETE_P) ||
								 keyword->value == MERGE)
						{
							/*
							 * The only place the flag is set to true.
							 * NOTE(review): presumably lets the comment rules
							 * treat a following comment as a hint -- confirm.
							 */
							yyextra->is_hint_str = true;
						}

                        set_is_delimiter_name(yytext,yyscanner);
						return keyword->value;
					}

					/*
					 * No.  Convert the identifier to lower case, and truncate
					 * if necessary.
					 */
					ident = downcase_truncate_identifier(yytext, yyleng, yyextra->warnOnTruncateIdent);
					yylval->str = ident;
					yyextra->ident_quoted = false;
					/* the raw (not downcased) text is what gets compared with the delimiter */
					set_is_delimiter_name(yytext,yyscanner);
					return IDENT;
				}

{setUserIdentifier}	{
					SET_YYLLOC();

					/*
					 * Outside B-format mode, or with the user-variable switch
					 * off, only the first character is consumed and returned
					 * as a single-character token; the rest is rescanned.
					 */
					if (u_sess->attr.attr_sql.sql_compatibility != B_FORMAT ||
						!u_sess->attr.attr_common.enable_set_variable_b_format) {
						yyless(1);
						yylval->str = pstrdup(yytext);
						yyextra->is_hint_str = false;
						return yytext[0];
					}

					/* user variable: hand over the name without its prefix character */
					yylval->str = pstrdup(yytext + 1);
					yyextra->is_hint_str = false;
					return SET_USER_IDENT;
				}

{other}			{
					/* fallback rule: the first matched character is its own token code */
					SET_YYLLOC();
					yyextra->is_hint_str = false;
					return yytext[0];
				}

<<EOF>>			{
					/*
					 * Record the end-of-input position first, so that an error
					 * raised for this point has a location to report, then
					 * stop scanning.
					 */
					SET_YYLLOC();
					yyterminate();
				}

%%

/*
 * Arrange access to yyextra for subroutines of the main yylex() function.
 * We expect each subroutine to have a yyscanner parameter.  Rather than
 * use the yyget_xxx functions, which might or might not get inlined by the
 * compiler, we cheat just a bit and cast yyscanner to the right type.
 *
 * Consequence: from here on, every function that uses yyextra, yylloc or
 * yyleng must have a variable named "yyscanner" in scope.
 */
#undef yyextra
#define yyextra  (((struct yyguts_t *) yyscanner)->yyextra_r)

/* Likewise for a couple of other things we need. */
/* yylloc is used as a pointer below (read and written through *yylloc) */
#undef yylloc
#define yylloc  (((struct yyguts_t *) yyscanner)->yylloc_r)
/* yyleng: length of the text matched by the current rule */
#undef yyleng
#define yyleng  (((struct yyguts_t *) yyscanner)->yyleng_r)


/*
 * scanner_errposition
 *		Attach a cursor position to a lexer or grammar error, when one is
 *		known.
 *
 * "location" is a byte offset into the scan buffer; a negative value means
 * "unknown" and makes this a no-op.  Meant to be called from inside an
 * ereport() argument list; the return value carries no information (it is 0
 * for an unknown location, otherwise whatever errposition() returns).
 *
 * Usable only while raw parsing is in progress (scan.l and gram.y), because
 * it needs the yyscanner struct to translate the offset.
 */
int
scanner_errposition(int location, core_yyscan_t yyscanner)
{
	if (location >= 0)
	{
		/* byte offset -> 1-based character index, as ereport expects */
		int		char_pos = pg_mbstrlen_with_len(yyextra->scanbuf, location) + 1;

		return errposition(char_pos);
	}

	/* location unknown: nothing to report */
	return 0;
}

/*
 * scanner_yyerror
 *		Report a lexer or grammar error.
 *
 * The cursor position used for the message is the last value stored in
 * YYLLOC: the start of the current token when called from yylex(), or the
 * most recently lexed token when called from the grammar.  That suits Bison
 * syntax errors, which are raised as soon as the first unparsable token is
 * read; for any other use the position may be misleading.
 *
 * Side effects: always sets u_sess->plsql_cxt.have_error.  In single-node
 * builds, when a PL/SQL compile is in progress and plsql_show_all_error is
 * on, the message is also appended to the session error list and reported
 * at NOTICE level instead of ERROR -- in that case this function returns
 * to its caller.
 */
void
scanner_yyerror(const char *message, core_yyscan_t yyscanner)
{
	const char *loc = yyextra->scanbuf + *yylloc;
	int			errstate = ERROR;
	int			rc;

	u_sess->plsql_cxt.have_error = true;
	rc = CompileWhich();

#ifndef ENABLE_MULTIPLE_NODES
	if (rc != PLPGSQL_COMPILE_NULL && u_sess->attr.attr_common.plsql_show_all_error)
	{
		/* collect the error and downgrade it so compilation can go on */
		int		lines = GetProcedureLineNumberInPackage(
							u_sess->plsql_cxt.curr_compile_context->core_yy->scanbuf,
							u_sess->plsql_cxt.plpgsql_yylloc);

		addErrorList(message, lines);
		errstate = NOTICE;
	}
#endif

	if (rc == PLPGSQL_COMPILE_PACKAGE_PROC)
	{
		/* inside a package: name the function being compiled */
		PLpgSQL_function *func = u_sess->plsql_cxt.curr_compile_context->plpgsql_curr_compile;

		if (*loc != YY_END_OF_BUFFER_CHAR)
		{
			ereport(errstate,
				(errmodule(MOD_PLSQL), errcode(ERRCODE_SYNTAX_ERROR),
					errmsg("%s at or near \"%s\" when compile function %s", _(message), loc, func->fn_signature),
					errdetail("syntax error"),
					errcause("The package declaration contains a character string error."),
					erraction("Check character string")));
		}
		else
		{
			ereport(errstate,
				(errmodule(MOD_PLSQL), errcode(ERRCODE_SYNTAX_ERROR),
					errmsg("%s at end of input when compile function %s", _(message), func->fn_signature),
					errdetail("syntax error"),
					errcause("The package declaration contains a character string error."),
					erraction("Check character string")));
		}
		return;
	}

	if (*loc != YY_END_OF_BUFFER_CHAR)
	{
		ereport(errstate,
			(errcode(ERRCODE_SYNTAX_ERROR),
			 /* translator: first %s is typically the translation of "syntax error" */
			 errmsg("%s at or near \"%s\"", _(message), loc),
			 lexer_errposition()));
	}
	else
	{
		ereport(errstate,
			(errcode(ERRCODE_SYNTAX_ERROR),
			 /* translator: %s is typically the translation of "syntax error" */
			 errmsg("%s at end of input", _(message)),
			 lexer_errposition()));
	}
}


/*
 * Called before any actual parsing is done
 *
 * Creates a flex scanner for "str", binds "yyext" to it as its extra data,
 * resets the per-scan state kept in "yyext", and returns the scanner handle.
 * The input is copied into a palloc'd buffer, so "str" itself is not
 * retained.
 */
core_yyscan_t
scanner_init(const char *str,
			 core_yy_extra_type *yyext,
			 const ScanKeyword *keywords,
			 int num_keywords)
{
	Size		input_len = strlen(str);
	yyscan_t	lexer;

	if (yylex_init(&lexer) != 0)
	{
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					errmsg("yylex_init() failed: %m")));
	}

	core_yyset_extra(yyext, lexer);

	/* keyword table supplied by the caller */
	yyext->keywords = keywords;
	yyext->num_keywords = num_keywords;

	/* statement-tracking state */
	yyext->in_slash_proc_body = false;
	yyext->paren_depth = 0;
	yyext->query_string_locationlist = NIL;
	yyext->is_createstmt = false;
	yyext->dolqstart = NULL;
	yyext->is_hint_str = false;
	yyext->parameter_list = NIL;
	yyext->include_ora_comment = false;
	yyext->func_param_begin = 0;
	yyext->func_param_end = 0;

	/*
	 * flex wants its in-memory buffer to end with two
	 * YY_END_OF_BUFFER_CHAR bytes, hence the private copy that is two
	 * bytes longer than the input.
	 */
	yyext->scanbuf = (char *) palloc(input_len + 2);
	yyext->scanbuflen = input_len;
	memcpy(yyext->scanbuf, str, input_len);
	yyext->scanbuf[input_len + 1] = YY_END_OF_BUFFER_CHAR;
	yyext->scanbuf[input_len] = YY_END_OF_BUFFER_CHAR;
	yy_scan_buffer(yyext->scanbuf, input_len + 2, lexer);

	/* literal buffer starts small and is grown on demand */
	yyext->literalalloc = 1024;
	yyext->literalbuf = (char *) palloc(yyext->literalalloc);
	yyext->literallen = 0;
	yyext->warnOnTruncateIdent = true;

	/* plpgsql keyword params */
	yyext->isPlpgsqlKeyWord = false;
	yyext->plKeywordValue = NULL;
	yyext->is_delimiter_name = false;
	yyext->is_last_colon = false;
	yyext->is_proc_end = false;

	/* initflag = true: only resets the session-level parameter flags */
	getDynaParamSeq("init", true, true, NULL);

	return lexer;
}


/*
 * Called after parsing is done to clean up after scanner_init()
 *
 * When t_thrd.postgres_cxt.clear_key_memory is set, the scan buffer and the
 * literal buffer are overwritten with 0x7F before anything is released, so
 * that the statement text does not linger in memory.
 */
void
scanner_finish(core_yyscan_t yyscanner)
{
	if (t_thrd.postgres_cxt.clear_key_memory)
	{
		errno_t rc;

		memset(yyextra->scanbuf, 0x7F, yyextra->scanbuflen);
		/*
		 * Touch the buffer through a volatile pointer.
		 * NOTE(review): presumably there so the memset above cannot be
		 * discarded as a dead store -- confirm.
		 */
		*(volatile char*)(yyextra->scanbuf) = *(volatile char*)(yyextra->scanbuf);

		/*
		 * Wipe the whole allocation, not just the first literallen bytes.
		 * literallen is only the length of the literal currently held
		 * (addlit appends at that offset, litbufdup copies from offset 0),
		 * and the buffer is reused for each literal, so the tail of an
		 * earlier, longer literal can still sit beyond literallen.
		 * literalalloc starts at 1024 and only ever grows.
		 */
		rc = memset_s(yyextra->literalbuf, yyextra->literalalloc, 0x7F, yyextra->literalalloc);
		securec_check(rc, "\0", "\0");
	}

	/*
	 * We don't bother to call yylex_destroy(), because all it would do
	 * is pfree a small amount of control storage.  It's cheaper to leak
	 * the storage until the parsing context is destroyed.  The amount of
	 * space involved is usually negligible compared to the output parse
	 * tree anyway.
	 *
	 * We do bother to pfree the scanbuf and literal buffer, but only if they
	 * represent a nontrivial amount of space.  The 8K cutoff is arbitrary.
	 */
	if (yyextra->scanbuflen >= 8192)
		FREE_POINTER(yyextra->scanbuf);
	if (yyextra->literalalloc >= 8192)
		FREE_POINTER(yyextra->literalbuf);

	/* the parameter names collected by getDynaParamSeq() are always released */
	if (yyextra->parameter_list)
	{
		list_free_deep(yyextra->parameter_list);
		yyextra->parameter_list = NIL;
	}
}


/*
 * Append yleng bytes starting at ytext to the scanner's literal buffer,
 * doubling the buffer as often as needed so that at least one spare byte
 * remains after the appended data.
 */
static void
addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
{
	if (yyextra->literalalloc <= (yyextra->literallen + yleng))
	{
		/* keep doubling until the new data fits with room to spare */
		while (yyextra->literalalloc <= (yyextra->literallen + yleng))
			yyextra->literalalloc *= 2;

		/*
		 * Once the text exceeds 512M the doubled size goes beyond 1G,
		 * which is why repalloc_huge is used here.
		 */
		yyextra->literalbuf = (char *) repalloc_huge(yyextra->literalbuf,
													 yyextra->literalalloc);
	}

	/* copy the new bytes behind the existing contents */
	memcpy(yyextra->literalbuf + yyextra->literallen, ytext, yleng);
	yyextra->literallen += yleng;
}


/*
 * Append a single byte to the scanner's literal buffer, doubling the buffer
 * first if it has no spare room left.
 */
static void
addlitchar(unsigned char ychar, core_yyscan_t yyscanner)
{
	/* enlarge buffer if needed */
	if ((yyextra->literallen + 1) >= yyextra->literalalloc)
	{
		yyextra->literalalloc *= 2;
		/*
		 * Same buffer as in addlit(), so use the same allocator: a literal
		 * that grows past 512M one character at a time must be able to
		 * double beyond 1G here as well.
		 */
		yyextra->literalbuf = (char *) repalloc_huge(yyextra->literalbuf,
													 yyextra->literalalloc);
	}
	/* append new data */
	yyextra->literalbuf[yyextra->literallen] = ychar;
	yyextra->literallen += 1;
}

/*
 * B-format only: compare the token text with the session's delimiter_name
 * and update the scanner's delimiter bookkeeping.
 *
 *  - At the top level (no open parenthesis, not inside a slash-terminated
 *    procedure body) a match sets is_delimiter_name; unless the delimiter is
 *    ";", the token location is also recorded and is_createstmt is cleared.
 *  - Inside a procedure body, a match with a delimiter other than ";" sets
 *    is_proc_end.
 * In any other compatibility mode nothing is touched.
 */
static void set_is_delimiter_name(char* text, core_yyscan_t yyscanner)
{
	if (u_sess->attr.attr_sql.sql_compatibility != B_FORMAT) {
		return;
	}

	const bool matches_delimiter = (strcmp(text, u_sess->attr.attr_common.delimiter_name) == 0);
	const bool is_semicolon = (strcmp(text, ";") == 0);
	const bool at_top_level = (yyextra->paren_depth == 0 && !yyextra->in_slash_proc_body);

	if (matches_delimiter && at_top_level && !is_semicolon) {
		yyextra->query_string_locationlist = lappend_int(yyextra->query_string_locationlist, *yylloc);
		yyextra->is_createstmt = false;
	}
	yyextra->is_delimiter_name = (matches_delimiter && at_top_level);

	if (matches_delimiter && !is_semicolon && yyextra->in_slash_proc_body) {
		yyextra->is_proc_end = true;
	}
}


/*
 * Return a freshly palloc'd, null-terminated copy of the current contents
 * of the literal buffer (the first literallen bytes).
 */
static char *
litbufdup(core_yyscan_t yyscanner)
{
	const int	nbytes = yyextra->literallen;
	char	   *copy = (char *) palloc(nbytes + 1);

	memcpy(copy, yyextra->literalbuf, nbytes);
	copy[nbytes] = '\0';

	return copy;
}

/*
 * Convert an integer literal.  When the whole token parses as a decimal
 * number that fits in an int32, the value goes to lval->ival and ICONST is
 * returned; otherwise a copy of the text goes to lval->str and the token is
 * reported as FCONST.
 */
static int
process_integer_literal(const char *token, YYSTYPE *lval)
{
	char	   *stop;
	long		parsed;
	bool		fits_int4;

	errno = 0;
	parsed = strtol(token, &stop, 10);
	fits_int4 = (*stop == '\0' && errno != ERANGE);

#ifdef HAVE_LONG_INT_64
	/* if long > 32 bits, check for overflow of int4 */
	if (fits_int4 && parsed != (long) ((int32) parsed))
		fits_int4 = false;
#endif

	if (fits_int4)
	{
		lval->ival = parsed;
		return ICONST;
	}

	/* integer too large, treat it as a float */
	lval->str = pstrdup(token);
	return FCONST;
}

/*
 * Numeric value (0..15) of one hexadecimal digit, upper or lower case.
 * Anything else raises ERROR "invalid hexadecimal digit".
 */
static unsigned int
hexval(unsigned char c)
{
	unsigned int digit = 0;

	if ('0' <= c && c <= '9')
		digit = c - '0';
	else if ('a' <= c && c <= 'f')
		digit = 0xA + (c - 'a');
	else if ('A' <= c && c <= 'F')
		digit = 0xA + (c - 'A');
	else
		ereport(ERROR,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			errmsg("invalid hexadecimal digit")));

	return digit;
}

/*
 * Reject code points above 0x7F unless the database encoding is UTF8.
 * "loc" points into the literal buffer at the offending escape and is used
 * to move the reported error position there.
 */
static void
check_unicode_value(pg_wchar c, const char *loc, core_yyscan_t yyscanner)
{
	const bool	utf8_server = (GetDatabaseEncoding() == PG_UTF8);

	if (!utf8_server && c > 0x7F)
	{
		ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3);   /* 3 for U&" */
		yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
	}
}

/* True for the first half of a UTF-16 surrogate pair (0xD800..0xDBFF). */
static bool
is_utf16_surrogate_first(pg_wchar c)
{
	/* the 1024 values of that range are exactly those whose top bits are 0x36 */
	return (c >> 10) == (0xD800 >> 10);
}

/* True for the second half of a UTF-16 surrogate pair (0xDC00..0xDFFF). */
static bool
is_utf16_surrogate_second(pg_wchar c)
{
	/* the 1024 values of that range are exactly those whose top bits are 0x37 */
	return (c >> 10) == (0xDC00 >> 10);
}

/*
 * Combine the two halves of a UTF-16 surrogate pair into one code point:
 * ten payload bits from each half, offset by 0x10000.
 */
static pg_wchar
surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
{
	pg_wchar	high_bits = (first & 0x3FF) << 10;
	pg_wchar	low_bits = second & 0x3FF;

	/* the two fields do not overlap, so OR-ing them equals adding them */
	return 0x10000 + (high_bits | low_bits);
}

/*
 * Append one Unicode code point to the literal buffer in UTF-8 form.
 * Zero and values above 0x10FFFF are reported as invalid; values above 0x7F
 * are reported as unusable when the server encoding is not UTF8 and, in any
 * case, mark the literal as containing non-ASCII data.
 */
static void
addunicode(pg_wchar c, core_yyscan_t yyscanner)
{
	char		utf8[8];
	const bool	out_of_range = (c == 0 || c > 0x10FFFF);
	const bool	non_ascii = (c > 0x7F);

	if (out_of_range)
		yyerror("invalid Unicode escape value");

	if (non_ascii)
	{
		if (GetDatabaseEncoding() != PG_UTF8)
			yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
		yyextra->saw_non_ascii = true;
	}

	unicode_to_utf8(c, (unsigned char *) utf8);
	addlit(utf8, pg_mblen(utf8), yyscanner);
}

/*
 * Decode the Unicode escapes held in the literal buffer, using "escape" as
 * the escape character, and return the result as a newly palloc'd,
 * null-terminated string.
 *
 * Recognised sequences, where E stands for the escape character:
 *   E E          a literal escape character
 *   E XXXX       a code point given as four hex digits
 *   E +XXXXXX    a code point given as six hex digits
 * A first-half UTF-16 surrogate must be followed immediately by an escape
 * for a second-half surrogate; the two are merged into one code point.
 * Everything else is copied through unchanged.
 *
 * Errors are reported through yyerror() after moving the error position to
 * the offending place.
 * NOTE(review): scanner_yyerror() can report at NOTICE level during some
 * PL/SQL compiles, in which case a yyerror() call returns and the code
 * after it still runs -- confirm that is acceptable for each call below.
 */
static char *
litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
{
	char *newm;
	char *litbuf, *in, *out;
	pg_wchar pair_first = 0;	/* pending first surrogate, 0 when none */

	/* characters that would be ambiguous as an escape character */
	if (isxdigit(escape)
		|| escape == '+'
		|| escape == '\''
		|| escape == '"'
		|| scanner_isspace(escape))
	{
		ADVANCE_YYLLOC(yyextra->literallen + yyleng + 1);
		yyerror("invalid Unicode escape character");
	}

	/* Make literalbuf null-terminated to simplify the scanning loop */
	litbuf = yyextra->literalbuf;
	litbuf[yyextra->literallen] = '\0';

	/*
	 * This relies on the subtle assumption that a UTF-8 expansion
	 * cannot be longer than its escaped representation.
	 */
	newm = (char *)palloc(yyextra->literallen + 1);

	in = litbuf;
	out = newm;
	while (*in)
	{
		if (in[0] == escape)
		{
			if (in[1] == escape)
			{
				/* doubled escape character: emit it once */
				if (pair_first)
				{
					ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
					yyerror("invalid Unicode surrogate pair");
				}
				*out++ = escape;
				in += 2;
			}
			else if (isxdigit((unsigned char) in[1]) &&
					 isxdigit((unsigned char) in[2]) &&
					 isxdigit((unsigned char) in[3]) &&
					 isxdigit((unsigned char) in[4]))
			{
				/* four-digit form */
				pg_wchar unicode;

				unicode = (hexval(in[1]) << 12) +
					(hexval(in[2]) << 8) +
					(hexval(in[3]) << 4) +
					hexval(in[4]);
				check_unicode_value(unicode, in, yyscanner);
				if (pair_first)
				{
					/* a first surrogate is pending: this must complete it */
					if (is_utf16_surrogate_second(unicode))
					{
						unicode = surrogate_pair_to_codepoint(pair_first, unicode);
						pair_first = 0;
					}
					else
					{
						ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
						yyerror("invalid Unicode surrogate pair");
					}
				}
				else if (is_utf16_surrogate_second(unicode))
					yyerror("invalid Unicode surrogate pair");

				/* hold back a first surrogate until its partner arrives */
				if (is_utf16_surrogate_first(unicode))
					pair_first = unicode;
				else
				{
					unicode_to_utf8(unicode, (unsigned char *) out);
					out += pg_mblen(out);
				}
				in += 5;	/* escape character + 4 digits */
			}
			else if (in[1] == '+' &&
					 isxdigit((unsigned char) in[2]) &&
					 isxdigit((unsigned char) in[3]) &&
					 isxdigit((unsigned char) in[4]) &&
					 isxdigit((unsigned char) in[5]) &&
					 isxdigit((unsigned char) in[6]) &&
					 isxdigit((unsigned char) in[7]))
			{
				/* six-digit form; same surrogate handling as above */
				pg_wchar unicode;

				unicode = (hexval(in[2]) << 20) +
					(hexval(in[3]) << 16) +
					(hexval(in[4]) << 12) +
					(hexval(in[5]) << 8) +
					(hexval(in[6]) << 4) +
					hexval(in[7]);
				check_unicode_value(unicode, in, yyscanner);
				if (pair_first)
				{
					if (is_utf16_surrogate_second(unicode))
					{
						unicode = surrogate_pair_to_codepoint(pair_first, unicode);
						pair_first = 0;
					}
					else
					{
						ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
						yyerror("invalid Unicode surrogate pair");
					}
				}
				else if (is_utf16_surrogate_second(unicode))
					yyerror("invalid Unicode surrogate pair");

				if (is_utf16_surrogate_first(unicode))
					pair_first = unicode;
				else
				{
					unicode_to_utf8(unicode, (unsigned char *) out);
					out += pg_mblen(out);
				}
				in += 8;	/* escape character + '+' + 6 digits */
			}
			else
			{
				/* escape character not followed by a valid sequence */
				ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
				yyerror("invalid Unicode escape value");
			}
		}
		else
		{
			/* ordinary byte; not allowed while a first surrogate is pending */
			if (pair_first)
			{
				ADVANCE_YYLLOC(in - litbuf + 3);   /* 3 for U&" */
				yyerror("invalid Unicode surrogate pair");
			}
			*out++ = *in++;
		}
	}

	/* unfinished surrogate pair? */
	if (pair_first)
	{
		ADVANCE_YYLLOC(in - litbuf + 3);			/* 3 for U&" */
		yyerror("invalid Unicode surrogate pair");
	}

	*out = '\0';
	/*
	 * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
	 * codes; but it's probably not worth the trouble, since this isn't
	 * likely to be a performance-critical path.
	 */
	pg_verifymbstr(newm, out - newm, false);
	return newm;
}

/*
 * Translate the character following a backslash.  b, f, n, r and t map to
 * their control characters; every other character stands for itself.  A
 * NUL or a byte with the high bit set additionally flags the literal as
 * containing non-ASCII data.
 */
static unsigned char
unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
{
	unsigned char decoded = c;

	switch (c)
	{
		case 'b':
			decoded = '\b';
			break;
		case 'f':
			decoded = '\f';
			break;
		case 'n':
			decoded = '\n';
			break;
		case 'r':
			decoded = '\r';
			break;
		case 't':
			decoded = '\t';
			break;
		default:
			/* check for backslash followed by non-7-bit-ASCII */
			if (c == '\0' || IS_HIGHBIT_SET(c))
				yyextra->saw_non_ascii = true;
			break;
	}

	return decoded;
}

/*
 * Warn (at most once per string) about a backslash escape in an ordinary
 * string literal.  \' and \\ get their own messages; any other escaped
 * character is delegated to check_escape_warning().
 */
static void
check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner)
{
	bool		emit;

	if (ychar != '\'' && ychar != '\\')
	{
		check_escape_warning(yyscanner);
		return;
	}

	emit = yyextra->warn_on_first_escape && u_sess->attr.attr_sql.escape_string_warning;

	if (emit && ychar == '\'')
	{
		ereport(WARNING,
				(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
				 errmsg("nonstandard use of \\' in a string literal"),
				 errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
				 lexer_errposition()));
	}
	else if (emit)
	{
		ereport(WARNING,
				(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
				 errmsg("nonstandard use of \\\\ in a string literal"),
				 errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
				 lexer_errposition()));
	}

	/* warn only once per string, whether or not a warning was issued */
	yyextra->warn_on_first_escape = false;
}

/*
 * Generic "nonstandard escape" warning: issued when this is the first
 * escape of the string and escape_string_warning is enabled.  The
 * first-escape flag is cleared in every case.
 */
static void
check_escape_warning(core_yyscan_t yyscanner)
{
	const bool	emit = yyextra->warn_on_first_escape &&
					   u_sess->attr.attr_sql.escape_string_warning;

	if (emit)
	{
		ereport(WARNING,
				(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
				 errmsg("nonstandard use of escape in a string literal"),
				 errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
				 lexer_errposition()));
	}

	/* warn only once per string */
	yyextra->warn_on_first_escape = false;
}

/*
 * Interface functions to make flex use palloc() instead of malloc().
 * It'd be better to make these static, but flex insists otherwise.
 */

/* flex allocation hook: hand out palloc'd memory; yyscanner is not used. */
void *
core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner)
{
	void	   *chunk = palloc(bytes);

	return chunk;
}

/*
 * flex reallocation hook.  A NULL pointer is treated as a plain allocation
 * request; anything else is resized with repalloc.
 */
void *
core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner)
{
	if (ptr == NULL)
		return palloc(bytes);

	return repalloc(ptr, bytes);
}

/* flex deallocation hook: release the block; NULL is silently ignored. */
void
core_yyfree(void *ptr, core_yyscan_t yyscanner)
{
	if (ptr == NULL)
		return;

	FREE_POINTER(ptr);
}


/*
 * @Description: get the sequence number of a dynamic SQL parameter
 * @in string: parameter text (digits for a numbered parameter, a name for a
 *             placeholder)
 * @in initflag: when true, only reset the session-level parameter flags and
 *               return 0; the other arguments are ignored
 * @in placeholder: true for a named placeholder, false for a numbered
 *                  parameter; mixing both kinds in one statement is an error
 * @in yyscanner: for yyextra (not touched when initflag is true or
 *                placeholder is false)
 * @return - for a numbered parameter, the number converted with atol();
 *           for a placeholder, its 1-based position in the scanner's
 *           parameter list (the name is appended when first seen)
 */
long
getDynaParamSeq(const char *string, bool initflag, bool placeholder, core_yyscan_t yyscanner)
{
	const ListCell *lc;
	int			position = 1;

	if (initflag)
	{
		u_sess->parser_cxt.is_load_copy = false;
		u_sess->parser_cxt.col_list = NULL;
		u_sess->parser_cxt.has_dollar = false;
		u_sess->parser_cxt.has_placeholder = false;
		return 0;
	}

	if (!placeholder)
	{
		/* numbered parameter */
		if (u_sess->parser_cxt.has_placeholder)
		{
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					errmsg("It is forbidden to use placeholder and dollar quoting together.")));
		}
		u_sess->parser_cxt.has_dollar = true;
		return atol(string);
	}

	/* named placeholder */
	u_sess->parser_cxt.has_placeholder = true;
	if (u_sess->parser_cxt.has_dollar)
	{
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				errmsg("It is forbidden to use placeholder and dollar quoting together.")));
	}

	if (u_sess->parser_cxt.is_load_copy == true)
	{
		/* remember the field name, folded to lower case unless it was quoted */
		char	   *field = pstrdup(string);

		if (!yyextra->ident_quoted)
			field = pg_strtolower(field);
		u_sess->parser_cxt.copy_fieldname = field;
	}

	/* an already known name keeps its position */
	foreach(lc, yyextra->parameter_list)
	{
		if (strcmp((char *) (lfirst(lc)), string) == 0)
			return position;
		position++;
	}

	/* new name: append it; "position" is now list length + 1 */
	yyextra->parameter_list = lappend(yyextra->parameter_list, (void *) pstrdup(string));

	return position;
}

/*
 * @Description: after finding BEGIN, check whether it starts a transaction
 *               statement
 * @param[IN] haystack: the source string
 * @param[IN] haystack_len: the length of haystack. Note that haystack may
 *            have been split into pieces by '\0', so the length cannot be
 *            derived with strlen().
 * @return: true for a transaction statement, false otherwise.
 *
 * Only the second '\0'-separated piece is examined.  The statement counts as
 * a transaction statement when the first non-blank text of that piece starts
 * with one of the words that may follow BEGIN in a transaction command, or
 * when no non-blank character is found at all.  The latter covers input such
 * as "begin   ", which is not terminated by ';' and is followed only by
 * blanks.
 */
static bool
is_trans_stmt(const char *haystack, int haystack_len)
{
	/* For a transaction statement, all possible tokens after BEGIN are here */
	static const char *const trans_tokens[] = {
		"transaction", "work", "isolation", "read", "deferrable", "not", ";"
	};
	const size_t ntokens = sizeof(trans_tokens) / sizeof(trans_tokens[0]);

	/* palloc0 guarantees a terminating '\0' after the copied bytes */
	char *tempstr = (char *)palloc0(haystack_len + 1);
	char *temp = tempstr;
	char *end = tempstr + haystack_len;
	int line = 1; /* index of the current '\0'-separated piece */
	bool is_trans = true; /* stays true if only blanks follow BEGIN */
	errno_t rc = EOK;

	/* work on a copy, since haystack is const char* */
	rc = memcpy_s(tempstr, haystack_len + 1, haystack, haystack_len);
	securec_check_ss(rc, "\0", "\0");

	while (temp < end)
	{
		if (*temp == '\0')
		{
			/* separator: move on to the next piece; only the 2nd one matters */
			temp++;
			line++;
			if (line > 2)
				break;
		}
		else if (isspace((unsigned char) *temp))
		{
			/*
			 * The cast matters: passing a negative plain char (e.g. a byte
			 * of a multibyte character) to isspace() is undefined.
			 */
			temp++;
		}
		else if (line == 2)
		{
			/* first non-blank text after BEGIN decides the outcome */
			is_trans = false;
			for (size_t i = 0; i < ntokens; i++)
			{
				if (pg_strncasecmp(temp, trans_tokens[i], strlen(trans_tokens[i])) == 0)
				{
					is_trans = true;
					break;
				}
			}
			break;
		}
		else
		{
			/* still in the first piece: jump to its terminating '\0' */
			temp += strlen(temp);
		}
	}

	pfree(tempstr);

	/*
	 * true  -> transaction statement
	 * false -> anonymous block statement
	 */
	return is_trans;
}

/*
 * Append one (message, line) entry to the session's PL/SQL error list.
 * The entry and its copy of the message are allocated in the session's
 * optimizer memory context group; the caller's context is restored before
 * returning.
 */
void addErrorList(const char* message, int lines)
{
    MemoryContext saved_cxt = MemoryContextSwitchTo(SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_OPTIMIZER));
    PLpgSQL_error* entry = (PLpgSQL_error*)palloc(sizeof(PLpgSQL_error));

    entry->errmsg = pstrdup(message);
    entry->line = lines;
    u_sess->plsql_cxt.errorList = lappend(u_sess->plsql_cxt.errorList, entry);

    (void)MemoryContextSwitchTo(saved_cxt);
}