[FEAT MERGE]charset latin1

This commit is contained in:
akaError
2023-01-28 15:43:48 +08:00
committed by ob-robot
parent 2663894581
commit af2506b14c
42 changed files with 1215 additions and 392 deletions

View File

@ -191,6 +191,8 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
idf_pos = is_utf8_char(pos);
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
idf_pos = is_gbk_char(pos);
} else if (CHARSET_LATIN1 == charset_type_) {
idf_pos = is_latin1_char(pos);
}
return idf_pos;
}
@ -501,6 +503,15 @@ int ObFastParserBase::process_interval()
return ret;
}
// {U}
// Latin1 characters are one byte wide: when the byte at `pos` is accepted by
// is_latin1(), return the position right after it (pos + 1); otherwise
// return -1 to signal "not an identifier character".
inline int64_t ObFastParserBase::is_latin1_char(const int64_t pos)
{
  const bool accepted = is_latin1(raw_sql_.char_at(pos));
  return accepted ? pos + 1 : static_cast<int64_t>(-1);
}
// ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U})
inline int64_t ObFastParserBase::is_utf8_char(const int64_t pos)
{
@ -1154,6 +1165,8 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
idf_pos = is_utf8_char(pos);
} else if (CHARSET_GBK == charset_type_ || CHARSET_GB18030 == charset_type_) {
idf_pos = is_gbk_char(pos);
} else if (CHARSET_LATIN1 == charset_type_) {
idf_pos = is_latin1_char(pos);
}
return idf_pos;
}

View File

@ -318,6 +318,8 @@ protected:
return is_valid_char(ch) && USER_VAR_CHAR[static_cast<uint8_t>(ch)];
}
void reset_parser_node(ParseNode *node);
// {U}: a latin1 character is a single byte; returns pos + 1 on a match, -1 otherwise
int64_t is_latin1_char(const int64_t pos);
// ({U_2}{U}|{U_3}{U}{U}|{U_4}{U}{U}{U})
int64_t is_utf8_char(const int64_t pos);
// NOTES: No boundary check, the caller guarantees safety!!!
@ -375,7 +377,11 @@ protected:
return is_valid_char(ch) &&
static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0xfe;
}
// Returns true when `ch` can be a latin1 character.
// This charset check accepts the whole byte range 0x00-0xFF, so the only
// remaining condition is is_valid_char(). The previous explicit range test on
// static_cast<uint8_t>(ch) was always true for an 8-bit unsigned value (it
// only produced tautological-compare warnings), so dropping it does not
// change the result for any input.
inline bool is_latin1(char ch)
{
  return is_valid_char(ch);
}
// [0-9]{n}
inline bool is_n_continuous_digits(const char *str,
const int64_t pos,

View File

@ -232,7 +232,10 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/: {
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
//case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
//case 47/*CS_TYPE_LATIN1_BIN*/:
{
if (i + 2 < dup_len) {
if (str[i] == (char)0xe3 && str[i+1] == (char)0x80 && str[i+2] == (char)0x80) {
//utf8 multi byte space

View File

@ -112,6 +112,7 @@ _UTF8 { REPUT_TOKEN_NEG_SIGN(_UTF8); }
_UTF8MB4 { REPUT_TOKEN_NEG_SIGN(_UTF8MB4); }
_GBK { REPUT_TOKEN_NEG_SIGN(_GBK); }
_GB18030 { REPUT_TOKEN_NEG_SIGN(_GB18030); }
_LATIN1 { REPUT_TOKEN_NEG_SIGN(_LATIN1); }
_BINARY { REPUT_TOKEN_NEG_SIGN(_BINARY); }
_UTF16 { REPUT_TOKEN_NEG_SIGN(_UTF16); }
NOT {

View File

@ -186,7 +186,7 @@ APPEND NO_GATHER_OPTIMIZER_STATISTICS GATHER_OPTIMIZER_STATISTICS
NEG_SIGN
%token /*can not be relation name*/
_BINARY _UTF8 _UTF8MB4 _GBK _UTF16 _GB18030 CNNOP
_BINARY _UTF8 _UTF8MB4 _GBK _UTF16 _GB18030 _LATIN1 CNNOP
SELECT_HINT_BEGIN UPDATE_HINT_BEGIN DELETE_HINT_BEGIN INSERT_HINT_BEGIN REPLACE_HINT_BEGIN HINT_HINT_BEGIN HINT_END
LOAD_DATA_HINT_BEGIN CREATE_HINT_BEGIN
END_P SET_VAR DELIMITER
@ -952,6 +952,15 @@ _UTF8
YYABORT_NO_MEMORY;
}
}
| _LATIN1
{
/* _LATIN1 token: produce a T_CHARSET terminal node whose str_value_ is the
 * literal "latin1", duplicated by parse_strdup with result->malloc_pool_
 * (the length is written to str_len_). */
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
$$->str_value_ = parse_strdup("latin1", result->malloc_pool_, &($$->str_len_));
/* A NULL str_value_ is reported through yyerror and the parse is aborted
 * with YYABORT_NO_MEMORY. */
if (OB_UNLIKELY(NULL == $$->str_value_)) {
yyerror(NULL, result, "No more space for mallocing string");
YYABORT_NO_MEMORY;
}
}
| _GB18030
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);