[CP] [FEAT MERGE]字符集从42xrelease分支 patch 合入master分支
This commit is contained in:
parent
e2ca6caa6a
commit
0dc70f6600
5
.gitignore
vendored
5
.gitignore
vendored
@ -148,6 +148,11 @@ src/sql/parser/sql_parser_oracle_gbk_mode_lex.c
|
||||
src/sql/parser/sql_parser_oracle_gbk_mode_lex.h
|
||||
src/sql/parser/sql_parser_oracle_gbk_mode_tab.c
|
||||
src/sql/parser/sql_parser_oracle_gbk_mode_tab.h
|
||||
src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
|
||||
src/sql/parser/sql_parser_oracle_hkscs_mode_lex.h
|
||||
src/sql/parser/sql_parser_oracle_hkscs_mode_tab.c
|
||||
src/sql/parser/sql_parser_oracle_hkscs_mode_tab.h
|
||||
src/sql/parser/non_reserved_keywords_oracle_hkscs_mode.c
|
||||
src/sql/parser/sql_parser_oracle_utf8_mode_lex.c
|
||||
src/sql/parser/sql_parser_oracle_utf8_mode_lex.h
|
||||
src/sql/parser/sql_parser_oracle_utf8_mode_tab.c
|
||||
|
2
deps/oblib/src/common/object/ob_obj_funcs.h
vendored
2
deps/oblib/src/common/object/ob_obj_funcs.h
vendored
@ -1297,7 +1297,7 @@ inline int obj_print_plain_str<ObHexStringType>(const ObObj &obj, char *buffer,
|
||||
PRINT_META(); \
|
||||
BUF_PRINTO(ob_obj_type_str(obj.get_type())); \
|
||||
J_COLON(); \
|
||||
if (obj.is_binary() || src_type == CHARSET_UTF16) { \
|
||||
if (obj.is_binary() || src_type == CHARSET_UTF16 || src_type == CHARSET_UTF16LE) { \
|
||||
hex_print(obj.get_string_ptr(), obj.get_string_len(), buf, buf_len, pos); \
|
||||
} else { \
|
||||
BUF_PRINTO(obj.get_varchar()); \
|
||||
|
60
deps/oblib/src/common/object/ob_object.h
vendored
60
deps/oblib/src/common/object/ob_object.h
vendored
@ -303,15 +303,15 @@ public:
|
||||
OB_INLINE bool is_decimal_int() const { return type_ == static_cast<uint8_t>(ObDecimalIntType); }
|
||||
OB_INLINE bool is_varchar() const
|
||||
{
|
||||
return ((type_ == static_cast<uint8_t>(ObVarcharType)) && (CS_TYPE_BINARY != cs_type_));
|
||||
return ((type_ == static_cast<uint8_t>(ObVarcharType)) && (CS_TYPE_BINARY != get_collation_type()));
|
||||
}
|
||||
OB_INLINE bool is_char() const
|
||||
{
|
||||
return ((type_ == static_cast<uint8_t>(ObCharType)) && (CS_TYPE_BINARY != cs_type_));
|
||||
return ((type_ == static_cast<uint8_t>(ObCharType)) && (CS_TYPE_BINARY != get_collation_type()));
|
||||
}
|
||||
OB_INLINE bool is_varbinary() const
|
||||
{
|
||||
return (type_ == static_cast<uint8_t>(ObVarcharType) && CS_TYPE_BINARY == cs_type_);
|
||||
return (type_ == static_cast<uint8_t>(ObVarcharType) && CS_TYPE_BINARY == get_collation_type());
|
||||
}
|
||||
static bool is_binary(const ObObjType type, const ObCollationType cs_type)
|
||||
{
|
||||
@ -319,11 +319,11 @@ public:
|
||||
}
|
||||
OB_INLINE bool is_binary() const
|
||||
{
|
||||
return is_binary(static_cast<ObObjType>(type_), static_cast<ObCollationType>(cs_type_));
|
||||
return is_binary(static_cast<ObObjType>(type_), get_collation_type());
|
||||
}
|
||||
OB_INLINE bool is_cs_collation_free() const
|
||||
{
|
||||
return cs_type_ == CS_TYPE_UTF8MB4_GENERAL_CI || cs_type_ == CS_TYPE_UTF8MB4_BIN;
|
||||
return get_collation_type() == CS_TYPE_UTF8MB4_GENERAL_CI || get_collation_type() == CS_TYPE_UTF8MB4_BIN;
|
||||
}
|
||||
OB_INLINE bool is_hex_string() const { return type_ == static_cast<uint8_t>(ObHexStringType); }
|
||||
OB_INLINE bool is_raw() const { return type_ == static_cast<uint8_t>(ObRawType); }
|
||||
@ -337,23 +337,23 @@ public:
|
||||
|| type_ == static_cast<uint8_t>(ObSetType); }
|
||||
OB_INLINE bool is_text() const
|
||||
{
|
||||
return (ob_is_text_tc(get_type()) && CS_TYPE_BINARY != cs_type_);
|
||||
return (ob_is_text_tc(get_type()) && CS_TYPE_BINARY != get_collation_type());
|
||||
}
|
||||
/*OB_INLINE bool is_oracle_clob() const
|
||||
{
|
||||
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY != cs_type_);
|
||||
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY != get_collation_type());
|
||||
}*/
|
||||
OB_INLINE bool is_clob() const
|
||||
{
|
||||
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY != cs_type_);
|
||||
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY != get_collation_type());
|
||||
}
|
||||
/*OB_INLINE bool is_oracle_blob() const
|
||||
{
|
||||
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY == cs_type_);
|
||||
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY == get_collation_type());
|
||||
}*/
|
||||
OB_INLINE bool is_blob() const
|
||||
{
|
||||
return (ob_is_text_tc(get_type()) && CS_TYPE_BINARY == cs_type_);
|
||||
return (ob_is_text_tc(get_type()) && CS_TYPE_BINARY == get_collation_type());
|
||||
}
|
||||
OB_INLINE bool is_lob_storage() const
|
||||
{ return ob_is_large_text(get_type())
|
||||
@ -416,24 +416,46 @@ public:
|
||||
OB_INLINE bool is_oracle_decimal() const { return ObNumberType == type_ || ObFloatType == type_ || ObDoubleType == type_ || ObDecimalIntType == type_; }
|
||||
|
||||
OB_INLINE bool is_urowid() const { return ObURowIDType == type_; }
|
||||
OB_INLINE bool is_blob_locator() const { return (ObLobType == type_ && CS_TYPE_BINARY == cs_type_); }
|
||||
OB_INLINE bool is_clob_locator() const { return (ObLobType == type_ && CS_TYPE_BINARY != cs_type_); }
|
||||
OB_INLINE bool is_blob_locator() const { return (ObLobType == type_ && CS_TYPE_BINARY == get_collation_type()); }
|
||||
OB_INLINE bool is_clob_locator() const { return (ObLobType == type_ && CS_TYPE_BINARY != get_collation_type()); }
|
||||
OB_INLINE bool is_lob_locator() const { return ObLobType == type_; }
|
||||
|
||||
OB_INLINE bool is_interval_type() const { return is_interval_ds() || is_interval_ym(); }
|
||||
OB_INLINE bool is_oracle_temporal_type() const { return is_datetime() || is_otimestamp_type() || is_interval_type(); }
|
||||
|
||||
OB_INLINE void set_collation_level(ObCollationLevel cs_level) { cs_level_ = cs_level; }
|
||||
OB_INLINE void set_collation_type(ObCollationType cs_type) { cs_type_ = cs_type; }
|
||||
OB_INLINE ObCollationType get_collation_type() {
|
||||
// ObUserDefinedSQLType reused cs_type as part of sub schema id, therefore always return CS_TYPE_BINARY
|
||||
return (is_user_defined_sql_type() || is_collection_sql_type()) ? CS_TYPE_BINARY : static_cast<ObCollationType>(cs_type_);
|
||||
OB_INLINE void set_cs_level(uint8_t cs_level) {
|
||||
cs_level_ = cs_level;
|
||||
}
|
||||
OB_INLINE uint8_t get_cs_level() {
|
||||
return cs_level_;
|
||||
}
|
||||
OB_INLINE void set_cs_type(uint8_t cs_type) {
|
||||
cs_type_ = cs_type;
|
||||
}
|
||||
OB_INLINE uint8_t get_cs_type() {
|
||||
return cs_type_;
|
||||
}
|
||||
|
||||
// Stores the collation level in the low 4 bits of cs_level_.
// The high 4 bits of cs_level_ are left untouched; set_collation_type()
// uses them for bits 8-11 of the collation id.
OB_INLINE void set_collation_level(ObCollationLevel cs_level) {
|
||||
cs_level_ = (cs_level_ & 0xF0) | (cs_level & 0xF);
|
||||
}
|
||||
// Stores a collation id split across two bytes:
//   - bits 0-7  go into cs_type_;
//   - bits 8-11 go into the high 4 bits of cs_level_ (the low 4 bits, which
//     hold the collation level, are preserved).
// Bits above bit 11 of cs_type are discarded by the 0xF00 mask.
OB_INLINE void set_collation_type(ObCollationType cs_type) {
|
||||
cs_type_ = (cs_type & 0xFF);
|
||||
cs_level_ = (cs_level_ & 0xF) | ((cs_type & 0xF00) >> 4);
|
||||
}
|
||||
// Non-const overload. Returns CS_TYPE_BINARY when is_user_defined_sql_type()
// or is_collection_sql_type() is true; otherwise rebuilds the collation id
// from cs_type_ (bits 0-7) and the high 4 bits of cs_level_ (shifted up to
// bits 8-11), i.e. the inverse of the packing done by set_collation_type().
OB_INLINE ObCollationType get_collation_type() {
|
||||
return (is_user_defined_sql_type() || is_collection_sql_type()) ? CS_TYPE_BINARY:
|
||||
static_cast<ObCollationType>((uint16_t)cs_type_ | (((uint16_t)cs_level_ & 0xF0) << 4));
|
||||
}
|
||||
|
||||
OB_INLINE void set_default_collation_type() { set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); }
|
||||
OB_INLINE ObCollationLevel get_collation_level() const { return static_cast<ObCollationLevel>(cs_level_); }
|
||||
// Returns the collation level held in the low 4 bits of cs_level_.
// The high 4 bits are masked off here; set_collation_type() stores
// collation-id bits 8-11 in them.
OB_INLINE ObCollationLevel get_collation_level() const {
|
||||
return static_cast<ObCollationLevel>(cs_level_ & 0x0F);
|
||||
}
|
||||
OB_INLINE ObCollationType get_collation_type() const {
|
||||
// ObUserDefinedSQLType reused cs_type as part of sub schema id, therefore always return CS_TYPE_BINARY
|
||||
return (is_user_defined_sql_type() || is_collection_sql_type()) ? CS_TYPE_BINARY : static_cast<ObCollationType>(cs_type_);
|
||||
return (is_user_defined_sql_type() || is_collection_sql_type()) ? CS_TYPE_BINARY :
|
||||
static_cast<ObCollationType>((uint16_t)cs_type_ | (((uint16_t)cs_level_ & 0xF0) << 4) );
|
||||
}
|
||||
OB_INLINE ObCharsetType get_charset_type() const {
|
||||
return ObCharset::charset_type_by_coll(get_collation_type());
|
||||
|
4
deps/oblib/src/lib/CMakeLists.txt
vendored
4
deps/oblib/src/lib/CMakeLists.txt
vendored
@ -35,6 +35,10 @@ ob_set_subtarget(oblib_lib charset
|
||||
charset/uca900_zh2_tbls.cc
|
||||
charset/uca900_zh3_tbls.cc
|
||||
charset/ob_charset.cpp
|
||||
charset/ob_ctype_sjis.cc
|
||||
charset/ob_ctype_big5.cc
|
||||
charset/ob_ctype_hkscs.cc
|
||||
charset/ob_ctype_extra.cc
|
||||
)
|
||||
|
||||
ob_set_subtarget(oblib_lib common
|
||||
|
1
deps/oblib/src/lib/charset/ob_byteorder.h
vendored
1
deps/oblib/src/lib/charset/ob_byteorder.h
vendored
@ -16,6 +16,7 @@
|
||||
#include <stdint.h>
|
||||
#include "lib/charset/ob_template_helper.h"
|
||||
#include <netinet/in.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
Functions for big-endian loads and stores. These are safe to use
|
||||
|
1007
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
1007
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
File diff suppressed because it is too large
Load Diff
214
deps/oblib/src/lib/charset/ob_charset.h
vendored
214
deps/oblib/src/lib/charset/ob_charset.h
vendored
@ -40,6 +40,12 @@ enum ObCharsetType
|
||||
CHARSET_GB18030_2022 = 7,
|
||||
CHARSET_ASCII = 8,
|
||||
CHARSET_TIS620 = 9,
|
||||
CHARSET_UTF16LE = 10,
|
||||
CHARSET_SJIS = 11,
|
||||
CHARSET_BIG5 = 12,
|
||||
CHARSET_HKSCS = 13,
|
||||
CHARSET_HKSCS31 = 14,
|
||||
CHARSET_DEC8 = 15,
|
||||
CHARSET_MAX,
|
||||
};
|
||||
|
||||
@ -51,24 +57,34 @@ enum ObCharsetType
|
||||
*there is no possibly to reach AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_UTF8MB4] and so on
|
||||
*/
|
||||
static const int AGGREGATE_2CHARSET[CHARSET_MAX][CHARSET_MAX] = {
|
||||
//CHARSET_INVALI,CHARSET_UTF8MB4...
|
||||
{0,0,0,0,0,0,0,0,0,0},//CHARSET_INVALI
|
||||
{0,0,0,0,0,0,0,0,0,0},//CHARSET_BINARY
|
||||
{0,0,0,1,2,1,1,1,1,1},//CHARSET_UTF8MB4
|
||||
{0,0,2,0,2,0,1,0,1,0},//CHARSET_GBK
|
||||
{0,0,1,1,0,1,1,1,1,1},//CHARSET_UTF16
|
||||
{0,0,2,0,2,0,1,0,1,0},//CHARSET_GB18030
|
||||
{0,0,2,2,2,2,0,2,1,0},//CHARSET_LATIN1
|
||||
{0,0,2,0,2,0,1,0,1,0}, //CHARSET_GB18030_2022
|
||||
{0,0,2,2,2,2,2,2,0,2},//CHARSET_ASCII
|
||||
{0,0,2,0,2,0,0,0,1,0},//CHARSET_TIS620
|
||||
//CHARSET_INVALID,CHARSET_BINARY,CHARSET_UTF8MB4...
|
||||
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},//CHARSET_INVALID
|
||||
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},//CHARSET_BINARY
|
||||
{0,0,0,1,2,1,1,1,1,1,1,1,1,1,1,1},//CHARSET_UTF8MB4
|
||||
{0,0,2,0,2,0,1,0,1,0,2,0,0,0,0,0},//CHARSET_GBK
|
||||
{0,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1},//CHARSET_UTF16
|
||||
{0,0,2,0,2,0,1,0,1,0,2,0,0,0,0,0},//CHARSET_GB18030
|
||||
{0,0,2,2,2,2,0,2,1,0,2,0,0,0,0,0},//CHARSET_LATIN1
|
||||
{0,0,2,0,2,0,1,0,1,0,2,0,0,0,0,0},//CHARSET_GB18030_2022
|
||||
{0,0,2,2,2,2,2,2,0,2,2,2,2,2,2,2},//CHARSET_ASCII
|
||||
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0},//CHARSET_TIS620
|
||||
{0,0,2,1,0,1,1,1,1,1,0,1,1,1,1,1}, // UTF16LE
|
||||
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0}, // SJIS
|
||||
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0}, // BIG5
|
||||
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0}, // HKSCS
|
||||
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0}, // HKSCS31
|
||||
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0},// DEC8
|
||||
};
|
||||
|
||||
enum ObCollationType
|
||||
{
|
||||
CS_TYPE_INVALID = 0,
|
||||
CS_TYPE_BIG5_CHINESE_CI = 1,
|
||||
CS_TYPE_DEC8_SWEDISH_CI = 3,
|
||||
CS_TYPE_LATIN1_SWEDISH_CI = 8,
|
||||
CS_TYPE_ASCII_GENERAL_CI = 11,
|
||||
CS_TYPE_SJIS_JAPANESE_CI = 13,
|
||||
|
||||
CS_TYPE_TIS620_THAI_CI = 18,
|
||||
CS_TYPE_GBK_CHINESE_CI = 28,
|
||||
CS_TYPE_UTF8MB4_GENERAL_CI = 45,
|
||||
@ -76,13 +92,44 @@ enum ObCollationType
|
||||
CS_TYPE_LATIN1_BIN = 47,
|
||||
CS_TYPE_UTF16_GENERAL_CI = 54,
|
||||
CS_TYPE_UTF16_BIN = 55,
|
||||
CS_TYPE_UTF16LE_GENERAL_CI = 56,
|
||||
CS_TYPE_UTF16LE_BIN = 62,
|
||||
CS_TYPE_BINARY = 63,
|
||||
CS_TYPE_ASCII_BIN = 65,
|
||||
CS_TYPE_DEC8_BIN = 69,
|
||||
CS_TYPE_BIG5_BIN = 84,
|
||||
CS_TYPE_GBK_BIN = 87,
|
||||
CS_TYPE_SJIS_BIN = 88,
|
||||
|
||||
CS_TYPE_TIS620_BIN = 89,
|
||||
CS_TYPE_COLLATION_FREE = 100, // mysql中间没有使用这个
|
||||
CS_TYPE_UTF16_UNICODE_CI = 101,
|
||||
CS_TYPE_UTF16_ICELANDIC_UCA_CI = 102,
|
||||
CS_TYPE_UTF16_LATVIAN_UCA_CI = 103,
|
||||
CS_TYPE_UTF16_ROMANIAN_UCA_CI = 104,
|
||||
CS_TYPE_UTF16_SLOVENIAN_UCA_CI = 105,
|
||||
CS_TYPE_UTF16_POLISH_UCA_CI = 106,
|
||||
CS_TYPE_UTF16_ESTONIAN_UCA_CI = 107,
|
||||
CS_TYPE_UTF16_SPANISH_UCA_CI = 108,
|
||||
CS_TYPE_UTF16_SWEDISH_UCA_CI = 109,
|
||||
CS_TYPE_UTF16_TURKISH_UCA_CI = 110,
|
||||
CS_TYPE_UTF16_CZECH_UCA_CI = 111,
|
||||
CS_TYPE_UTF16_DANISH_UCA_CI = 112,
|
||||
CS_TYPE_UTF16_LITHUANIAN_UCA_CI = 113,
|
||||
CS_TYPE_UTF16_SLOVAK_UCA_CI = 114,
|
||||
CS_TYPE_UTF16_SPANISH2_UCA_CI = 115,
|
||||
CS_TYPE_UTF16_ROMAN_UCA_CI = 116,
|
||||
CS_TYPE_UTF16_PERSIAN_UCA_CI = 117,
|
||||
CS_TYPE_UTF16_ESPERANTO_UCA_CI = 118,
|
||||
CS_TYPE_UTF16_HUNGARIAN_UCA_CI = 119,
|
||||
CS_TYPE_UTF16_SINHALA_UCA_CI = 120,
|
||||
CS_TYPE_UTF16_GERMAN2_UCA_CI = 121,
|
||||
CS_TYPE_UTF16_CROATIAN_UCA_CI = 122,
|
||||
CS_TYPE_UTF16_UNICODE_520_CI = 123,
|
||||
CS_TYPE_UTF16_VIETNAMESE_CI = 124,
|
||||
CS_TYPE_ANY = 125, // unused in mysql
|
||||
CS_TYPE_HKSCS_BIN = 152,
|
||||
CS_TYPE_HKSCS31_BIN = 153,
|
||||
CS_TYPE_GB18030_2022_BIN = 216, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_PINYIN_CI = 217, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_PINYIN_CS = 218, // unused in mysql
|
||||
@ -91,20 +138,98 @@ enum ObCollationType
|
||||
CS_TYPE_GB18030_2022_STROKE_CI = 221, // unused in mysql
|
||||
CS_TYPE_GB18030_2022_STROKE_CS = 222, // unused in mysql
|
||||
CS_TYPE_UTF8MB4_UNICODE_CI = 224,
|
||||
CS_TYPE_UTF8MB4_CZECH_CI = 234,
|
||||
CS_TYPE_UTF8MB4_CROATIAN_CI = 245,
|
||||
CS_TYPE_UTF8MB4_UNICODE_520_CI = 246,
|
||||
CS_TYPE_UTF8MB4_ICELANDIC_UCA_CI,
|
||||
CS_TYPE_UTF8MB4_LATVIAN_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_ROMANIAN_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_SLOVENIAN_UCA_CI,
|
||||
CS_TYPE_UTF8MB4_POLISH_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_ESTONIAN_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_SPANISH_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_SWEDISH_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_TURKISH_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_CZECH_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_DANISH_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_LITHUANIAN_UCA_CI,
|
||||
CS_TYPE_UTF8MB4_SLOVAK_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_SPANISH2_UCA_CI,
|
||||
CS_TYPE_UTF8MB4_ROMAN_UCA_CI,
|
||||
CS_TYPE_UTF8MB4_PERSIAN_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_ESPERANTO_UCA_CI,
|
||||
CS_TYPE_UTF8MB4_HUNGARIAN_UCA_CI,
|
||||
CS_TYPE_UTF8MB4_SINHALA_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_GERMAN2_UCA_CI ,
|
||||
CS_TYPE_UTF8MB4_CROATIAN_UCA_CI,
|
||||
CS_TYPE_UTF8MB4_UNICODE_520_CI ,
|
||||
CS_TYPE_UTF8MB4_VIETNAMESE_CI ,
|
||||
CS_TYPE_GB18030_CHINESE_CI = 248,
|
||||
CS_TYPE_GB18030_BIN = 249,
|
||||
CS_TYPE_GB18030_CHINESE_CS = 251,
|
||||
|
||||
CS_TYPE_UTF8MB4_0900_AI_CI = 255,
|
||||
CS_TYPE_EXTENDED_MARK = 256, //the cs types below can not used for storing
|
||||
CS_TYPE_UTF8MB4_0900_BIN, //309 in mysql 8.0
|
||||
|
||||
CS_TYPE_UTF8MB4_DE_PB_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_IS_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_LV_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_RO_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_SL_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_PL_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_ET_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_ES_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_SV_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_TR_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_CS_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_DA_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_LT_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_SK_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_ES_TRAD_0900_AI_CI,
|
||||
CS_TYPE_UTF8MB4_LA_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_EO_0900_AI_CI = 273 ,
|
||||
CS_TYPE_UTF8MB4_HU_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_HR_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_VI_0900_AI_CI = 277 ,
|
||||
CS_TYPE_UTF8MB4_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_DE_PB_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_IS_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_LV_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_RO_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_SL_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_PL_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_ET_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_ES_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_SV_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_TR_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_CS_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_DA_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_LT_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_SK_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_ES_TRAD_0900_AS_CS,
|
||||
CS_TYPE_UTF8MB4_LA_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_EO_0900_AS_CS = 296,
|
||||
CS_TYPE_UTF8MB4_HU_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_HR_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_VI_0900_AS_CS = 300,
|
||||
CS_TYPE_UTF8MB4_JA_0900_AS_CS = 303,
|
||||
CS_TYPE_UTF8MB4_JA_0900_AS_CS_KS ,
|
||||
CS_TYPE_UTF8MB4_0900_AS_CI ,
|
||||
CS_TYPE_UTF8MB4_RU_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_RU_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_ZH_0900_AS_CS = 308 ,
|
||||
CS_TYPE_UTF8MB4_0900_BIN,
|
||||
CS_TYPE_UTF8MB4_NB_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_NB_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_NN_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_NN_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_SR_LATN_0900_AI_CI,
|
||||
CS_TYPE_UTF8MB4_SR_LATN_0900_AS_CS,
|
||||
CS_TYPE_UTF8MB4_BS_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_BS_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_BG_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_BG_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_GL_0900_AI_CI ,
|
||||
CS_TYPE_UTF8MB4_GL_0900_AS_CS ,
|
||||
CS_TYPE_UTF8MB4_MN_CYRL_0900_AI_CI,
|
||||
CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS,
|
||||
//pinyin order (occupied)
|
||||
CS_TYPE_PINYIN_BEGIN_MARK,
|
||||
CS_TYPE_UTF8MB4_ZH_0900_AS_CS, //308 in mysql 8.0
|
||||
CS_TYPE_UTF8MB4_ZH_0900_AS_CS_CPY, //308 in mysql 8.0
|
||||
CS_TYPE_GBK_ZH_0900_AS_CS,
|
||||
CS_TYPE_UTF16_ZH_0900_AS_CS,
|
||||
CS_TYPE_GB18030_ZH_0900_AS_CS,
|
||||
@ -112,6 +237,12 @@ enum ObCollationType
|
||||
CS_TYPE_GB18030_2022_ZH_0900_AS_CS,
|
||||
CS_TYPE_ASCII_ZH_0900_AS_CS,
|
||||
CS_TYPE_TIS620_ZH_0900_AS_CS,
|
||||
CS_TYPE_UTF16LE_ZH_0900_AS_CS,
|
||||
CS_TYPE_SJIS_ZH_0900_AS_CS,
|
||||
CS_TYPE_BIG5_ZH_0900_AS_CS,
|
||||
CS_TYPE_HKSCS_ZH_0900_AS_CS,
|
||||
CS_TYPE_HKSCS31_ZH_0900_AS_CS,
|
||||
CS_TYPE_DEC8_ZH_0900_AS_CS,
|
||||
|
||||
//radical-stroke order
|
||||
CS_TYPE_RADICAL_BEGIN_MARK,
|
||||
@ -123,7 +254,12 @@ enum ObCollationType
|
||||
CS_TYPE_GB18030_2022_ZH2_0900_AS_CS,
|
||||
CS_TYPE_ASCII_ZH2_0900_AS_CS,
|
||||
CS_TYPE_TIS620_ZH2_0900_AS_CS,
|
||||
|
||||
CS_TYPE_UTF16LE_ZH2_0900_AS_CS,
|
||||
CS_TYPE_SJIS_ZH2_0900_AS_CS,
|
||||
CS_TYPE_BIG5_ZH2_0900_AS_CS,
|
||||
CS_TYPE_HKSCS_ZH2_0900_AS_CS,
|
||||
CS_TYPE_HKSCS31_ZH2_0900_AS_CS,
|
||||
CS_TYPE_DEC8_ZH2_0900_AS_CS,
|
||||
//stroke order
|
||||
CS_TYPE_STROKE_BEGIN_MARK,
|
||||
CS_TYPE_UTF8MB4_ZH3_0900_AS_CS,
|
||||
@ -134,7 +270,12 @@ enum ObCollationType
|
||||
CS_TYPE_GB18030_2022_ZH3_0900_AS_CS,
|
||||
CS_TYPE_ASCII_ZH3_0900_AS_CS,
|
||||
CS_TYPE_TIS620_ZH3_0900_AS_CS,
|
||||
|
||||
CS_TYPE_UTF16LE_ZH3_0900_AS_CS,
|
||||
CS_TYPE_SJIS_ZH3_0900_AS_CS,
|
||||
CS_TYPE_BIG5_ZH3_0900_AS_CS,
|
||||
CS_TYPE_HKSCS_ZH3_0900_AS_CS,
|
||||
CS_TYPE_HKSCS31_ZH3_0900_AS_CS,
|
||||
CS_TYPE_DEC8_ZH3_0900_AS_CS,
|
||||
CS_TYPE_MAX
|
||||
};
|
||||
|
||||
@ -149,9 +290,12 @@ enum ObNlsCharsetId
|
||||
CHARSET_ZHS16GBK_ID = 852,
|
||||
CHARSET_ZHS32GB18030_ID = 854,
|
||||
CHARSET_ZHS32GB18030_2022_ID = 859, // not used in oracle
|
||||
CHARSET_ZHT16HKSCS_ID = 868,
|
||||
CHARSET_UTF8_ID = 871,
|
||||
CHARSET_AL32UTF8_ID = 873,
|
||||
CHARSET_ZHT16HKSCS31_ID = 992,
|
||||
CHARSET_AL16UTF16_ID = 2000,
|
||||
CHARSET_AL16UTF16LE_ID = 2002,
|
||||
CHARSET_MAX_ID,
|
||||
};
|
||||
|
||||
@ -233,10 +377,8 @@ public:
|
||||
static const int32_t MAX_CASE_MULTIPLY = 4;
|
||||
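// e.g. latin1 uses 1 byte per character and utf8mb4 uses up to 4 bytes, so the conversion factor is 4; in other words, one character is stored in at most 4 bytes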
|
||||
static const int32_t CharConvertFactorNum = 4;
|
||||
|
||||
static const int64_t VALID_CHARSET_TYPES = 9;
|
||||
static const int64_t VALID_COLLATION_TYPES = 31;
|
||||
|
||||
static const int64_t VALID_CHARSET_TYPES = 15;
|
||||
static const int64_t VALID_COLLATION_TYPES = 143;
|
||||
static int init_charset();
|
||||
// strntodv2 is an enhanced version of strntod,
|
||||
// which handles nan/infinity values in oracle mode.
|
||||
@ -407,19 +549,7 @@ public:
|
||||
static ObCollationType collation_type(const ObString &cs_name);
|
||||
static bool is_valid_collation(ObCharsetType charset_type, ObCollationType coll_type);
|
||||
static bool is_valid_collation(int64_t coll_type_int);
|
||||
static bool is_valid_charset(int64_t cs_type_int)
|
||||
{
|
||||
ObCharsetType charset_type = static_cast<ObCharsetType>(cs_type_int);
|
||||
return CHARSET_BINARY == charset_type
|
||||
|| CHARSET_UTF8MB4 == charset_type
|
||||
|| CHARSET_GBK == charset_type
|
||||
|| CHARSET_UTF16 == charset_type
|
||||
|| CHARSET_GB18030 == charset_type
|
||||
|| CHARSET_GB18030_2022 == charset_type
|
||||
|| CHARSET_LATIN1 == charset_type
|
||||
|| CHARSET_ASCII == charset_type
|
||||
|| CHARSET_TIS620 == charset_type;
|
||||
}
|
||||
static bool is_valid_charset(int64_t cs_type_int);
|
||||
static bool is_gb18030_2022(int64_t coll_type_int) {
|
||||
ObCollationType coll_type = static_cast<ObCollationType>(coll_type_int);
|
||||
return CS_TYPE_GB18030_2022_BIN <= coll_type && coll_type <= CS_TYPE_GB18030_2022_STROKE_CS;
|
||||
@ -588,15 +718,20 @@ public:
|
||||
static int get_nls_charset_id_by_charset_type(ObCharsetType charset_type);
|
||||
static ObNlsCharsetId charset_type_to_ora_charset_id(ObCharsetType cs_type);
|
||||
static ObCharsetType ora_charset_type_to_charset_type(ObNlsCharsetId charset_id);
|
||||
static int trim_end_of_str(const char *buf, int length, char *&trim_end, ObCharsetType ctype);
|
||||
static bool is_valid_nls_collation(ObNLSCollation nls_collation);
|
||||
static bool is_valid_ora_charset_id(ObNlsCharsetId charset_id);
|
||||
static ObCollationType ora_charset_type_to_coll_type(ObNlsCharsetId charset_id);
|
||||
static ObCollationType get_coll_type_by_nlssort_param(ObCharsetType charset_type,
|
||||
const ObString &nlssort_param);
|
||||
private:
|
||||
static int init_charset_and_arr();
|
||||
static int init_charset_info_coll_info(ObCharsetInfo *cs, ObCharsetLoader& loader);
|
||||
static bool is_argument_valid(const ObCharsetInfo *charset_info, const char *str, int64_t str_len);
|
||||
static bool is_argument_valid(const ObCollationType collation_type, const char *str1, int64_t str_len1, const char *str2, int64_t str_len2);
|
||||
static int copy_zh_cs(ObCharsetInfo *from_cs, ObCollationType to_coll_type, ObCharsetInfo *&to_cs);
|
||||
static int copy_zh_cs(ObCharsetInfo *from_cs, ObCharsetType charset_type, ObCharsetInfo *&to_cs);
|
||||
|
||||
private:
|
||||
// disallow copy
|
||||
DISALLOW_COPY_AND_ASSIGN(ObCharset);
|
||||
@ -604,6 +739,7 @@ private:
|
||||
static const ObCharsetWrapper charset_wrap_arr_[VALID_CHARSET_TYPES];
|
||||
static const ObCollationWrapper collation_wrap_arr_[VALID_COLLATION_TYPES];
|
||||
static ObCharsetInfo *charset_arr[CS_TYPE_MAX]; // CHARSET_INFO *
|
||||
static ObCharsetType collation_charset_map[CS_TYPE_MAX];
|
||||
static ObCharsetType default_charset_type_;
|
||||
static ObCollationType default_collation_type_;
|
||||
};
|
||||
@ -657,7 +793,7 @@ public:
|
||||
}
|
||||
|
||||
static int remove_char_endspace(ObString &str,
|
||||
const ObCharsetType &charset_type);
|
||||
const ObCharsetInfo *charsetInfo);
|
||||
private:
|
||||
static ObString const_str_for_ascii_[CHARSET_MAX][INT8_MAX + 1];
|
||||
};
|
||||
|
@ -700,6 +700,36 @@ public:
|
||||
foreach_char_prototype<CHARSET_UTF16, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
|
||||
: foreach_char_prototype<CHARSET_UTF16, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
|
||||
break;
|
||||
case CHARSET_UTF16LE:
|
||||
ret = convert_unicode ?
|
||||
foreach_char_prototype<CHARSET_UTF16LE, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
|
||||
: foreach_char_prototype<CHARSET_UTF16LE, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
|
||||
break;
|
||||
case CHARSET_SJIS:
|
||||
ret = convert_unicode ?
|
||||
foreach_char_prototype<CHARSET_SJIS, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
|
||||
: foreach_char_prototype<CHARSET_SJIS, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
|
||||
break;
|
||||
case CHARSET_HKSCS:
|
||||
ret = convert_unicode ?
|
||||
foreach_char_prototype<CHARSET_HKSCS, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
|
||||
: foreach_char_prototype<CHARSET_HKSCS, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
|
||||
break;
|
||||
case CHARSET_HKSCS31:
|
||||
ret = convert_unicode ?
|
||||
foreach_char_prototype<CHARSET_HKSCS31, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
|
||||
: foreach_char_prototype<CHARSET_HKSCS31, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
|
||||
break;
|
||||
case CHARSET_DEC8:
|
||||
ret = convert_unicode ?
|
||||
foreach_char_prototype<CHARSET_DEC8, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
|
||||
: foreach_char_prototype<CHARSET_DEC8, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
|
||||
break;
|
||||
case CHARSET_BIG5:
|
||||
ret = convert_unicode ?
|
||||
foreach_char_prototype<CHARSET_BIG5, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
|
||||
: foreach_char_prototype<CHARSET_BIG5, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
|
||||
break;
|
||||
case CHARSET_LATIN1:
|
||||
ret = convert_unicode ?
|
||||
foreach_char_prototype<CHARSET_LATIN1, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
|
||||
|
58
deps/oblib/src/lib/charset/ob_ctype.h
vendored
58
deps/oblib/src/lib/charset/ob_ctype.h
vendored
@ -26,10 +26,13 @@
|
||||
#define OB_UTF8MB4_0900_AI_CI OB_UTF8MB4 "_0900_ai_ci"
|
||||
|
||||
#define OB_UTF16 "utf16"
|
||||
#define OB_UTF16LE "utf16le"
|
||||
|
||||
#define OB_UTF16_GENERAL_CI OB_UTF16 "_general_ci"
|
||||
#define OB_UTF16_BIN OB_UTF16 "_bin"
|
||||
#define OB_UTF16_UNICODE_CI OB_UTF16 "_unicode_ci"
|
||||
#define OB_UTF16LE_GENERAL_CI OB_UTF16LE "_general_ci"
|
||||
#define OB_UTF16LE_BIN OB_UTF16LE "_bin"
|
||||
|
||||
#define OB_LATIN1 "latin1"
|
||||
#define OB_LATIN1_SWEDISH_CI OB_LATIN1 "_swedish_ci"
|
||||
@ -262,6 +265,7 @@ typedef struct ObCharsetHandler
|
||||
char **endptr, int *error);
|
||||
size_t (*scan)(const struct ObCharsetInfo *, const char *b,
|
||||
const char *e, int sq);
|
||||
const unsigned char * (*skip_trailing_space)(const struct ObCharsetInfo *,const unsigned char *ptr,size_t len);
|
||||
} ObCharsetHandler;
|
||||
|
||||
static const int HASH_BUFFER_LENGTH = 128;
|
||||
@ -444,6 +448,7 @@ extern ObUniCtype ob_uni_ctype[256];
|
||||
//=============================================================================
|
||||
|
||||
extern ObUnicaseInfo ob_unicase_default;
|
||||
extern ObUnicaseInfo ob_unicase_turkish;
|
||||
extern ObUnicaseInfo ob_unicase_unicode520;
|
||||
|
||||
//=============================================================================
|
||||
@ -455,6 +460,8 @@ extern ObCharsetInfo ob_charset_gbk_chinese_ci;
|
||||
extern ObCharsetInfo ob_charset_gbk_bin;
|
||||
extern ObCharsetInfo ob_charset_utf16_general_ci;
|
||||
extern ObCharsetInfo ob_charset_utf16_bin;
|
||||
extern ObCharsetInfo ob_charset_utf16le_general_ci;
|
||||
extern ObCharsetInfo ob_charset_utf16le_bin;
|
||||
extern ObCharsetInfo ob_charset_gb18030_chinese_ci;
|
||||
extern ObCharsetInfo ob_charset_gb18030_chinese_cs;
|
||||
extern ObCharsetInfo ob_charset_gb18030_bin;
|
||||
@ -481,12 +488,24 @@ extern ObCharsetInfo ob_charset_ascii;
|
||||
extern ObCharsetInfo ob_charset_ascii_bin;
|
||||
extern ObCharsetInfo ob_charset_tis620_thai_ci;
|
||||
extern ObCharsetInfo ob_charset_tis620_bin;
|
||||
extern ObCharsetInfo ob_charset_sjis_japanese_ci;
|
||||
extern ObCharsetInfo ob_charset_sjis_bin;
|
||||
extern ObCollationHandler ob_collation_mb_bin_handler;
|
||||
extern ObCharsetHandler ob_charset_utf8mb4_handler;
|
||||
extern ObCharsetHandler ob_charset_utf16_handler;
|
||||
extern ObCharsetHandler ob_charset_utf16le_handler;
|
||||
extern ObCollationHandler ob_collation_binary_handler;
|
||||
extern ObCollationHandler ob_collation_8bit_bin_handler;
|
||||
extern ObCollationHandler ob_collation_8bit_simple_ci_handler;
|
||||
extern ObCharsetInfo ob_charset_big5_chinese_ci;
|
||||
extern ObCharsetInfo ob_charset_big5_bin;
|
||||
extern ObCharsetInfo ob_charset_hkscs_bin;
|
||||
extern ObCharsetInfo ob_charset_hkscs31_bin;
|
||||
extern ObCharsetInfo ob_charset_dec8_swedish_ci;
|
||||
extern ObCharsetInfo ob_charset_dec8_bin;
|
||||
extern ObCharsetInfo *uca900_collations[];
|
||||
extern ObCharsetInfo *euro_collations[];
|
||||
|
||||
//=============================================================================
|
||||
|
||||
void ob_fill_8bit(const ObCharsetInfo *cs, char* to, size_t l, int fill);
|
||||
@ -569,36 +588,11 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,
|
||||
ulong *nr1, ulong *nr2,
|
||||
const bool calc_end_space, hash_algo hash_algo);
|
||||
|
||||
inline const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len, bool is_utf16 /*false*/)
|
||||
{
|
||||
const static unsigned SPACE_INT = 0x20202020;
|
||||
const unsigned char *end= ptr + len;
|
||||
if (len > 20 && !is_utf16) {
|
||||
const unsigned char *end_words= (const unsigned char *)(int_ptr)
|
||||
(((ulonglong)(int_ptr)end) / SIZEOF_INT * SIZEOF_INT);
|
||||
const unsigned char *start_words= (const unsigned char *)(int_ptr)
|
||||
((((ulonglong)(int_ptr)ptr) + SIZEOF_INT - 1) / SIZEOF_INT * SIZEOF_INT);
|
||||
ob_charset_assert(((ulonglong)(int_ptr)ptr) >= SIZEOF_INT);
|
||||
if (end_words > ptr) {
|
||||
while (end > end_words && end[-1] == 0x20) {
|
||||
end--;
|
||||
}
|
||||
if (end[-1] == 0x20 && start_words < end_words) {
|
||||
while (end > start_words && ((unsigned *)end)[-1] == SPACE_INT) {
|
||||
end -= SIZEOF_INT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_utf16) {
|
||||
while (end - 1 > ptr && end[-2] == 0x00 && end[-1] == 0x20)
|
||||
end-=2;
|
||||
} else {
|
||||
while (end > ptr && end[-1] == 0x20)
|
||||
end--;
|
||||
}
|
||||
return (end);
|
||||
}
|
||||
int ob_strcasecmp_mb(const ObCharsetInfo *cs, const char *s, const char *t);
|
||||
|
||||
const unsigned char *skip_trailing_space(const struct ObCharsetInfo *, const unsigned char *ptr,size_t len);
|
||||
const unsigned char *skip_trailing_space_utf16(const struct ObCharsetInfo *, const unsigned char *ptr,size_t len);
|
||||
const unsigned char *skip_trailing_space_utf16le(const struct ObCharsetInfo *, const unsigned char *ptr,size_t len);
|
||||
|
||||
size_t ob_numchars_mb(const ObCharsetInfo *cs __attribute__((unused)), const char *pos, const char *end);
|
||||
|
||||
@ -749,6 +743,10 @@ unsigned int ob_ismbchar_8bit(const ObCharsetInfo *cs __attribute__((unused)), c
|
||||
|
||||
extern "C" void right_to_die_or_duty_to_live_c();
|
||||
|
||||
// Writes the 16-bit value `code` into the two bytes at s[0] and s[1],
// high byte first (s[0] = bits 8-15, s[1] = bits 0-7).
// The caller must provide at least two writable bytes at `s`.
static inline void OB_PUT_MB2(unsigned char *s, uint16 code) {
|
||||
s[0] = code >> 8;
|
||||
s[1] = code & 0xFF;
|
||||
}
|
||||
|
||||
#endif /* OCEANBASE_LIB_OBMYSQL_OB_CTYPE_ */
|
||||
|
||||
|
4
deps/oblib/src/lib/charset/ob_ctype_ascii.cc
vendored
4
deps/oblib/src/lib/charset/ob_ctype_ascii.cc
vendored
@ -43,7 +43,9 @@ static ObCharsetHandler ob_charset_ascii_handler = {
|
||||
ob_strntod_8bit,
|
||||
//ob_strtoll10_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit};
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space
|
||||
};
|
||||
|
||||
ObCharsetInfo ob_charset_ascii = {
|
||||
11,0,0,
|
||||
|
426
deps/oblib/src/lib/charset/ob_ctype_big5.cc
vendored
Normal file
426
deps/oblib/src/lib/charset/ob_ctype_big5.cc
vendored
Normal file
@ -0,0 +1,426 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "ob_ctype_big5_tab.h"
|
||||
|
||||
/* True when byte `c` is in the lead-byte range 0xA1..0xF9. */
#define hasbig5head(c) \
  (0xa1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xf9)

/* True when byte `c` is in a trail-byte range: 0x40..0x7E or 0xA1..0xFE. */
#define hasbig5tail(c)                                            \
  ((0x40 <= (unsigned char)(c) && (unsigned char)(c) <= 0x7e) || \
   (0xa1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xfe))

/* True when (c, d) is a valid lead/trail byte pair. */
#define isbig5code(c, d) (hasbig5head(c) && hasbig5tail(d))

/* Combine a lead byte and a trail byte into one 16-bit code (lead in the high byte). */
#define big5code(c, d) (((unsigned char)(c) << 8) | (unsigned char)(d))

/*
  Split a 16-bit code back into its bytes. The argument is parenthesized so
  that expression arguments such as `a | b` are shifted/masked as a whole.
*/
#define getbig5head(e) ((unsigned char)((e) >> 8))
#define getbig5tail(e) ((unsigned char)((e) & 0xff))
|
||||
|
||||
|
||||
/*
  Map a two-byte big5 code to the representative code of its stroke-order
  group. Every code inside one of the listed inclusive ranges collapses to
  that group's weight; any code not covered by a range yields 0xA140.

  The table is scanned front to back and the first matching range wins,
  which mirrors the order in which the ranges are tested.
*/
static uint16 big5strokexfrm(uint16 i) {
  struct StrokeRange {
    uint16 lo;      /* first code of the range (inclusive) */
    uint16 hi;      /* last code of the range (inclusive)  */
    uint16 weight;  /* value returned for codes in range   */
  };

  /* stroke order */
  static const StrokeRange kStrokeRanges[] = {
      {0xA440, 0xA441, 0xA440},

      {0xA442, 0xA453, 0xA442}, {0xC940, 0xC944, 0xA442},

      {0xA454, 0xA47E, 0xA454}, {0xC945, 0xC94C, 0xA454},

      {0xA4A1, 0xA4FD, 0xA4A1}, {0xC94D, 0xC962, 0xA4A1},

      {0xA4FE, 0xA5DF, 0xA4FE}, {0xC963, 0xC9AA, 0xA4FE},

      {0xA5E0, 0xA6E9, 0xA5E0}, {0xC9AB, 0xCA59, 0xA5E0},

      {0xA6EA, 0xA8C2, 0xA6EA}, {0xCA5A, 0xCBB0, 0xA6EA},

      {0xA260, 0xA260, 0xA8C3}, {0xA8C3, 0xAB44, 0xA8C3},
      {0xCBB1, 0xCDDC, 0xA8C3},

      {0xA259, 0xA259, 0xAB45}, {0xF9DA, 0xF9DA, 0xAB45},
      {0xAB45, 0xADBB, 0xAB45}, {0xCDDD, 0xD0C7, 0xAB45},

      {0xA25A, 0xA25A, 0xADBC}, {0xADBC, 0xB0AD, 0xADBC},
      {0xD0C8, 0xD44A, 0xADBC},

      {0xA25B, 0xA25C, 0xB0AE}, {0xB0AE, 0xB3C2, 0xB0AE},
      {0xD44B, 0xD850, 0xB0AE},

      {0xF9DB, 0xF9DB, 0xB3C3}, {0xB3C3, 0xB6C2, 0xB3C3},
      {0xD851, 0xDCB0, 0xB3C3},

      {0xA25D, 0xA25D, 0xB6C3}, {0xA25F, 0xA25F, 0xB6C3},
      {0xC6A1, 0xC6A1, 0xB6C3}, {0xF9D6, 0xF9D6, 0xB6C3},
      {0xF9D8, 0xF9D8, 0xB6C3}, {0xB6C3, 0xB9AB, 0xB6C3},
      {0xDCB1, 0xE0EF, 0xB6C3},

      {0xF9DC, 0xF9DC, 0xB9AC}, {0xB9AC, 0xBBF4, 0xB9AC},
      {0xE0F0, 0xE4E5, 0xB9AC},

      {0xA261, 0xA261, 0xBBF5}, {0xBBF5, 0xBEA6, 0xBBF5},
      {0xE4E6, 0xE8F3, 0xBBF5},

      {0xA25E, 0xA25E, 0xBEA7}, {0xF9D7, 0xF9D7, 0xBEA7},
      {0xF9D9, 0xF9D9, 0xBEA7}, {0xBEA7, 0xC074, 0xBEA7},
      {0xE8F4, 0xECB8, 0xBEA7},

      {0xC075, 0xC24E, 0xC075}, {0xECB9, 0xEFB6, 0xC075},

      {0xC24F, 0xC35E, 0xC24F}, {0xEFB7, 0xF1EA, 0xC24F},

      {0xC35F, 0xC454, 0xC35F}, {0xF1EB, 0xF3FC, 0xC35F},

      {0xC455, 0xC4D6, 0xC455}, {0xF3FD, 0xF5BF, 0xC455},

      {0xC4D7, 0xC56A, 0xC4D7}, {0xF5C0, 0xF6D5, 0xC4D7},

      {0xC56B, 0xC5C7, 0xC56B}, {0xF6D6, 0xF7CF, 0xC56B},

      {0xC5C8, 0xC5F0, 0xC5C8}, {0xF7D0, 0xF8A4, 0xC5C8},

      {0xC5F1, 0xC654, 0xC5F1}, {0xF8A5, 0xF8ED, 0xC5F1},

      {0xC655, 0xC664, 0xC655}, {0xF8EE, 0xF96A, 0xC655},

      {0xC665, 0xC66B, 0xC665}, {0xF96B, 0xF9A1, 0xC665},

      {0xC66C, 0xC675, 0xC66C}, {0xF9A2, 0xF9B9, 0xC66C},

      {0xC676, 0xC678, 0xC676}, {0xF9BA, 0xF9C5, 0xC676},

      {0xC679, 0xC67C, 0xC679}, {0xF9C7, 0xF9CB, 0xC679},

      {0xC67D, 0xC67D, 0xC67D}, {0xF9CC, 0xF9CF, 0xC67D},

      {0xF9D0, 0xF9D0, 0xF9D0},

      {0xC67E, 0xC67E, 0xC67E}, {0xF9D1, 0xF9D1, 0xC67E},

      {0xF9C6, 0xF9C6, 0xF9C6}, {0xF9D2, 0xF9D2, 0xF9C6},

      {0xF9D3, 0xF9D3, 0xF9D3},

      {0xF9D4, 0xF9D4, 0xF9D4},

      {0xF9D5, 0xF9D5, 0xF9D5},
  };

  for (const StrokeRange &range : kStrokeRanges) {
    if (i >= range.lo && i <= range.hi) {
      return range.weight;
    }
  }
  /* Not part of any stroke group. */
  return 0xA140;
}
|
||||
|
||||
|
||||
static int ob_strnncoll_big5_internal(const unsigned char **a_res, const unsigned char **b_res,
|
||||
size_t length) {
|
||||
const unsigned char *a = *a_res, *b = *b_res;
|
||||
|
||||
while (length--) {
|
||||
if ((length > 0) && isbig5code(*a, *(a + 1)) && isbig5code(*b, *(b + 1))) {
|
||||
if (*a != *b || *(a + 1) != *(b + 1))
|
||||
return ((int)big5code(*a, *(a + 1)) - (int)big5code(*b, *(b + 1)));
|
||||
a += 2;
|
||||
b += 2;
|
||||
length--;
|
||||
} else if (sort_order_big5[*a++] != sort_order_big5[*b++])
|
||||
return ((int)sort_order_big5[a[-1]] - (int)sort_order_big5[b[-1]]);
|
||||
}
|
||||
*a_res = a;
|
||||
*b_res = b;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compare strings */
|
||||
extern "C" {
|
||||
static int ob_strnncoll_big5(const ObCharsetInfo *cs [[maybe_unused]],
|
||||
const unsigned char *a, size_t a_length, const unsigned char *b,
|
||||
size_t b_length, bool b_is_prefix) {
|
||||
size_t length = std::min(a_length, b_length);
|
||||
int res = ob_strnncoll_big5_internal(&a, &b, length);
|
||||
return res ? res : (int)((b_is_prefix ? length : a_length) - b_length);
|
||||
}
|
||||
|
||||
/* compare strings, ignore end space */
|
||||
|
||||
static int ob_strnncollsp_big5(const ObCharsetInfo *cs [[maybe_unused]],
|
||||
const unsigned char *a, size_t a_length, const unsigned char *b,
|
||||
size_t b_length, bool diff_if_only_endspace_difference) {
|
||||
size_t length = std::min(a_length, b_length);
|
||||
int res = ob_strnncoll_big5_internal(&a, &b, length);
|
||||
|
||||
if (!res && a_length != b_length) {
|
||||
const unsigned char *end;
|
||||
int swap = 1;
|
||||
/*
|
||||
Check the next not space character of the longer key. If it's < ' ',
|
||||
then it's smaller than the other key.
|
||||
*/
|
||||
if (a_length < b_length) {
|
||||
/* put longer key in a */
|
||||
a_length = b_length;
|
||||
a = b;
|
||||
swap = -1; /* swap sign of result */
|
||||
res = -res;
|
||||
}
|
||||
for (end = a + a_length - length; a < end; a++) {
|
||||
if (*a != ' ') return (*a < ' ') ? -swap : swap;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static size_t ob_strnxfrm_big5(const ObCharsetInfo *cs, unsigned char *dst,
|
||||
size_t dstlen, uint nweights, const unsigned char *src,
|
||||
size_t srclen, uint flags, bool *is_valid_unicode) {
|
||||
unsigned char *d0 = dst;
|
||||
unsigned char *de = dst + dstlen;
|
||||
const unsigned char *se = src + srclen;
|
||||
const unsigned char *sort_order = cs->sort_order;
|
||||
|
||||
for (; dst < de && src < se && nweights; nweights--) {
|
||||
if (cs->cset->ismbchar(cs, (const char *)src, (const char *)se)) {
|
||||
/*
|
||||
Note, it is safe not to check (src < se)
|
||||
in the code below, because ismbchar() would
|
||||
not return TRUE if src was too short
|
||||
*/
|
||||
uint16 e = big5strokexfrm((uint16)big5code(*src, *(src + 1)));
|
||||
*dst++ = getbig5head(e);
|
||||
if (dst < de) *dst++ = getbig5tail(e);
|
||||
src += 2;
|
||||
} else
|
||||
*dst++ = sort_order ? sort_order[*src++] : *src++;
|
||||
}
|
||||
return ob_strxfrm_pad(cs, d0, dst, de, nweights, flags);
|
||||
}
|
||||
|
||||
static unsigned int ismbchar_big5(const ObCharsetInfo *cs [[maybe_unused]],
|
||||
const char *p, const char *e) {
|
||||
return (hasbig5head(*(p)) && (e) - (p) > 1 && hasbig5tail(*((p) + 1)) ? 2 : 0);
|
||||
}
|
||||
|
||||
static unsigned int mbcharlen_big5(const ObCharsetInfo *cs [[maybe_unused]], uint c) {
|
||||
return (hasbig5head(c) ? 2 : 1);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Returns a well formed length of a BIG5 string.
|
||||
CP950 and SCS additional characters are also accepted.
|
||||
*/
|
||||
static size_t ob_well_formed_len_big5(const ObCharsetInfo *cs [[maybe_unused]],
|
||||
const char *b, const char *e, size_t pos,
|
||||
int *error) {
|
||||
const char *b0 = b;
|
||||
const char *emb = e - 1; /* Last possible end of an MB character */
|
||||
|
||||
*error = 0;
|
||||
while (pos-- && b < e) {
|
||||
if ((unsigned char)b[0] < 128) {
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
} else if ((b < emb) && isbig5code((unsigned char)*b, (unsigned char)b[1])) {
|
||||
/* Double byte character */
|
||||
b += 2;
|
||||
} else {
|
||||
/* Wrong byte sequence */
|
||||
*error = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t)(b - b0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
  Case-mapping descriptor installed as the "caseinfo" entry of both
  ob_charset_big5_chinese_ci and ob_charset_big5_bin.
  NOTE(review): 0xFFFF is presumably the highest code covered by
  ob_caseinfo_pages_big5 -- confirm against the ObUnicaseInfo definition.
*/
static ObUnicaseInfo ob_caseinfo_big5 = {0xFFFF, ob_caseinfo_pages_big5};
|
||||
|
||||
static int func_big5_uni_onechar(int code) {
|
||||
if ((code >= 0xA140) && (code <= 0xC7FC))
|
||||
return (tab_big5_uni0[code - 0xA140]);
|
||||
if ((code >= 0xC940) && (code <= 0xF9DC))
|
||||
return (tab_big5_uni1[code - 0xC940]);
|
||||
return (0);
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
static int func_uni_big5_onechar(int code) {
|
||||
if ((code >= 0x00A2) && (code <= 0x00F7))
|
||||
return (tab_uni_big50[code - 0x00A2]);
|
||||
if ((code >= 0x02C7) && (code <= 0x0451))
|
||||
return (tab_uni_big51[code - 0x02C7]);
|
||||
if ((code >= 0x2013) && (code <= 0x22BF))
|
||||
return (tab_uni_big52[code - 0x2013]);
|
||||
if ((code >= 0x2460) && (code <= 0x2642))
|
||||
return (tab_uni_big53[code - 0x2460]);
|
||||
if ((code >= 0x3000) && (code <= 0x3129))
|
||||
return (tab_uni_big54[code - 0x3000]);
|
||||
if ((code >= 0x32A3) && (code <= 0x32A3))
|
||||
return (tab_uni_big55[code - 0x32A3]);
|
||||
if ((code >= 0x338E) && (code <= 0x33D5))
|
||||
return (tab_uni_big56[code - 0x338E]);
|
||||
if ((code >= 0x4E00) && (code <= 0x9483))
|
||||
return (tab_uni_big57[code - 0x4E00]);
|
||||
if ((code >= 0x9577) && (code <= 0x9FA4))
|
||||
return (tab_uni_big58[code - 0x9577]);
|
||||
if ((code >= 0xFA0C) && (code <= 0xFA0D))
|
||||
return (tab_uni_big59[code - 0xFA0C]);
|
||||
if ((code >= 0xFE30) && (code <= 0xFFFD))
|
||||
return (tab_uni_big510[code - 0xFE30]);
|
||||
return (0);
|
||||
}
|
||||
static int ob_wc_mb_big5(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t wc,
|
||||
unsigned char *s, unsigned char *e) {
|
||||
int code;
|
||||
|
||||
if (s >= e) return OB_CS_TOOSMALL;
|
||||
|
||||
if ((int)wc < 0x80) {
|
||||
s[0] = (unsigned char)wc;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!(code = func_uni_big5_onechar(wc))) return OB_CS_ILUNI;
|
||||
|
||||
if (s + 2 > e) return OB_CS_TOOSMALL;
|
||||
|
||||
s[0] = code >> 8;
|
||||
s[1] = code & 0xFF;
|
||||
|
||||
return 2;
|
||||
}
|
||||
static int ob_mb_wc_big5(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t *pwc,
|
||||
const unsigned char *s, const unsigned char *e) {
|
||||
int hi;
|
||||
|
||||
if (s >= e) return OB_CS_TOOSMALL;
|
||||
|
||||
if ((hi = s[0]) < 0x80) {
|
||||
pwc[0] = hi;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (s + 2 > e) return OB_CS_TOOSMALL2;
|
||||
|
||||
if (!(pwc[0] = func_big5_uni_onechar((hi << 8) + s[1]))) return -2;
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
  Collation callbacks for big5_chinese_ci (installed in
  ob_charset_big5_chinese_ci): the big5-specific ob_strnncoll_big5,
  ob_strnncollsp_big5 and ob_strnxfrm_big5, with generic *_mb / *_simple
  routines in the remaining slots and NULL where no callback is provided.
*/
static ObCollationHandler ob_collation_big5_chinese_ci_handler = {
|
||||
NULL, /* init */
|
||||
NULL,
|
||||
ob_strnncoll_big5,
|
||||
ob_strnncollsp_big5,
|
||||
ob_strnxfrm_big5,
|
||||
ob_strnxfrmlen_simple,
|
||||
NULL,
|
||||
ob_like_range_mb,
|
||||
ob_wildcmp_mb,
|
||||
ob_strcasecmp_mb,
|
||||
ob_instr_mb,
|
||||
ob_hash_sort_simple,
|
||||
ob_propagate_simple};
|
||||
|
||||
/*
  Character-set callbacks shared by ob_charset_big5_chinese_ci and
  ob_charset_big5_bin. The first slot is NULL; the big5-specific entries are
  ismbchar_big5, mbcharlen_big5, ob_well_formed_len_big5, ob_mb_wc_big5 and
  ob_wc_mb_big5, the rest are generic *_mb / *_8bit routines, and the last
  entry is skip_trailing_space.
*/
static ObCharsetHandler ob_charset_big5_handler = {NULL,
|
||||
ismbchar_big5,
|
||||
mbcharlen_big5,
|
||||
ob_numchars_mb,
|
||||
ob_charpos_mb,
|
||||
ob_max_bytes_charpos_mb, /* max_byptes charpos */
|
||||
ob_well_formed_len_big5,
|
||||
ob_lengthsp_8bit,
|
||||
ob_mb_wc_big5, /* mb_wc */
|
||||
ob_wc_mb_big5, /* wc_mb */
|
||||
ob_mb_ctype_mb,
|
||||
ob_caseup_mb,
|
||||
ob_casedn_mb,
|
||||
ob_fill_8bit,
|
||||
ob_strntol_8bit,
|
||||
ob_strntoul_8bit,
|
||||
ob_strntoll_8bit,
|
||||
ob_strntoull_8bit,
|
||||
ob_strntod_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space};
|
||||
|
||||
|
||||
/*
  Descriptor for the "big5_chinese_ci" collation of charset "big5"
  (number fields 1, 0, 0; flagged OB_CS_PRIMARY and OB_CS_STRNXFRM).
  Characters are 1..2 bytes, the pad character is ' ' with PAD_SPACE, and
  comparisons go through ob_collation_big5_chinese_ci_handler using
  sort_order_big5 for single bytes.
*/
ObCharsetInfo ob_charset_big5_chinese_ci = {
|
||||
1,
|
||||
0,
|
||||
0, /* number */
|
||||
OB_CS_COMPILED | OB_CS_PRIMARY | OB_CS_STRNXFRM, /* state */
|
||||
"big5", /* cs name */
|
||||
"big5_chinese_ci", /* m_coll_name */
|
||||
"Big5 Traditional Chinese", /* comment */
|
||||
NULL, /* tailoring */
|
||||
NULL, /* coll_param */
|
||||
ctype_big5,
|
||||
to_lower_big5,
|
||||
to_upper_big5,
|
||||
sort_order_big5,
|
||||
NULL, /* uca */
|
||||
NULL, /* tab_to_uni */
|
||||
NULL, /* tab_from_uni */
|
||||
&ob_caseinfo_big5, /* caseinfo */
|
||||
NULL, /* state_map */
|
||||
NULL, /* ident_map */
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* caseup_multiply */
|
||||
1, /* casedn_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
1, /* mbmaxlenlen */
|
||||
0, /* min_sort_char */
|
||||
0xF9D5, /* max_sort_char */
|
||||
' ', /* pad char */
|
||||
true, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_compare */
|
||||
1, /* levels_for_order */
|
||||
&ob_charset_big5_handler,
|
||||
&ob_collation_big5_chinese_ci_handler,
|
||||
PAD_SPACE};
|
||||
|
||||
/*
  Descriptor for the "big5_bin" collation of charset "big5"
  (number fields 84, 0, 0; flagged OB_CS_BINSORT). It shares the ctype and
  case tables and ob_charset_big5_handler with big5_chinese_ci, but has no
  sort_order table (NULL) and compares through ob_collation_mb_bin_handler.
*/
ObCharsetInfo ob_charset_big5_bin = {
|
||||
84,
|
||||
0,
|
||||
0, /* number */
|
||||
OB_CS_COMPILED | OB_CS_BINSORT, /* state */
|
||||
"big5", /* cs name */
|
||||
"big5_bin", /* m_coll_name */
|
||||
"Big5 Traditional Chinese", /* comment */
|
||||
NULL, /* tailoring */
|
||||
NULL, /* coll_param */
|
||||
ctype_big5,
|
||||
to_lower_big5,
|
||||
to_upper_big5,
|
||||
NULL,
|
||||
NULL, /* uca */
|
||||
NULL, /* tab_to_uni */
|
||||
NULL, /* tab_from_uni */
|
||||
&ob_caseinfo_big5, /* caseinfo */
|
||||
NULL, /* state_map */
|
||||
NULL, /* ident_map */
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* caseup_multiply */
|
||||
1, /* casedn_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
1, /* mbmaxlenlen */
|
||||
0, /* min_sort_char */
|
||||
0xF9FE, /* max_sort_char */
|
||||
' ', /* pad char */
|
||||
true, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_compare */
|
||||
1, /* levels_for_order */
|
||||
&ob_charset_big5_handler,
|
||||
&ob_collation_mb_bin_handler,
|
||||
PAD_SPACE};
|
6169
deps/oblib/src/lib/charset/ob_ctype_big5_tab.h
vendored
Normal file
6169
deps/oblib/src/lib/charset/ob_ctype_big5_tab.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
7
deps/oblib/src/lib/charset/ob_ctype_bin.cc
vendored
7
deps/oblib/src/lib/charset/ob_ctype_bin.cc
vendored
@ -343,14 +343,14 @@ loop:
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
void ob_hash_sort_8bit_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
void ob_hash_sort_8bit_bin(const ObCharsetInfo *cs,
|
||||
const uchar *key, size_t len, ulong *nr1, ulong *nr2, const bool calc_end_space, hash_algo hash_algo)
|
||||
{
|
||||
const uchar *pos = key;
|
||||
key += len;
|
||||
//trailing space to make 'A ' == 'A'
|
||||
if (!calc_end_space) {
|
||||
key = skip_trailing_space(pos, len, 0);
|
||||
key = cs->cset->skip_trailing_space(cs, pos, len);
|
||||
}
|
||||
if (NULL == hash_algo)
|
||||
{
|
||||
@ -413,7 +413,8 @@ static ObCharsetHandler ob_charset_handler=
|
||||
ob_strntoull_8bit,
|
||||
ob_strntod_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space
|
||||
};
|
||||
|
||||
ObCollationHandler ob_collation_8bit_bin_handler =
|
||||
|
322
deps/oblib/src/lib/charset/ob_ctype_extra.cc
vendored
Normal file
322
deps/oblib/src/lib/charset/ob_ctype_extra.cc
vendored
Normal file
@ -0,0 +1,322 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
|
||||
/*
  Character-class table for dec8, installed as the ctype entry of
  ob_charset_dec8_swedish_ci: one leading 0x00 entry followed by 256
  per-byte entries (257 in total).
  NOTE(review): the bit values are presumably the ctype flag masks declared
  in ob_ctype.h -- confirm before relying on individual bits.
*/
static unsigned char ctype_dec8_swedish_ci[] = {
|
||||
0x00,
|
||||
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x28,0x28,0x28,0x28,0x28,0x20,0x20,
|
||||
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
|
||||
0x48,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x10,0x81,0x81,0x81,0x81,0x81,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
|
||||
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x10,0x10,0x10,0x10,0x10,
|
||||
0x10,0x82,0x82,0x82,0x82,0x82,0x82,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
|
||||
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x10,0x10,0x10,0x10,0x20,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x48,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
|
||||
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x10,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02,
|
||||
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
|
||||
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x10,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02
|
||||
};
|
||||
|
||||
/*
  Lower-casing map for dec8, indexed by byte value (256 entries).
  0x41..0x5A map to 0x61..0x7A and 0xC0..0xDE map to 0xE0..0xFE, except
  0xD7 which maps to itself; every other byte maps to itself.
*/
static unsigned char to_lower_dec8_swedish_ci[] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
|
||||
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
|
||||
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
|
||||
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
|
||||
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
|
||||
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
|
||||
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xD7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xDF,
|
||||
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
|
||||
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
|
||||
};
|
||||
|
||||
/*
  Upper-casing map for dec8, indexed by byte value (256 entries).
  0x61..0x7A map to 0x41..0x5A and 0xE0..0xFE map to 0xC0..0xDE, except
  0xF7 which maps to itself; every other byte (including 0xFF) maps to
  itself.
*/
static unsigned char to_upper_dec8_swedish_ci[] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
|
||||
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
|
||||
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
|
||||
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
|
||||
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
|
||||
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
|
||||
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
|
||||
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
|
||||
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
|
||||
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
|
||||
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xF7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xFF
|
||||
};
|
||||
|
||||
/*
  Sort-weight table for dec8_swedish_ci, indexed by byte value (256
  entries). Lower-case ASCII letters share the weight of their upper-case
  form, and most bytes in 0xC0..0xFF are folded onto the weight of an ASCII
  letter or of 0x5B..0x5D.
*/
static unsigned char sort_order_dec8_swedish_ci[] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
|
||||
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
|
||||
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
|
||||
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
|
||||
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
|
||||
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
|
||||
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
|
||||
0x41,0x41,0x41,0x41,0x5C,0x5B,0x5C,0x43,0x45,0x45,0x45,0x45,0x49,0x49,0x49,0x49,
|
||||
0x44,0x4E,0x4F,0x4F,0x4F,0x4F,0x5D,0xD7,0xD8,0x55,0x55,0x55,0x59,0x59,0xDE,0xDF,
|
||||
0x41,0x41,0x41,0x41,0x5C,0x5B,0x5C,0x43,0x45,0x45,0x45,0x45,0x49,0x49,0x49,0x49,
|
||||
0x44,0x4E,0x4F,0x4F,0x4F,0x4F,0x5D,0xF7,0xD8,0x55,0x55,0x55,0x59,0x59,0xDE,0xFF
|
||||
};
|
||||
|
||||
/*
  dec8 byte -> Unicode code point table, indexed by byte value (256
  entries); installed as the to_uni entry of ob_charset_dec8_swedish_ci.
  NOTE(review): 0x0000 entries above index 0 presumably mark bytes with no
  Unicode mapping -- confirm against the 8-bit conversion routines.
*/
static uint16 to_uni_dec8_swedish_ci[] = {
|
||||
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
|
||||
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
|
||||
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
|
||||
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
|
||||
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
|
||||
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
|
||||
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
|
||||
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
|
||||
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
|
||||
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
|
||||
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
|
||||
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
|
||||
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
|
||||
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
|
||||
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
|
||||
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
|
||||
0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
|
||||
0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
|
||||
0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
|
||||
0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
|
||||
0x00A0,0x00A1,0x00A2,0x00A3,0x0000,0x00A5,0x0000,0x00A7,
|
||||
0x00A4,0x00A9,0x00AA,0x00AB,0x0000,0x0000,0x0000,0x0000,
|
||||
0x00B0,0x00B1,0x00B2,0x00B3,0x0000,0x00B5,0x00B6,0x00B7,
|
||||
0x0000,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x0000,0x00BF,
|
||||
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
|
||||
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
|
||||
0x0000,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x0152,
|
||||
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x0178,0x0000,0x00DF,
|
||||
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
|
||||
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
|
||||
0x0000,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x0153,
|
||||
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FF,0x0000,0x0000
|
||||
};
|
||||
|
||||
|
||||
/*
  Character-class table for the dec8 binary collation: one leading 0x00
  entry followed by 256 per-byte entries.
  NOTE(review): contents appear to duplicate ctype_dec8_swedish_ci --
  confirm whether the two tables could be shared.
*/
static unsigned char ctype_dec8_bin[] = {
|
||||
0x00,
|
||||
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x28,0x28,0x28,0x28,0x28,0x20,0x20,
|
||||
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
|
||||
0x48,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x10,0x81,0x81,0x81,0x81,0x81,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
|
||||
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x10,0x10,0x10,0x10,0x10,
|
||||
0x10,0x82,0x82,0x82,0x82,0x82,0x82,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
|
||||
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x10,0x10,0x10,0x10,0x20,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x48,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
|
||||
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
|
||||
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x10,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02,
|
||||
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
|
||||
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x10,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02
|
||||
};
|
||||
|
||||
/*
  Lower-casing map for the dec8 binary collation, indexed by byte value
  (256 entries).
  NOTE(review): contents appear to duplicate to_lower_dec8_swedish_ci --
  confirm whether the two tables could be shared.
*/
static unsigned char to_lower_dec8_bin[] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
|
||||
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
|
||||
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
|
||||
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
|
||||
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
|
||||
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
|
||||
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
|
||||
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xD7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xDF,
|
||||
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
|
||||
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
|
||||
};
|
||||
|
||||
/*
  Upper-casing map for the dec8 binary collation, indexed by byte value
  (256 entries).
  NOTE(review): contents appear to duplicate to_upper_dec8_swedish_ci --
  confirm whether the two tables could be shared.
*/
static unsigned char to_upper_dec8_bin[] = {
|
||||
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
|
||||
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
|
||||
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
|
||||
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
|
||||
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
|
||||
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
|
||||
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
|
||||
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
|
||||
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
|
||||
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
|
||||
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
|
||||
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
|
||||
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
|
||||
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
|
||||
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
|
||||
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xF7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xFF
|
||||
};
|
||||
|
||||
/*
  dec8 byte -> Unicode code point table for the dec8 binary collation,
  indexed by byte value (256 entries).
  NOTE(review): contents appear to duplicate to_uni_dec8_swedish_ci --
  confirm whether the two tables could be shared.
*/
static uint16 to_uni_dec8_bin[] = {
|
||||
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
|
||||
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
|
||||
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
|
||||
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
|
||||
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
|
||||
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
|
||||
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
|
||||
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
|
||||
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
|
||||
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
|
||||
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
|
||||
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
|
||||
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
|
||||
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
|
||||
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
|
||||
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
|
||||
0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
|
||||
0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
|
||||
0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
|
||||
0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
|
||||
0x00A0,0x00A1,0x00A2,0x00A3,0x0000,0x00A5,0x0000,0x00A7,
|
||||
0x00A4,0x00A9,0x00AA,0x00AB,0x0000,0x0000,0x0000,0x0000,
|
||||
0x00B0,0x00B1,0x00B2,0x00B3,0x0000,0x00B5,0x00B6,0x00B7,
|
||||
0x0000,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x0000,0x00BF,
|
||||
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
|
||||
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
|
||||
0x0000,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x0152,
|
||||
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x0178,0x0000,0x00DF,
|
||||
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
|
||||
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
|
||||
0x0000,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x0153,
|
||||
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FF,0x0000,0x0000
|
||||
};
|
||||
|
||||
|
||||
/*
  Charset handler built from the generic *_8bit routines. Both dec8 charset
  definitions that follow (dec8_swedish_ci and dec8_bin) point at it.
  The second slot is NULL here; the multi-byte handlers elsewhere in this
  change put their ismbchar routine in that position.
  The last slot is the trailing-space skipper that collation code reaches
  through cs->cset->skip_trailing_space(cs, ptr, len).
  Entries kept as comments are routines that are deliberately not wired in.
*/
static ObCharsetHandler ob_charset_8bit_handler = {
  ob_cset_init_8bit,
  NULL,
  ob_mbcharlen_8bit,
  ob_numchars_8bit,
  ob_charpos_8bit,
  ob_max_bytes_charpos_8bit,
  ob_well_formed_len_8bit,
  ob_lengthsp_8bit,
  /* ob_numcells_8bit, */
  ob_mb_wc_8bit,
  ob_wc_mb_8bit,
  ob_mb_ctype_8bit,
  /* ob_caseup_str_8bit, */
  /* ob_casedn_str_8bit, */
  ob_caseup_8bit,
  ob_casedn_8bit,
  /* ob_snprintf_8bit, */
  /* ob_long10_to_str_8bit, */
  /* ob_longlong10_to_str_8bit, */
  ob_fill_8bit,
  ob_strntol_8bit,
  ob_strntoul_8bit,
  ob_strntoll_8bit,
  ob_strntoull_8bit,
  ob_strntod_8bit,
  /* ob_strtoll10_8bit, */
  ob_strntoull10rnd_8bit,
  ob_scan_8bit,
  skip_trailing_space
};
|
||||
|
||||
|
||||
/*
  dec8 charset, case-insensitive Swedish collation (number 3, flagged as the
  primary collation). Single-byte: mbminlen == mbmaxlen == 1.
  Uses the shared 8-bit charset handler and the simple case-insensitive
  8-bit collation handler.
*/
ObCharsetInfo ob_charset_dec8_swedish_ci = {
  3, 0, 0,                              /* number */
  OB_CS_COMPILED | OB_CS_PRIMARY,       /* state */
  "dec8",                               /* csname */
  "dec8_swedish_ci",                    /* m_collname */
  "DEC West European",                  /* comment */
  NULL,                                 /* tailoring */
  NULL,                                 /* coll_param */
  ctype_dec8_swedish_ci,                /* ctype */
  to_lower_dec8_swedish_ci,             /* to_lower */
  to_upper_dec8_swedish_ci,             /* to_upper */
  sort_order_dec8_swedish_ci,           /* sort_order */
  NULL,                                 /* uca */
  to_uni_dec8_swedish_ci,               /* to_uni */
  NULL,                                 /* from_uni */
  &ob_unicase_default,                  /* caseinfo */
  NULL,                                 /* state map */
  NULL,                                 /* ident map */
  1,                                    /* strxfrm_multiply */
  1,                                    /* caseup_multiply */
  1,                                    /* casedn_multiply */
  1,                                    /* mbminlen */
  1,                                    /* mbmaxlen */
  1,                                    /* mbmaxlenlen */
  0,                                    /* min_sort_char */
  255,                                  /* max_sort_char */
  ' ',                                  /* pad_char */
  false,                                /* escape_with_backslash_is_dangerous */
  1,                                    /* levels_for_compare */
  1,                                    /* levels_for_order */
  &ob_charset_8bit_handler,             /* cset */
  &ob_collation_8bit_simple_ci_handler, /* collation handler */
  PAD_SPACE                             /* pad_attribute */
};
|
||||
|
||||
/*
  dec8 charset, binary collation (number 69, OB_CS_BINSORT). No sort_order
  table is supplied. Shares the 8-bit charset handler with
  dec8_swedish_ci and uses the 8-bit binary collation handler.
*/
ObCharsetInfo ob_charset_dec8_bin = {
  69, 0, 0,                        /* number */
  OB_CS_COMPILED | OB_CS_BINSORT,  /* state */
  "dec8",                          /* csname */
  "dec8_bin",                      /* m_collname */
  "DEC West European",             /* comment */
  NULL,                            /* tailoring */
  NULL,                            /* coll_param */
  ctype_dec8_bin,                  /* ctype */
  to_lower_dec8_bin,               /* to_lower */
  to_upper_dec8_bin,               /* to_upper */
  NULL,                            /* sort_order */
  NULL,                            /* uca */
  to_uni_dec8_bin,                 /* to_uni */
  NULL,                            /* from_uni */
  &ob_unicase_default,             /* caseinfo */
  NULL,                            /* state map */
  NULL,                            /* ident map */
  1,                               /* strxfrm_multiply */
  1,                               /* caseup_multiply */
  1,                               /* casedn_multiply */
  1,                               /* mbminlen */
  1,                               /* mbmaxlen */
  1,                               /* mbmaxlenlen */
  0,                               /* min_sort_char */
  255,                             /* max_sort_char */
  ' ',                             /* pad_char */
  false,                           /* escape_with_backslash_is_dangerous */
  1,                               /* levels_for_compare */
  1,                               /* levels_for_order */
  &ob_charset_8bit_handler,        /* cset */
  &ob_collation_8bit_bin_handler,  /* collation handler */
  PAD_SPACE                        /* pad_attribute */
};
|
@ -989,7 +989,9 @@ static ObCharsetHandler ob_charset_gb18030_handler = {
|
||||
ob_strntod_8bit,
|
||||
// my_strtoll10_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit};
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space
|
||||
};
|
||||
|
||||
ObCharsetInfo ob_charset_gb18030_chinese_ci = {
|
||||
oceanbase::common::CS_TYPE_GB18030_CHINESE_CI,
|
||||
@ -1620,7 +1622,8 @@ static ObCharsetHandler ob_charset_gb18030_2022_handler =
|
||||
ob_strntoull_8bit,
|
||||
ob_strntod_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space
|
||||
};
|
||||
|
||||
ObCharsetInfo ob_charset_gb18030_2022_bin =
|
||||
|
3
deps/oblib/src/lib/charset/ob_ctype_gbk.cc
vendored
3
deps/oblib/src/lib/charset/ob_ctype_gbk.cc
vendored
@ -368,7 +368,8 @@ static ObCharsetHandler ob_charset_gbk_handler=
|
||||
ob_strntoull_8bit,
|
||||
ob_strntod_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space
|
||||
};
|
||||
|
||||
|
||||
|
349
deps/oblib/src/lib/charset/ob_ctype_hkscs.cc
vendored
Normal file
349
deps/oblib/src/lib/charset/ob_ctype_hkscs.cc
vendored
Normal file
@ -0,0 +1,349 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "ob_ctype_hkscs_tab.h"
|
||||
#include "ob_ctype_hkscs31_tab.h"
|
||||
#include "ob_template_helper.h"
|
||||
|
||||
/*
|
||||
This differs from MySQL: these definitions are HKSCS-specific, to cover the characters newly added in HKSCS.
|
||||
*/
|
||||
/*
  Byte-level helpers for two-byte HKSCS codes.

  ishkscshead(c)    - c is a valid lead byte  (0x81..0xfe)
  ishkscstail(c)    - c is a valid trail byte (0x40..0x7e or 0xa1..0xfe)
  ishkscscode(c, d) - (c, d) is a valid lead/trail pair
  hkscscode(c, d)   - combine lead c and trail d into one 16-bit code
  hkscshead(e)      - lead (high) byte of code e
  hkscstail(e)      - trail (low) byte of code e

  Every argument is parenthesized inside the expansion so that an expression
  argument (for example `hi << 8 | lo`) is evaluated as a whole before the
  shift or mask is applied.
*/
#define ishkscshead(c) (0x81 <= (unsigned char)(c) && (unsigned char)(c) <= 0xfe)
#define ishkscstail(c)                                           \
  ((0x40 <= (unsigned char)(c) && (unsigned char)(c) <= 0x7e) || \
   (0xa1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xfe))

#define ishkscscode(c, d) (ishkscshead(c) && ishkscstail(d))
#define hkscscode(c, d) (((unsigned char)(c) << 8) | (unsigned char)(d))
#define hkscshead(e) ((unsigned char)((e) >> 8))
#define hkscstail(e) ((unsigned char)((e) & 0xff))
|
||||
|
||||
extern "C" {
|
||||
static unsigned int ismbchar_hkscs(const ObCharsetInfo *cs [[maybe_unused]],
|
||||
const char *p, const char *e) {
|
||||
return (ishkscshead(*(p)) && (e) - (p) > 1 && ishkscstail(*((p) + 1)) ? 2 : 0);
|
||||
}
|
||||
|
||||
static unsigned int mbcharlen_hkscs(const ObCharsetInfo *cs [[maybe_unused]], uint c) {
|
||||
return (ishkscshead(c) ? 2 : 1);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Returns a well formed length of a hkscs string.
|
||||
CP950 and HKSCS additional characters are also accepted.
|
||||
*/
|
||||
static size_t ob_well_formed_len_hkscs(const ObCharsetInfo *cs [[maybe_unused]],
|
||||
const char *b, const char *e, size_t pos,
|
||||
int *error) {
|
||||
const char *b0 = b;
|
||||
const char *emb = e - 1; /* Last possible end of an MB character */
|
||||
|
||||
*error = 0;
|
||||
while (pos-- && b < e) {
|
||||
if ((unsigned char)b[0] < 128) {
|
||||
/* Single byte ascii character */
|
||||
b++;
|
||||
} else if ((b < emb) && ishkscscode((unsigned char)*b, (unsigned char)b[1])) {
|
||||
/* Double byte character */
|
||||
b += 2;
|
||||
} else {
|
||||
/* Wrong byte sequence */
|
||||
*error = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t)(b - b0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Case-info descriptor pairing 0xFFFF with the ob_caseinfo_pages_hkscs page
   table; both hkscs_bin and hkscs31_bin reference it as their caseinfo.
   NOTE(review): 0xFFFF is presumably the highest code point covered - confirm
   against the ObUnicaseInfo declaration. */
static ObUnicaseInfo ob_caseinfo_hk = {0xFFFF, ob_caseinfo_pages_hkscs};
|
||||
|
||||
static int func_hkscs_uni_onechar(int code) {
|
||||
auto iter = hkscs_to_uni_map.find(code);
|
||||
if (iter != hkscs_to_uni_map.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int func_uni_hkscs_onechar(int code) {
|
||||
auto iter = uni_to_hkscs_map.find(code);
|
||||
if (iter != uni_to_hkscs_map.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int func_hkscs31_uni_onechar(int code) {
|
||||
auto iter = hkscs31_to_uni_map.find(code);
|
||||
if (iter != hkscs31_to_uni_map.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int func_uni_hkscs31_onechar(int code) {
|
||||
auto iter = uni_to_hkscs31_map.find(code);
|
||||
if (iter != uni_to_hkscs31_map.end()) {
|
||||
return iter->second;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
extern "C" {
|
||||
// unicode to hkscs
|
||||
static int ob_wc_mb_hkscs(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t wc,
|
||||
unsigned char *s, unsigned char *e) {
|
||||
int code;
|
||||
|
||||
if (s >= e) return OB_CS_TOOSMALL;
|
||||
|
||||
if ((int)wc < 0x80) {
|
||||
s[0] = (uchar)wc;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!(code = func_uni_hkscs_onechar(wc))) return OB_CS_ILUNI;
|
||||
|
||||
if (s + 2 > e) return OB_CS_TOOSMALL;
|
||||
|
||||
s[0] = code >> 8;
|
||||
s[1] = code & 0xFF;
|
||||
|
||||
return 2;
|
||||
}
|
||||
// hkscs to unicode
|
||||
static int ob_mb_wc_hkscs(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t *pwc,
|
||||
const unsigned char *s, const unsigned char *e) {
|
||||
int hi;
|
||||
|
||||
if (s >= e) return OB_CS_TOOSMALL;
|
||||
|
||||
if ((hi = s[0]) < 0x80) {
|
||||
pwc[0] = hi;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (s + 2 > e) return OB_CS_TOOSMALL2;
|
||||
|
||||
if (!(pwc[0] = func_hkscs_uni_onechar((hi << 8) + s[1]))) return -2;
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
// unicode to hkscs31
|
||||
static int ob_wc_mb_hkscs31(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t wc,
|
||||
unsigned char *s, unsigned char *e) {
|
||||
int code;
|
||||
|
||||
if (s >= e) return OB_CS_TOOSMALL;
|
||||
|
||||
if ((int)wc < 0x80) {
|
||||
s[0] = (uchar)wc;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!(code = func_uni_hkscs31_onechar(wc))) return OB_CS_ILUNI;
|
||||
|
||||
if (s + 2 > e) return OB_CS_TOOSMALL;
|
||||
|
||||
s[0] = code >> 8;
|
||||
s[1] = code & 0xFF;
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
// hkscs31 to unicode
|
||||
static int ob_mb_wc_hkscs31(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t *pwc,
|
||||
const unsigned char *s, const unsigned char *e) {
|
||||
int hi;
|
||||
|
||||
if (s >= e) return OB_CS_TOOSMALL;
|
||||
|
||||
if ((hi = s[0]) < 0x80) {
|
||||
pwc[0] = hi;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (s + 2 > e) return OB_CS_TOOSMALL2;
|
||||
|
||||
if (!(pwc[0] = func_hkscs31_uni_onechar((hi << 8) + s[1]))) return -2;
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
  Populate the HKSCS <-> Unicode lookup maps from their static source arrays.

  Each map is filled only when it is currently empty, so repeated calls do
  not re-insert. Filling stops at the first insert that reports failure
  (for example a duplicate key), and the second map is not attempted once
  the first has failed. Both maps are rehashed to at least 20019 buckets on
  every call, whether or not the fill succeeded.

  cs and loader are not used.
  Returns true when every attempted insert succeeded.

  NOTE(review): the "empty, then fill" sequence is not synchronized here;
  presumably callers serialize charset initialization - confirm.
*/
bool hkscs_init(ObCharsetInfo *cs, ObCharsetLoader *loader) {
  bool succ = true;

  if (hkscs_to_uni_map.size() == 0) {
    for (const auto &entry : hkscs_to_uni_map_array) {
      if (!hkscs_to_uni_map.insert(entry).second) {
        succ = false;
        break;
      }
    }
  }

  if (succ && uni_to_hkscs_map.size() == 0) {
    for (const auto &entry : uni_to_hkscs_map_array) {
      if (!uni_to_hkscs_map.insert(entry).second) {
        succ = false;
        break;
      }
    }
  }

  hkscs_to_uni_map.rehash(20019);
  uni_to_hkscs_map.rehash(20019);
  return succ;
}
|
||||
|
||||
/*
  Populate the HKSCS31 <-> Unicode lookup maps from their static source arrays.

  Each map is filled only when it is currently empty, so repeated calls do
  not re-insert. Filling stops at the first insert that reports failure
  (for example a duplicate key), and the second map is not attempted once
  the first has failed. Both maps are rehashed to at least 20019 buckets on
  every call, whether or not the fill succeeded.

  cs and loader are not used.
  Returns true when every attempted insert succeeded.

  NOTE(review): the "empty, then fill" sequence is not synchronized here;
  presumably callers serialize charset initialization - confirm.
*/
bool hkscs31_init(ObCharsetInfo *cs, ObCharsetLoader *loader) {
  bool succ = true;

  if (hkscs31_to_uni_map.size() == 0) {
    for (const auto &entry : hkscs31_to_uni_map_array) {
      if (!hkscs31_to_uni_map.insert(entry).second) {
        succ = false;
        break;
      }
    }
  }

  if (succ && uni_to_hkscs31_map.size() == 0) {
    for (const auto &entry : uni_to_hkscs31_map_array) {
      if (!uni_to_hkscs31_map.insert(entry).second) {
        succ = false;
        break;
      }
    }
  }

  hkscs31_to_uni_map.rehash(20019);
  uni_to_hkscs31_map.rehash(20019);
  return succ;
}
|
||||
|
||||
/*
  Charset handler for "hkscs". HKSCS-specific routines cover initialization,
  multi-byte detection, character length, well-formedness and the two
  Unicode conversions; the remaining slots reuse the generic *_mb and *_8bit
  routines.
*/
static ObCharsetHandler ob_charset_hkscs_handler = {
  hkscs_init,
  ismbchar_hkscs,
  mbcharlen_hkscs,
  ob_numchars_mb,
  ob_charpos_mb,
  ob_max_bytes_charpos_mb,  /* max_bytes charpos */
  ob_well_formed_len_hkscs,
  ob_lengthsp_8bit,
  ob_mb_wc_hkscs,           /* mb_wc */
  ob_wc_mb_hkscs,           /* wc_mb */
  ob_mb_ctype_mb,
  ob_caseup_mb,
  ob_casedn_mb,
  ob_fill_8bit,
  ob_strntol_8bit,
  ob_strntoul_8bit,
  ob_strntoll_8bit,
  ob_strntoull_8bit,
  ob_strntod_8bit,
  ob_strntoull10rnd_8bit,
  ob_scan_8bit,
  skip_trailing_space
};
|
||||
|
||||
/*
  Charset handler for "hkscs31". Identical to the hkscs handler except for
  the init routine and the two Unicode conversion routines, which use the
  hkscs31 mapping tables.
*/
static ObCharsetHandler ob_charset_hkscs31_handler = {
  hkscs31_init,
  ismbchar_hkscs,
  mbcharlen_hkscs,
  ob_numchars_mb,
  ob_charpos_mb,
  ob_max_bytes_charpos_mb,  /* max_bytes charpos */
  ob_well_formed_len_hkscs,
  ob_lengthsp_8bit,
  ob_mb_wc_hkscs31,         /* mb_wc */
  ob_wc_mb_hkscs31,         /* wc_mb */
  ob_mb_ctype_mb,
  ob_caseup_mb,
  ob_casedn_mb,
  ob_fill_8bit,
  ob_strntol_8bit,
  ob_strntoul_8bit,
  ob_strntoll_8bit,
  ob_strntoull_8bit,
  ob_strntod_8bit,
  ob_strntoull10rnd_8bit,
  ob_scan_8bit,
  skip_trailing_space
};
|
||||
|
||||
/*
  "hkscs" charset with binary collation (number 152, OB_CS_BINSORT).
  Characters are 1 or 2 bytes long (mbminlen 1, mbmaxlen 2). No sort_order,
  uca or to/from-Unicode tables are supplied here; Unicode conversion goes
  through the handler's mb_wc / wc_mb routines.
*/
ObCharsetInfo ob_charset_hkscs_bin = {
  152, 0, 0,                        /* number */
  OB_CS_COMPILED | OB_CS_BINSORT,   /* state */
  "hkscs",                          /* cs name */
  "hkscs_bin",                      /* m_coll_name */
  "HKSCS 2000 Traditional Chinese", /* comment */
  NULL,                             /* tailoring */
  NULL,                             /* coll_param */
  ctype_hkscs,                      /* ctype */
  to_lower_hkscs,                   /* to_lower */
  to_upper_hkscs,                   /* to_upper */
  NULL,                             /* sort_order */
  NULL,                             /* uca */
  NULL,                             /* tab_to_uni */
  NULL,                             /* tab_from_uni */
  &ob_caseinfo_hk,                  /* caseinfo */
  NULL,                             /* state_map */
  NULL,                             /* ident_map */
  1,                                /* strxfrm_multiply */
  1,                                /* caseup_multiply */
  1,                                /* casedn_multiply */
  1,                                /* mbminlen */
  2,                                /* mbmaxlen */
  1,                                /* mbmaxlenlen */
  0,                                /* min_sort_char */
  0xFEFE,                           /* max_sort_char */
  ' ',                              /* pad char */
  true,                             /* escape_with_backslash_is_dangerous */
  1,                                /* levels_for_compare */
  1,                                /* levels_for_order */
  &ob_charset_hkscs_handler,        /* cset */
  &ob_collation_mb_bin_handler,     /* collation handler */
  PAD_SPACE                         /* pad_attribute */
};
|
||||
|
||||
/*
  "hkscs31" charset with binary collation (number 153, OB_CS_BINSORT).
  Shares the ctype / case tables and case-info with hkscs_bin and differs
  in name, comment and charset handler (ob_charset_hkscs31_handler).
*/
ObCharsetInfo ob_charset_hkscs31_bin = {
  153, 0, 0,                        /* number */
  OB_CS_COMPILED | OB_CS_BINSORT,   /* state */
  "hkscs31",                        /* cs name */
  "hkscs31_bin",                    /* m_coll_name */
  "HKSCS 2001 Traditional Chinese", /* comment */
  NULL,                             /* tailoring */
  NULL,                             /* coll_param */
  ctype_hkscs,                      /* ctype */
  to_lower_hkscs,                   /* to_lower */
  to_upper_hkscs,                   /* to_upper */
  NULL,                             /* sort_order */
  NULL,                             /* uca */
  NULL,                             /* tab_to_uni */
  NULL,                             /* tab_from_uni */
  &ob_caseinfo_hk,                  /* caseinfo */
  NULL,                             /* state_map */
  NULL,                             /* ident_map */
  1,                                /* strxfrm_multiply */
  1,                                /* caseup_multiply */
  1,                                /* casedn_multiply */
  1,                                /* mbminlen */
  2,                                /* mbmaxlen */
  1,                                /* mbmaxlenlen */
  0,                                /* min_sort_char */
  0xFEFE,                           /* max_sort_char */
  ' ',                              /* pad char */
  true,                             /* escape_with_backslash_is_dangerous */
  1,                                /* levels_for_compare */
  1,                                /* levels_for_order */
  &ob_charset_hkscs31_handler,      /* cset */
  &ob_collation_mb_bin_handler,     /* collation handler */
  PAD_SPACE                         /* pad_attribute */
};
|
2504
deps/oblib/src/lib/charset/ob_ctype_hkscs31_tab.h
vendored
Normal file
2504
deps/oblib/src/lib/charset/ob_ctype_hkscs31_tab.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
4394
deps/oblib/src/lib/charset/ob_ctype_hkscs_tab.h
vendored
Normal file
4394
deps/oblib/src/lib/charset/ob_ctype_hkscs_tab.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -67,7 +67,8 @@ static ObCharsetHandler ob_charset_latin1_handler=
|
||||
ob_strntod_8bit,
|
||||
//ob_strtoll10_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space
|
||||
};
|
||||
|
||||
ObCharsetInfo ob_charset_latin1 = {
|
||||
|
30
deps/oblib/src/lib/charset/ob_ctype_mb.cc
vendored
30
deps/oblib/src/lib/charset/ob_ctype_mb.cc
vendored
@ -238,6 +238,28 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
|
||||
return (str != str_end ? 1 : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
ob_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
|
||||
*/
|
||||
|
||||
int ob_strcasecmp_mb(const ObCharsetInfo *cs, const char *s, const char *t) {
|
||||
uint32 l;
|
||||
const uchar *map = cs->to_upper;
|
||||
|
||||
while (*s && *t) {
|
||||
/* Pointing after the '\0' is safe here. */
|
||||
if ((l = ob_ismbchar(cs, s, s + cs->mbmaxlen))) {
|
||||
while (l--)
|
||||
if (*s++ != *t++) return 1;
|
||||
} else if (ob_mbcharlen(cs, *t) != 1 ||
|
||||
map[(uchar)*s++] != map[(uchar)*t++])
|
||||
return 1;
|
||||
}
|
||||
/* At least one of '*s' and '*t' is zero here. */
|
||||
assert(!*t || !*s);
|
||||
return (*t != *s);
|
||||
}
|
||||
|
||||
unsigned int __attribute__ ((noinline)) ob_instr_mb_help(size_t s_length, ob_match_t *match, unsigned int nmatch)
|
||||
{
|
||||
if (!s_length) {
|
||||
@ -439,7 +461,7 @@ size_t ob_lengthsp_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *ptr, size_t length)
|
||||
{
|
||||
const char *end;
|
||||
end= (const char *) skip_trailing_space((const unsigned char *)ptr, length, 0);
|
||||
end = (const char *) cs->cset->skip_trailing_space(cs, (const unsigned char *)ptr, length); // 8bit not utf16
|
||||
return (size_t) (end-ptr);
|
||||
}
|
||||
|
||||
@ -693,7 +715,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
|
||||
str += mb_len;
|
||||
break;
|
||||
}
|
||||
} else if (!ob_ismbchar(cs, str, str_end) && *str == cmp) {
|
||||
} else if (!ob_ismbchar(cs, str, str_end) && static_cast<unsigned char>(*str) == cmp) {
|
||||
str++;
|
||||
break;
|
||||
}
|
||||
@ -723,14 +745,14 @@ int ob_wildcmp_mb_bin(const ObCharsetInfo *cs,
|
||||
escape_char, w_one, w_many, 1);
|
||||
}
|
||||
|
||||
void ob_hash_sort_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
void ob_hash_sort_mb_bin(const ObCharsetInfo *cs,
|
||||
const unsigned char *key, size_t len,unsigned long int *nr1, unsigned long int *nr2,
|
||||
const bool calc_end_space, hash_algo hash_algo)
|
||||
{
|
||||
const unsigned char *pos = key;
|
||||
|
||||
if (!calc_end_space) {
|
||||
key= skip_trailing_space(key, len, 0);
|
||||
key = cs->cset->skip_trailing_space(cs, key, len); // use in utf8 not utf16
|
||||
} else {
|
||||
key += len;
|
||||
}
|
||||
|
30
deps/oblib/src/lib/charset/ob_ctype_simple.cc
vendored
30
deps/oblib/src/lib/charset/ob_ctype_simple.cc
vendored
@ -24,7 +24,6 @@
|
||||
|
||||
#define CUTOFF (UINT64_MAX / 10)
|
||||
#define CUTLIM (UINT64_MAX % 10)
|
||||
#define SPACE_INT 0x20202020
|
||||
#define DIGITS_IN_ULONGLONG 20
|
||||
#define PLANE_SIZE 0x100
|
||||
#define PLANE_NUM 0x100
|
||||
@ -765,7 +764,7 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,
|
||||
const unsigned char *end;
|
||||
unsigned char data[HASH_BUFFER_LENGTH];
|
||||
int length = 0;
|
||||
end= calc_end_space ? key + len : skip_trailing_space(key, len, 0);
|
||||
end= calc_end_space ? key + len : cs->cset->skip_trailing_space(cs, key, len); // used in gbk sjis tis620
|
||||
|
||||
if (NULL == hash_algo) {
|
||||
for (; key < (unsigned char*) end ; key++) {
|
||||
@ -785,7 +784,32 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,
|
||||
}
|
||||
}
|
||||
|
||||
#define SPACE_INT 0x20202020
|
||||
const unsigned char *skip_trailing_space(const struct ObCharsetInfo *cs __attribute__((unused)),const unsigned char *ptr,size_t len)
|
||||
{
|
||||
const static unsigned SPACE_INT = 0x20202020;
|
||||
const unsigned char *end= ptr + len;
|
||||
if (len > 20) {
|
||||
const unsigned char *end_words= (const unsigned char *)(int_ptr)
|
||||
(((ulonglong)(int_ptr)end) / SIZEOF_INT * SIZEOF_INT);
|
||||
const unsigned char *start_words= (const unsigned char *)(int_ptr)
|
||||
((((ulonglong)(int_ptr)ptr) + SIZEOF_INT - 1) / SIZEOF_INT * SIZEOF_INT);
|
||||
ob_charset_assert(((ulonglong)(int_ptr)ptr) >= SIZEOF_INT);
|
||||
if (end_words > ptr) {
|
||||
while (end > end_words && end[-1] == 0x20) {
|
||||
end--;
|
||||
}
|
||||
if (end[-1] == 0x20 && start_words < end_words) {
|
||||
while (end > start_words && ((unsigned *)end)[-1] == SPACE_INT) {
|
||||
end -= SIZEOF_INT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
while (end > ptr && end[-1] == 0x20)
|
||||
end--;
|
||||
return (end);
|
||||
}
|
||||
|
||||
|
||||
size_t ob_strxfrm_pad(const ObCharsetInfo *cs, unsigned char *str, unsigned char *frm_end,
|
||||
unsigned char *str_end, unsigned int nweights, unsigned int flags) {
|
||||
|
18089
deps/oblib/src/lib/charset/ob_ctype_sjis.cc
vendored
Normal file
18089
deps/oblib/src/lib/charset/ob_ctype_sjis.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -260,7 +260,9 @@ static ObCharsetHandler ob_charset_tis620_handler = {
|
||||
ob_strntod_8bit,
|
||||
//ob_strtoll10_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit};
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space
|
||||
};
|
||||
|
||||
|
||||
ObCharsetInfo ob_charset_tis620_thai_ci = {
|
||||
|
4216
deps/oblib/src/lib/charset/ob_ctype_uca.cc
vendored
4216
deps/oblib/src/lib/charset/ob_ctype_uca.cc
vendored
File diff suppressed because it is too large
Load Diff
176
deps/oblib/src/lib/charset/ob_ctype_uca_tab.h
vendored
176
deps/oblib/src/lib/charset/ob_ctype_uca_tab.h
vendored
@ -513,6 +513,182 @@ ObUCAInfo ob_uca_v520 = {
|
||||
0,
|
||||
0
|
||||
};
|
||||
/*
  Case-info page for code points 0x00..0xFF used by the Turkish page table
  (ob_unicase_pages_turkish[0]); entry i describes code point i.
  Judging by the values, each triple is {upper, lower, sort} - confirm
  against the ObUnicaseInfoChar declaration.
  Turkish-specific entries visible in the data:
    index 0x49 ('I'): lower is 0x0131 (dotless i)
    index 0x69 ('i'): upper is 0x0130 (capital I with dot above)
*/
static ObUnicaseInfoChar turk00[] = {
  /* 0x00 .. 0x1F */
  {0x0000, 0x0000, 0x0000}, {0x0001, 0x0001, 0x0001},
  {0x0002, 0x0002, 0x0002}, {0x0003, 0x0003, 0x0003},
  {0x0004, 0x0004, 0x0004}, {0x0005, 0x0005, 0x0005},
  {0x0006, 0x0006, 0x0006}, {0x0007, 0x0007, 0x0007},
  {0x0008, 0x0008, 0x0008}, {0x0009, 0x0009, 0x0009},
  {0x000A, 0x000A, 0x000A}, {0x000B, 0x000B, 0x000B},
  {0x000C, 0x000C, 0x000C}, {0x000D, 0x000D, 0x000D},
  {0x000E, 0x000E, 0x000E}, {0x000F, 0x000F, 0x000F},
  {0x0010, 0x0010, 0x0010}, {0x0011, 0x0011, 0x0011},
  {0x0012, 0x0012, 0x0012}, {0x0013, 0x0013, 0x0013},
  {0x0014, 0x0014, 0x0014}, {0x0015, 0x0015, 0x0015},
  {0x0016, 0x0016, 0x0016}, {0x0017, 0x0017, 0x0017},
  {0x0018, 0x0018, 0x0018}, {0x0019, 0x0019, 0x0019},
  {0x001A, 0x001A, 0x001A}, {0x001B, 0x001B, 0x001B},
  {0x001C, 0x001C, 0x001C}, {0x001D, 0x001D, 0x001D},
  {0x001E, 0x001E, 0x001E}, {0x001F, 0x001F, 0x001F},
  /* 0x20 .. 0x3F */
  {0x0020, 0x0020, 0x0020}, {0x0021, 0x0021, 0x0021},
  {0x0022, 0x0022, 0x0022}, {0x0023, 0x0023, 0x0023},
  {0x0024, 0x0024, 0x0024}, {0x0025, 0x0025, 0x0025},
  {0x0026, 0x0026, 0x0026}, {0x0027, 0x0027, 0x0027},
  {0x0028, 0x0028, 0x0028}, {0x0029, 0x0029, 0x0029},
  {0x002A, 0x002A, 0x002A}, {0x002B, 0x002B, 0x002B},
  {0x002C, 0x002C, 0x002C}, {0x002D, 0x002D, 0x002D},
  {0x002E, 0x002E, 0x002E}, {0x002F, 0x002F, 0x002F},
  {0x0030, 0x0030, 0x0030}, {0x0031, 0x0031, 0x0031},
  {0x0032, 0x0032, 0x0032}, {0x0033, 0x0033, 0x0033},
  {0x0034, 0x0034, 0x0034}, {0x0035, 0x0035, 0x0035},
  {0x0036, 0x0036, 0x0036}, {0x0037, 0x0037, 0x0037},
  {0x0038, 0x0038, 0x0038}, {0x0039, 0x0039, 0x0039},
  {0x003A, 0x003A, 0x003A}, {0x003B, 0x003B, 0x003B},
  {0x003C, 0x003C, 0x003C}, {0x003D, 0x003D, 0x003D},
  {0x003E, 0x003E, 0x003E}, {0x003F, 0x003F, 0x003F},
  /* 0x40 .. 0x5F */
  {0x0040, 0x0040, 0x0040}, {0x0041, 0x0061, 0x0041},
  {0x0042, 0x0062, 0x0042}, {0x0043, 0x0063, 0x0043},
  {0x0044, 0x0064, 0x0044}, {0x0045, 0x0065, 0x0045},
  {0x0046, 0x0066, 0x0046}, {0x0047, 0x0067, 0x0047},
  {0x0048, 0x0068, 0x0048}, {0x0049, 0x0131, 0x0049},
  {0x004A, 0x006A, 0x004A}, {0x004B, 0x006B, 0x004B},
  {0x004C, 0x006C, 0x004C}, {0x004D, 0x006D, 0x004D},
  {0x004E, 0x006E, 0x004E}, {0x004F, 0x006F, 0x004F},
  {0x0050, 0x0070, 0x0050}, {0x0051, 0x0071, 0x0051},
  {0x0052, 0x0072, 0x0052}, {0x0053, 0x0073, 0x0053},
  {0x0054, 0x0074, 0x0054}, {0x0055, 0x0075, 0x0055},
  {0x0056, 0x0076, 0x0056}, {0x0057, 0x0077, 0x0057},
  {0x0058, 0x0078, 0x0058}, {0x0059, 0x0079, 0x0059},
  {0x005A, 0x007A, 0x005A}, {0x005B, 0x005B, 0x005B},
  {0x005C, 0x005C, 0x005C}, {0x005D, 0x005D, 0x005D},
  {0x005E, 0x005E, 0x005E}, {0x005F, 0x005F, 0x005F},
  /* 0x60 .. 0x7F */
  {0x0060, 0x0060, 0x0060}, {0x0041, 0x0061, 0x0041},
  {0x0042, 0x0062, 0x0042}, {0x0043, 0x0063, 0x0043},
  {0x0044, 0x0064, 0x0044}, {0x0045, 0x0065, 0x0045},
  {0x0046, 0x0066, 0x0046}, {0x0047, 0x0067, 0x0047},
  {0x0048, 0x0068, 0x0048}, {0x0130, 0x0069, 0x0049},
  {0x004A, 0x006A, 0x004A}, {0x004B, 0x006B, 0x004B},
  {0x004C, 0x006C, 0x004C}, {0x004D, 0x006D, 0x004D},
  {0x004E, 0x006E, 0x004E}, {0x004F, 0x006F, 0x004F},
  {0x0050, 0x0070, 0x0050}, {0x0051, 0x0071, 0x0051},
  {0x0052, 0x0072, 0x0052}, {0x0053, 0x0073, 0x0053},
  {0x0054, 0x0074, 0x0054}, {0x0055, 0x0075, 0x0055},
  {0x0056, 0x0076, 0x0056}, {0x0057, 0x0077, 0x0057},
  {0x0058, 0x0078, 0x0058}, {0x0059, 0x0079, 0x0059},
  {0x005A, 0x007A, 0x005A}, {0x007B, 0x007B, 0x007B},
  {0x007C, 0x007C, 0x007C}, {0x007D, 0x007D, 0x007D},
  {0x007E, 0x007E, 0x007E}, {0x007F, 0x007F, 0x007F},
  /* 0x80 .. 0x9F */
  {0x0080, 0x0080, 0x0080}, {0x0081, 0x0081, 0x0081},
  {0x0082, 0x0082, 0x0082}, {0x0083, 0x0083, 0x0083},
  {0x0084, 0x0084, 0x0084}, {0x0085, 0x0085, 0x0085},
  {0x0086, 0x0086, 0x0086}, {0x0087, 0x0087, 0x0087},
  {0x0088, 0x0088, 0x0088}, {0x0089, 0x0089, 0x0089},
  {0x008A, 0x008A, 0x008A}, {0x008B, 0x008B, 0x008B},
  {0x008C, 0x008C, 0x008C}, {0x008D, 0x008D, 0x008D},
  {0x008E, 0x008E, 0x008E}, {0x008F, 0x008F, 0x008F},
  {0x0090, 0x0090, 0x0090}, {0x0091, 0x0091, 0x0091},
  {0x0092, 0x0092, 0x0092}, {0x0093, 0x0093, 0x0093},
  {0x0094, 0x0094, 0x0094}, {0x0095, 0x0095, 0x0095},
  {0x0096, 0x0096, 0x0096}, {0x0097, 0x0097, 0x0097},
  {0x0098, 0x0098, 0x0098}, {0x0099, 0x0099, 0x0099},
  {0x009A, 0x009A, 0x009A}, {0x009B, 0x009B, 0x009B},
  {0x009C, 0x009C, 0x009C}, {0x009D, 0x009D, 0x009D},
  {0x009E, 0x009E, 0x009E}, {0x009F, 0x009F, 0x009F},
  /* 0xA0 .. 0xBF */
  {0x00A0, 0x00A0, 0x00A0}, {0x00A1, 0x00A1, 0x00A1},
  {0x00A2, 0x00A2, 0x00A2}, {0x00A3, 0x00A3, 0x00A3},
  {0x00A4, 0x00A4, 0x00A4}, {0x00A5, 0x00A5, 0x00A5},
  {0x00A6, 0x00A6, 0x00A6}, {0x00A7, 0x00A7, 0x00A7},
  {0x00A8, 0x00A8, 0x00A8}, {0x00A9, 0x00A9, 0x00A9},
  {0x00AA, 0x00AA, 0x00AA}, {0x00AB, 0x00AB, 0x00AB},
  {0x00AC, 0x00AC, 0x00AC}, {0x00AD, 0x00AD, 0x00AD},
  {0x00AE, 0x00AE, 0x00AE}, {0x00AF, 0x00AF, 0x00AF},
  {0x00B0, 0x00B0, 0x00B0}, {0x00B1, 0x00B1, 0x00B1},
  {0x00B2, 0x00B2, 0x00B2}, {0x00B3, 0x00B3, 0x00B3},
  {0x00B4, 0x00B4, 0x00B4}, {0x039C, 0x00B5, 0x039C},
  {0x00B6, 0x00B6, 0x00B6}, {0x00B7, 0x00B7, 0x00B7},
  {0x00B8, 0x00B8, 0x00B8}, {0x00B9, 0x00B9, 0x00B9},
  {0x00BA, 0x00BA, 0x00BA}, {0x00BB, 0x00BB, 0x00BB},
  {0x00BC, 0x00BC, 0x00BC}, {0x00BD, 0x00BD, 0x00BD},
  {0x00BE, 0x00BE, 0x00BE}, {0x00BF, 0x00BF, 0x00BF},
  /* 0xC0 .. 0xDF */
  {0x00C0, 0x00E0, 0x0041}, {0x00C1, 0x00E1, 0x0041},
  {0x00C2, 0x00E2, 0x0041}, {0x00C3, 0x00E3, 0x0041},
  {0x00C4, 0x00E4, 0x0041}, {0x00C5, 0x00E5, 0x0041},
  {0x00C6, 0x00E6, 0x00C6}, {0x00C7, 0x00E7, 0x0043},
  {0x00C8, 0x00E8, 0x0045}, {0x00C9, 0x00E9, 0x0045},
  {0x00CA, 0x00EA, 0x0045}, {0x00CB, 0x00EB, 0x0045},
  {0x00CC, 0x00EC, 0x0049}, {0x00CD, 0x00ED, 0x0049},
  {0x00CE, 0x00EE, 0x0049}, {0x00CF, 0x00EF, 0x0049},
  {0x00D0, 0x00F0, 0x00D0}, {0x00D1, 0x00F1, 0x004E},
  {0x00D2, 0x00F2, 0x004F}, {0x00D3, 0x00F3, 0x004F},
  {0x00D4, 0x00F4, 0x004F}, {0x00D5, 0x00F5, 0x004F},
  {0x00D6, 0x00F6, 0x004F}, {0x00D7, 0x00D7, 0x00D7},
  {0x00D8, 0x00F8, 0x00D8}, {0x00D9, 0x00F9, 0x0055},
  {0x00DA, 0x00FA, 0x0055}, {0x00DB, 0x00FB, 0x0055},
  {0x00DC, 0x00FC, 0x0055}, {0x00DD, 0x00FD, 0x0059},
  {0x00DE, 0x00FE, 0x00DE}, {0x00DF, 0x00DF, 0x00DF},
  /* 0xE0 .. 0xFF */
  {0x00C0, 0x00E0, 0x0041}, {0x00C1, 0x00E1, 0x0041},
  {0x00C2, 0x00E2, 0x0041}, {0x00C3, 0x00E3, 0x0041},
  {0x00C4, 0x00E4, 0x0041}, {0x00C5, 0x00E5, 0x0041},
  {0x00C6, 0x00E6, 0x00C6}, {0x00C7, 0x00E7, 0x0043},
  {0x00C8, 0x00E8, 0x0045}, {0x00C9, 0x00E9, 0x0045},
  {0x00CA, 0x00EA, 0x0045}, {0x00CB, 0x00EB, 0x0045},
  {0x00CC, 0x00EC, 0x0049}, {0x00CD, 0x00ED, 0x0049},
  {0x00CE, 0x00EE, 0x0049}, {0x00CF, 0x00EF, 0x0049},
  {0x00D0, 0x00F0, 0x00D0}, {0x00D1, 0x00F1, 0x004E},
  {0x00D2, 0x00F2, 0x004F}, {0x00D3, 0x00F3, 0x004F},
  {0x00D4, 0x00F4, 0x004F}, {0x00D5, 0x00F5, 0x004F},
  {0x00D6, 0x00F6, 0x004F}, {0x00F7, 0x00F7, 0x00F7},
  {0x00D8, 0x00F8, 0x00D8}, {0x00D9, 0x00F9, 0x0055},
  {0x00DA, 0x00FA, 0x0055}, {0x00DB, 0x00FB, 0x0055},
  {0x00DC, 0x00FC, 0x0055}, {0x00DD, 0x00FD, 0x0059},
  {0x00DE, 0x00FE, 0x00DE}, {0x0178, 0x00FF, 0x0059}};
|
||||
|
||||
extern ObUnicaseInfoChar utf8_plane01[];
|
||||
extern ObUnicaseInfoChar utf8_plane02[];
|
||||
extern ObUnicaseInfoChar utf8_plane03[];
|
||||
extern ObUnicaseInfoChar utf8_plane04[];
|
||||
extern ObUnicaseInfoChar utf8_plane05[];
|
||||
extern ObUnicaseInfoChar utf8_plane1E[];
|
||||
extern ObUnicaseInfoChar utf8_plane1F[];
|
||||
extern ObUnicaseInfoChar utf8_plane21[];
|
||||
extern ObUnicaseInfoChar utf8_plane24[];
|
||||
extern ObUnicaseInfoChar utf8_planeFF[];
|
||||
|
||||
/*
  Page table for the Turkish case-info: 256 entries of 8 per row below.
  Page 0x00 is the Turkish-specific turk00; pages 0x01..0x05, 0x1E, 0x1F,
  0x21, 0x24 and 0xFF reuse the utf8_plane* tables; every other page is
  nullptr.
*/
const static ObUnicaseInfoChar *ob_unicase_pages_turkish[256] = {
  /* 0x00 */ turk00, utf8_plane01, utf8_plane02, utf8_plane03, utf8_plane04, utf8_plane05, nullptr, nullptr,
  /* 0x08 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x10 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x18 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, utf8_plane1E, utf8_plane1F,
  /* 0x20 */ nullptr, utf8_plane21, nullptr, nullptr, utf8_plane24, nullptr, nullptr, nullptr,
  /* 0x28 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x30 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x38 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x40 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x48 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x50 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x58 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x60 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x68 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x70 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x78 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x80 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x88 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x90 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0x98 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xA0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xA8 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xB0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xB8 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xC0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xC8 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xD0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xD8 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xE0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xE8 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xF0 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
  /* 0xF8 */ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, utf8_planeFF};
|
||||
|
||||
/* Turkish case-info descriptor: pairs 0xFFFF with the Turkish page table.
   NOTE(review): 0xFFFF is presumably the highest code point covered - confirm
   against the ObUnicaseInfo declaration. */
ObUnicaseInfo ob_unicase_turkish = {0xFFFF, ob_unicase_pages_turkish};
|
||||
|
||||
static uint16 uca_000data[]= { /* 0000 (4 weights per char) */
|
||||
0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,
|
||||
|
190
deps/oblib/src/lib/charset/ob_ctype_utf16.cc
vendored
190
deps/oblib/src/lib/charset/ob_ctype_utf16.cc
vendored
@ -1126,6 +1126,25 @@ PAD_MIN_MAX:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/*
  Skip trailing spaces of a big-endian UTF-16 buffer.

  A trailing space is the two-byte sequence 0x00 0x20 (U+0020, high byte first).

  @param cs   charset descriptor, unused
  @param ptr  start of the buffer
  @param len  length of the buffer in bytes

  @return pointer just past the last byte that is not part of a trailing
          run of spaces; equals ptr + len when nothing was stripped.

  The scan works on a byte count instead of an `end - 1` pointer so that no
  pointer before the start of the buffer is ever formed when len == 0.
*/
const unsigned char *skip_trailing_space_utf16(const struct ObCharsetInfo *cs __attribute__((unused)),
                                               const unsigned char *ptr, size_t len)
{
  size_t remaining = len;
  /* Drop one 16-bit unit at a time while the last unit is U+0020. */
  while (remaining >= 2 && ptr[remaining - 2] == 0x00 && ptr[remaining - 1] == 0x20) {
    remaining -= 2;
  }
  return ptr + remaining;
}
|
||||
|
||||
/*
  Skip trailing spaces of a little-endian UTF-16 buffer.

  A trailing space is the two-byte sequence 0x20 0x00 (U+0020, low byte first).

  @param cs   charset descriptor, unused
  @param ptr  start of the buffer
  @param len  length of the buffer in bytes

  @return pointer just past the last byte that is not part of a trailing
          run of spaces; equals ptr + len when nothing was stripped.

  The scan works on a byte count instead of an `end - 1` pointer so that no
  pointer before the start of the buffer is ever formed when len == 0.
*/
const unsigned char *skip_trailing_space_utf16le(const struct ObCharsetInfo *cs __attribute__((unused)),
                                                 const unsigned char *ptr, size_t len)
{
  size_t remaining = len;
  /* Drop one 16-bit unit at a time while the last unit is U+0020. */
  while (remaining >= 2 && ptr[remaining - 2] == 0x20 && ptr[remaining - 1] == 0x00) {
    remaining -= 2;
  }
  return ptr + remaining;
}
|
||||
|
||||
ObCharsetHandler ob_charset_utf16_handler=
|
||||
{
|
||||
NULL,
|
||||
@ -1148,7 +1167,8 @@ ObCharsetHandler ob_charset_utf16_handler=
|
||||
ob_strntoull_mb2_or_mb4,
|
||||
ob_strntod_mb2_or_mb4,
|
||||
ob_strntoull10rnd_mb2_or_mb4,
|
||||
ob_scan_mb2
|
||||
ob_scan_mb2,
|
||||
skip_trailing_space_utf16
|
||||
};
|
||||
|
||||
static ObCollationHandler ob_collation_utf16_bin_handler =
|
||||
@ -1256,3 +1276,171 @@ ObCharsetInfo ob_charset_utf16_general_ci=
|
||||
&ob_collation_utf16_general_ci_handler,
|
||||
PAD_SPACE
|
||||
};
|
||||
|
||||
/*
|
||||
D800..DB7F - Non-private surrogate high (896 pages)
|
||||
DB80..DBFF - Private surrogate high (128 pages)
|
||||
DC00..DFFF - Surrogate low (1024 codes in a page)
|
||||
*/
|
||||
#define OB_UTF16_SURROGATE_HIGH_FIRST 0xD800
|
||||
#define OB_UTF16_SURROGATE_LOW_FIRST 0xDC00
|
||||
#define OB_UTF16_SURROGATE_LOW_LAST 0xDFFF
|
||||
|
||||
static size_t ob_lengthsp_utf16le(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *ptr, size_t length)
|
||||
{
|
||||
const char *end = ptr + length;
|
||||
while (end > ptr + 1 && uint2korr(end - 2) == 0x20) end -= 2;
|
||||
return (size_t)(end - ptr);
|
||||
}
|
||||
|
||||
static int
|
||||
ob_utf16le_uni(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t *pwc, const unsigned char *str, const unsigned char *end)
|
||||
{
|
||||
ob_wc_t lo;
|
||||
|
||||
if (str + 2 > end) return OB_CS_TOOSMALL2;
|
||||
|
||||
if ((*pwc = uint2korr(str)) < OB_UTF16_SURROGATE_HIGH_FIRST ||
|
||||
(*pwc > OB_UTF16_SURROGATE_LOW_LAST))
|
||||
return 2; /* [0000-D7FF,E000-FFFF] */
|
||||
|
||||
if (*pwc >= OB_UTF16_SURROGATE_LOW_FIRST)
|
||||
return OB_CS_ILSEQ; /* [DC00-DFFF] Low surrogate part without high part */
|
||||
|
||||
if (str + 4 > end) return OB_CS_TOOSMALL4;
|
||||
|
||||
str += 2;
|
||||
|
||||
if ((lo = uint2korr(str)) < OB_UTF16_SURROGATE_LOW_FIRST ||
|
||||
lo > OB_UTF16_SURROGATE_LOW_LAST)
|
||||
return OB_CS_ILSEQ; /* Expected low surrogate part, got something else */
|
||||
|
||||
*pwc = 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF));
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int ob_uni_utf16le(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t wc, unsigned char *str, unsigned char *end)
|
||||
{
|
||||
if (wc < OB_UTF16_SURROGATE_HIGH_FIRST ||
|
||||
(wc > OB_UTF16_SURROGATE_LOW_LAST && wc <= 0xFFFF)) {
|
||||
if (str + 2 > end) return OB_CS_TOOSMALL2;
|
||||
int2store(str, (uint16)wc);
|
||||
return 2; /* [0000-D7FF, E000-FFFF] */
|
||||
}
|
||||
|
||||
if (wc < 0xFFFF || wc > 0x10FFFF)
|
||||
return OB_CS_ILUNI; /* [D800-DFFF,10FFFF+] */
|
||||
|
||||
if (str + 4 > end) return OB_CS_TOOSMALL4;
|
||||
|
||||
wc -= 0x10000;
|
||||
int2store(str, (0xD800 | ((wc >> 10) & 0x3FF)));
|
||||
str += 2;
|
||||
int2store(str, (0xDC00 | (wc & 0x3FF)));
|
||||
return 4; /* [010000-10FFFF] */
|
||||
}
|
||||
|
||||
ObCharsetHandler ob_charset_utf16le_handler=
|
||||
{
|
||||
NULL,
|
||||
ob_ismbchar_utf16,
|
||||
ob_mbcharlen_utf16,
|
||||
ob_numchars_utf16,
|
||||
ob_charpos_utf16,
|
||||
ob_max_bytes_charpos_mb,
|
||||
ob_well_formed_len_utf16,
|
||||
ob_lengthsp_utf16le,
|
||||
ob_utf16le_uni, /* mb_wc */
|
||||
ob_uni_utf16le, /* wc_mb */
|
||||
ob_mb_ctype_mb,
|
||||
ob_caseup_utf16,
|
||||
ob_casedn_utf16,
|
||||
ob_fill_mb2,
|
||||
ob_strntol_mb2_or_mb4,
|
||||
ob_strntoul_mb2_or_mb4,
|
||||
ob_strntoll_mb2_or_mb4,
|
||||
ob_strntoull_mb2_or_mb4,
|
||||
ob_strntod_mb2_or_mb4,
|
||||
ob_strntoull10rnd_mb2_or_mb4,
|
||||
ob_scan_mb2,
|
||||
skip_trailing_space_utf16le
|
||||
};
|
||||
|
||||
ObCharsetInfo ob_charset_utf16le_general_ci=
|
||||
{
|
||||
56,
|
||||
0,
|
||||
0,
|
||||
OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM|OB_CS_UNICODE|OB_CS_NONASCII,
|
||||
OB_UTF16LE,
|
||||
OB_UTF16LE_GENERAL_CI,
|
||||
"UTF-16LE Unicode",
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
&ob_unicase_default,
|
||||
NULL,
|
||||
NULL,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
4,
|
||||
1, /* mbmaxlenlen */
|
||||
0,
|
||||
0xFFFF,
|
||||
' ',
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
&ob_charset_utf16le_handler,
|
||||
&ob_collation_utf16_general_ci_handler,
|
||||
PAD_SPACE
|
||||
};
|
||||
|
||||
ObCharsetInfo ob_charset_utf16le_bin=
|
||||
{
|
||||
62,
|
||||
0,
|
||||
0,
|
||||
OB_CS_COMPILED|OB_CS_BINSORT|OB_CS_STRNXFRM|OB_CS_UNICODE|OB_CS_NONASCII,
|
||||
OB_UTF16LE,
|
||||
OB_UTF16LE_BIN,
|
||||
"UTF-16LE Unicode",
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
&ob_unicase_default,
|
||||
NULL,
|
||||
NULL,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
4,
|
||||
1, /* mbmaxlenlen */
|
||||
0,
|
||||
0xFFFF,
|
||||
' ',
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
&ob_charset_utf16le_handler,
|
||||
&ob_collation_utf16_bin_handler,
|
||||
PAD_SPACE
|
||||
};
|
||||
|
20
deps/oblib/src/lib/charset/ob_ctype_utf8.cc
vendored
20
deps/oblib/src/lib/charset/ob_ctype_utf8.cc
vendored
@ -18,6 +18,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "lib/charset/ob_byteorder.h"
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "lib/charset/ob_dtoa.h"
|
||||
#include "lib/charset/ob_uctype.h"
|
||||
@ -468,9 +469,7 @@ size_t ob_strnxfrm_unicode(const ObCharsetInfo *cs,
|
||||
src+= res;
|
||||
if (uni_plane)
|
||||
ob_tosort_unicode(uni_plane, &wc, cs->state);
|
||||
if ((res= cs->cset->wc_mb(cs, wc, dst, de)) <= 0)
|
||||
break;
|
||||
dst+= res;
|
||||
dst = store16be(dst, wc); // TODO(review): is this by design?
|
||||
}
|
||||
ob_strnxfrm_unicode_help(&dst,&de, nweights, flags, &dst0);
|
||||
return dst - dst0;
|
||||
@ -879,9 +878,13 @@ size_t ob_strnxfrm_unicode_full_bin(const ObCharsetInfo *cs,
|
||||
break;
|
||||
}
|
||||
src+= res;
|
||||
if ((res= cs->cset->wc_mb(cs, wc, dst, de)) <= 0)
|
||||
break;
|
||||
dst+= res;
|
||||
*dst++= (uchar) (wc >> 16);
|
||||
if (dst < de)
|
||||
{
|
||||
*dst++= (uchar) ((wc >> 8) & 0xFF);
|
||||
if (dst < de)
|
||||
*dst++= (uchar) (wc & 0xFF);
|
||||
}
|
||||
}
|
||||
if (flags & OB_STRXFRM_PAD_WITH_SPACE)
|
||||
{
|
||||
@ -953,7 +956,7 @@ ObCharsetHandler ob_charset_utf8mb4_handler=
|
||||
ob_max_bytes_charpos_mb,
|
||||
ob_well_formed_len_utf8mb4,
|
||||
ob_lengthsp_8bit,
|
||||
ob_mb_wc_utf8mb4,
|
||||
ob_mb_wc_utf8mb4_thunk,
|
||||
ob_wc_mb_utf8mb4,
|
||||
ob_mb_ctype_mb,
|
||||
ob_caseup_utf8mb4,
|
||||
@ -966,7 +969,8 @@ ObCharsetHandler ob_charset_utf8mb4_handler=
|
||||
ob_strntod_8bit,
|
||||
//ob_strtoll10_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit
|
||||
ob_scan_8bit,
|
||||
skip_trailing_space
|
||||
};
|
||||
|
||||
static ObCollationHandler ob_collation_utf8mb4_general_ci_handler=
|
||||
|
27
deps/oblib/src/lib/charset/ob_ctype_utf8_tab.h
vendored
27
deps/oblib/src/lib/charset/ob_ctype_utf8_tab.h
vendored
@ -9,6 +9,9 @@
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
#ifndef OB_CTYPE_UTF8_TAB_H_
|
||||
#define OB_CTYPE_UTF8_TAB_H_
|
||||
|
||||
static unsigned char ctype_utf8mb4[]=
|
||||
{
|
||||
0,
|
||||
@ -73,7 +76,7 @@ static uchar to_upper_utf8mb4[]=
|
||||
};
|
||||
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane00[]={
|
||||
ObUnicaseInfoChar utf8_plane00[]={
|
||||
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
|
||||
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
|
||||
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
|
||||
@ -204,7 +207,7 @@ static ObUnicaseInfoChar utf8_plane00[]={
|
||||
{0x00DE,0x00FE,0x00DE}, {0x0178,0x00FF,0x0059}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane01[]={
|
||||
ObUnicaseInfoChar utf8_plane01[]={
|
||||
{0x0100,0x0101,0x0041}, {0x0100,0x0101,0x0041},
|
||||
{0x0102,0x0103,0x0041}, {0x0102,0x0103,0x0041},
|
||||
{0x0104,0x0105,0x0041}, {0x0104,0x0105,0x0041},
|
||||
@ -335,7 +338,7 @@ static ObUnicaseInfoChar utf8_plane01[]={
|
||||
{0x01FE,0x01FF,0x00D8}, {0x01FE,0x01FF,0x00D8}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane02[]={
|
||||
ObUnicaseInfoChar utf8_plane02[]={
|
||||
{0x0200,0x0201,0x0041}, {0x0200,0x0201,0x0041},
|
||||
{0x0202,0x0203,0x0041}, {0x0202,0x0203,0x0041},
|
||||
{0x0204,0x0205,0x0045}, {0x0204,0x0205,0x0045},
|
||||
@ -466,7 +469,7 @@ static ObUnicaseInfoChar utf8_plane02[]={
|
||||
{0x02FE,0x02FE,0x02FE}, {0x02FF,0x02FF,0x02FF}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane03[]={
|
||||
ObUnicaseInfoChar utf8_plane03[]={
|
||||
{0x0300,0x0300,0x0300}, {0x0301,0x0301,0x0301},
|
||||
{0x0302,0x0302,0x0302}, {0x0303,0x0303,0x0303},
|
||||
{0x0304,0x0304,0x0304}, {0x0305,0x0305,0x0305},
|
||||
@ -597,7 +600,7 @@ static ObUnicaseInfoChar utf8_plane03[]={
|
||||
{0x03FE,0x03FE,0x03FE}, {0x03FF,0x03FF,0x03FF}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane04[]={
|
||||
ObUnicaseInfoChar utf8_plane04[]={
|
||||
{0x0400,0x0450,0x0415}, {0x0401,0x0451,0x0415},
|
||||
{0x0402,0x0452,0x0402}, {0x0403,0x0453,0x0413},
|
||||
{0x0404,0x0454,0x0404}, {0x0405,0x0455,0x0405},
|
||||
@ -728,7 +731,7 @@ static ObUnicaseInfoChar utf8_plane04[]={
|
||||
{0x04FE,0x04FE,0x04FE}, {0x04FF,0x04FF,0x04FF}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane05[]={
|
||||
ObUnicaseInfoChar utf8_plane05[]={
|
||||
{0x0500,0x0500,0x0500}, {0x0501,0x0501,0x0501},
|
||||
{0x0502,0x0502,0x0502}, {0x0503,0x0503,0x0503},
|
||||
{0x0504,0x0504,0x0504}, {0x0505,0x0505,0x0505},
|
||||
@ -859,7 +862,7 @@ static ObUnicaseInfoChar utf8_plane05[]={
|
||||
{0x05FE,0x05FE,0x05FE}, {0x05FF,0x05FF,0x05FF}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane1E[]={
|
||||
ObUnicaseInfoChar utf8_plane1E[]={
|
||||
{0x1E00,0x1E01,0x0041}, {0x1E00,0x1E01,0x0041},
|
||||
{0x1E02,0x1E03,0x0042}, {0x1E02,0x1E03,0x0042},
|
||||
{0x1E04,0x1E05,0x0042}, {0x1E04,0x1E05,0x0042},
|
||||
@ -990,7 +993,7 @@ static ObUnicaseInfoChar utf8_plane1E[]={
|
||||
{0x1EFE,0x1EFE,0x1EFE}, {0x1EFF,0x1EFF,0x1EFF}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane1F[]={
|
||||
ObUnicaseInfoChar utf8_plane1F[]={
|
||||
{0x1F08,0x1F00,0x0391}, {0x1F09,0x1F01,0x0391},
|
||||
{0x1F0A,0x1F02,0x0391}, {0x1F0B,0x1F03,0x0391},
|
||||
{0x1F0C,0x1F04,0x0391}, {0x1F0D,0x1F05,0x0391},
|
||||
@ -1121,7 +1124,7 @@ static ObUnicaseInfoChar utf8_plane1F[]={
|
||||
{0x1FFE,0x1FFE,0x1FFE}, {0x1FFF,0x1FFF,0x1FFF}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane21[]={
|
||||
ObUnicaseInfoChar utf8_plane21[]={
|
||||
{0x2100,0x2100,0x2100}, {0x2101,0x2101,0x2101},
|
||||
{0x2102,0x2102,0x2102}, {0x2103,0x2103,0x2103},
|
||||
{0x2104,0x2104,0x2104}, {0x2105,0x2105,0x2105},
|
||||
@ -1252,7 +1255,7 @@ static ObUnicaseInfoChar utf8_plane21[]={
|
||||
{0x21FE,0x21FE,0x21FE}, {0x21FF,0x21FF,0x21FF}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_plane24[]={
|
||||
ObUnicaseInfoChar utf8_plane24[]={
|
||||
{0x2400,0x2400,0x2400}, {0x2401,0x2401,0x2401},
|
||||
{0x2402,0x2402,0x2402}, {0x2403,0x2403,0x2403},
|
||||
{0x2404,0x2404,0x2404}, {0x2405,0x2405,0x2405},
|
||||
@ -1383,7 +1386,7 @@ static ObUnicaseInfoChar utf8_plane24[]={
|
||||
{0x24FE,0x24FE,0x24FE}, {0x24FF,0x24FF,0x24FF}
|
||||
};
|
||||
|
||||
static ObUnicaseInfoChar utf8_planeFF[]={
|
||||
ObUnicaseInfoChar utf8_planeFF[]={
|
||||
{0xFF00,0xFF00,0xFF00}, {0xFF01,0xFF01,0xFF01},
|
||||
{0xFF02,0xFF02,0xFF02}, {0xFF03,0xFF03,0xFF03},
|
||||
{0xFF04,0xFF04,0xFF04}, {0xFF05,0xFF05,0xFF05},
|
||||
@ -4461,3 +4464,5 @@ const ObUnicaseInfoChar *ob_unicase_pages_unicode520[4352] = {
|
||||
|
||||
|
||||
ObUnicaseInfo ob_unicase_unicode520 = {0x10FFFF, ob_unicase_pages_unicode520};
|
||||
|
||||
#endif
|
||||
|
32
deps/oblib/unittest/lib/charset/test_charset.cpp
vendored
32
deps/oblib/unittest/lib/charset/test_charset.cpp
vendored
@ -148,7 +148,7 @@ TEST_F(TestCharset, sortkey)
|
||||
|
||||
char space[10] = " ";
|
||||
size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, space, strlen(space), aa1, 10, is_valid_unicode);
|
||||
ASSERT_EQ(size1, 2);
|
||||
ASSERT_EQ(size1, 4);
|
||||
ASSERT_TRUE(is_valid_unicode);
|
||||
|
||||
char empty[10] = "";
|
||||
@ -161,7 +161,7 @@ TEST_F(TestCharset, sortkey)
|
||||
invalid[1] = char(0x80);
|
||||
invalid[2] = '\0';
|
||||
size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, invalid, strlen(invalid), aa1, 10, is_valid_unicode);
|
||||
ASSERT_EQ(size1, 1);
|
||||
ASSERT_EQ(size1, 2);
|
||||
ASSERT_FALSE(is_valid_unicode);
|
||||
|
||||
//std::map<int, int> charset{
|
||||
@ -202,11 +202,11 @@ TEST_F(TestCharset, sortkey)
|
||||
std::vector<std::vector<int>>result{
|
||||
{0,1,1,1,1},
|
||||
{1,4,1,1,0},
|
||||
{2,6,1,0,0},
|
||||
{2,4,1,0,0},
|
||||
{3,6,1,0,0},
|
||||
{4,1,1,1,1},
|
||||
{5,4,1,0,0},
|
||||
{6,4,1,0,0},
|
||||
{6,6,1,0,0},
|
||||
{7,1,1,1,1},
|
||||
{8,4,1,1,1},
|
||||
{9,10,1,10,1},
|
||||
@ -612,7 +612,8 @@ TEST_F(TestCharset, tolower)
|
||||
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length() );
|
||||
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
|
||||
auto charset_type = static_cast<ObCharsetType>(cs_i);
|
||||
if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type || CHARSET_BINARY == charset_type)
|
||||
if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type
|
||||
|| CHARSET_UTF16LE == charset_type || CHARSET_BINARY == charset_type)
|
||||
continue;
|
||||
ObCollationType cs_type = ObCharset::get_default_collation(charset_type);
|
||||
ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
|
||||
@ -647,7 +648,8 @@ TEST_F(TestCharset, toupper)
|
||||
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length() );
|
||||
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
|
||||
auto charset_type = static_cast<ObCharsetType>(cs_i);
|
||||
if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type || CHARSET_BINARY == charset_type)
|
||||
if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type
|
||||
|| CHARSET_UTF16LE == charset_type || CHARSET_BINARY == charset_type)
|
||||
continue;
|
||||
ObCollationType cs_type = ObCharset::get_default_collation(charset_type);
|
||||
ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
|
||||
@ -855,13 +857,17 @@ TEST_F(TestCharset, check_mbmaxlenlen)
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<const char *> test_strings = {"1", "abcdef", "ab1dc4", "你好", "b今a天", "1abad "};
|
||||
std::vector<const char *> test_strings = {"1", "abcdef", "ab1dc4", "好", "b今a天", "1abad "};
|
||||
|
||||
|
||||
TEST_F(TestCharset, basic_collation_handler_test)
|
||||
{
|
||||
ObArenaAllocator alloc;
|
||||
for (int i = CS_TYPE_INVALID; i < CS_TYPE_EXTENDED_MARK; i++) {
|
||||
for (int i = CS_TYPE_INVALID; i < CS_TYPE_MAX; i++) {
|
||||
ObCollationType coll = static_cast<ObCollationType>(i);
|
||||
if (!ObCharset::is_valid_charset(coll)) {
|
||||
continue;
|
||||
}
|
||||
const ObCharsetInfo * cs = ObCharset::get_charset(coll);
|
||||
const char *coll_name = ObCharset::collation_name(coll);
|
||||
if (OB_NOT_NULL(cs)) {
|
||||
@ -957,6 +963,10 @@ TEST_F(TestCharset, foreach_char) {
|
||||
"抚凌云而自惜;钟期既遇,奏流水以何惭?呜呼!胜地不常,盛筵难再;兰亭已矣,梓泽丘墟。临别赠言,幸承恩于伟饯"
|
||||
"登高作赋,是所望于群公。敢竭鄙怀,恭疏短引;一言均赋,四韵俱成。请洒潘江,各倾陆海云尔:滕王高阁临江渚"
|
||||
"佩玉鸣鸾罢歌舞。画栋朝飞南浦云,珠帘暮卷西山雨。闲云潭影日悠悠,物换星移几度秋。阁中帝子今何在?槛外长江空自流。";
|
||||
|
||||
const char *data1 = "豫章故郡,洪都新府。星分翼軒,地接衡廬。襟三江而帶五湖,控蠻荊而引甌越。物華天寶,龍光射牛斗之墟。落霞與孤鷺齊飛,秋水共長天一色。"
|
||||
"人傑地靈,徐孺下陳蕃之榻。雄州霧列,俊採星馳。台隍枕夷夏之交,賓主盡東南之美。都督閻之雅望,棨戟遙臨"
|
||||
"時維九月,序屬三秋。潦水盡而寒潭清,煙光凝而暮山紫。物華天寶";
|
||||
/*
|
||||
const char *data = "I hear America singing, the varied carols I hear,Those of mechanics, "
|
||||
"each one singing his as it should be blithe and strong,The carpenter "
|
||||
@ -990,6 +1000,7 @@ TEST_F(TestCharset, foreach_char) {
|
||||
};
|
||||
|
||||
ObString data_in(data);
|
||||
ObString data_in1(data1);
|
||||
ObArenaAllocator alloc;
|
||||
|
||||
for (int i = CHARSET_BINARY + 1; i <= CHARSET_GB18030; i++) {
|
||||
@ -1036,8 +1047,11 @@ TEST_F(TestCharset, foreach_char) {
|
||||
ObCollationType test_collation_type = ObCharset::get_default_collation(test_cs_type);
|
||||
ObString data_out;
|
||||
ASSERT_TRUE(ObCharset::is_valid_collation(test_collation_type));
|
||||
if (ObCharset::get_charset(test_collation_type)->mbmaxlen == 1) {
|
||||
if (ObCharset::get_charset(test_collation_type)->mbmaxlen == 1 || test_cs_type == CHARSET_SJIS) {
|
||||
data_out = data_in;
|
||||
continue;
|
||||
} else if (test_cs_type == CHARSET_BIG5 || test_cs_type == CHARSET_HKSCS || test_cs_type == CHARSET_HKSCS31) {
|
||||
ASSERT_TRUE(OB_SUCCESS == ObCharset::charset_convert(alloc, data_in1, CS_TYPE_UTF8MB4_BIN, test_collation_type, data_out));
|
||||
} else {
|
||||
ASSERT_TRUE(OB_SUCCESS == ObCharset::charset_convert(alloc, data_in, CS_TYPE_UTF8MB4_BIN, test_collation_type, data_out));
|
||||
}
|
||||
|
@ -30,8 +30,23 @@ ob_set_subtarget(ob_share ALONE
|
||||
vector/expr_cmp_func_parts/expr_cmp_func_part_13.cpp
|
||||
vector/expr_cmp_func_parts/expr_cmp_func_part_14.cpp
|
||||
vector/expr_cmp_func_parts/expr_cmp_func_part_15.cpp
|
||||
|
||||
aggregate/approx_count_distinct_synopsis.cpp
|
||||
datum/ob_datum_funcs_compilation_0.cpp
|
||||
datum/ob_datum_funcs_compilation_1.cpp
|
||||
datum/ob_datum_funcs_compilation_2.cpp
|
||||
datum/ob_datum_funcs_compilation_3.cpp
|
||||
datum/ob_datum_funcs_compilation_4.cpp
|
||||
datum/ob_datum_funcs_compilation_5.cpp
|
||||
datum/ob_datum_funcs_compilation_6.cpp
|
||||
datum/ob_datum_funcs_compilation_7.cpp
|
||||
datum/ob_datum_funcs_compilation_8.cpp
|
||||
datum/ob_datum_funcs_compilation_9.cpp
|
||||
datum/ob_datum_funcs_compilation_10.cpp
|
||||
datum/ob_datum_funcs_compilation_11.cpp
|
||||
datum/ob_datum_funcs_compilation_12.cpp
|
||||
datum/ob_datum_funcs_compilation_13.cpp
|
||||
datum/ob_datum_funcs_compilation_14.cpp
|
||||
datum/ob_datum_funcs_compilation_15.cpp
|
||||
)
|
||||
|
||||
file(GLOB SCHEMA_CPPS "inner_table/ob_inner_table_schema.*.cpp")
|
||||
|
@ -488,14 +488,129 @@ typedef ObConstIntMapping<0,
|
||||
CS_TYPE_GB18030_2022_RADICAL_CS, 1,
|
||||
CS_TYPE_GB18030_2022_STROKE_CI, 1,
|
||||
CS_TYPE_GB18030_2022_STROKE_CS, 1,
|
||||
CS_TYPE_UTF8MB4_CROATIAN_CI, 1,
|
||||
CS_TYPE_UTF8MB4_UNICODE_520_CI, 1,
|
||||
CS_TYPE_UTF8MB4_CZECH_CI, 1,
|
||||
CS_TYPE_ASCII_GENERAL_CI,1,
|
||||
CS_TYPE_ASCII_BIN,1,
|
||||
CS_TYPE_TIS620_THAI_CI,1,
|
||||
CS_TYPE_TIS620_BIN,1,
|
||||
CS_TYPE_UTF8MB4_0900_AI_CI, 1> SupportedCollections;
|
||||
CS_TYPE_UTF16LE_GENERAL_CI, 1,
|
||||
CS_TYPE_UTF16LE_BIN, 1,
|
||||
CS_TYPE_SJIS_JAPANESE_CI, 1,
|
||||
CS_TYPE_SJIS_BIN, 1,
|
||||
CS_TYPE_BIG5_CHINESE_CI, 1,
|
||||
CS_TYPE_BIG5_BIN, 1,
|
||||
CS_TYPE_HKSCS_BIN, 1,
|
||||
CS_TYPE_HKSCS31_BIN, 1,
|
||||
CS_TYPE_UTF8MB4_ICELANDIC_UCA_CI, 1,
|
||||
CS_TYPE_UTF8MB4_LATVIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_ROMANIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_SLOVENIAN_UCA_CI, 1,
|
||||
CS_TYPE_UTF8MB4_POLISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_ESTONIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_SPANISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_SWEDISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_TURKISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_CZECH_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_DANISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_LITHUANIAN_UCA_CI, 1,
|
||||
CS_TYPE_UTF8MB4_SLOVAK_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_SPANISH2_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_ROMAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_PERSIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_ESPERANTO_UCA_CI, 1,
|
||||
CS_TYPE_UTF8MB4_HUNGARIAN_UCA_CI, 1,
|
||||
CS_TYPE_UTF8MB4_SINHALA_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_GERMAN2_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_CROATIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF8MB4_UNICODE_520_CI , 1,
|
||||
CS_TYPE_UTF8MB4_VIETNAMESE_CI , 1,
|
||||
CS_TYPE_UTF16_ICELANDIC_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_LATVIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_ROMANIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_SLOVENIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_POLISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_ESTONIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_SPANISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_SWEDISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_TURKISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_CZECH_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_DANISH_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_LITHUANIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_SLOVAK_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_SPANISH2_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_ROMAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_PERSIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_ESPERANTO_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_HUNGARIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_SINHALA_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_GERMAN2_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_CROATIAN_UCA_CI , 1,
|
||||
CS_TYPE_UTF16_UNICODE_520_CI , 1,
|
||||
CS_TYPE_UTF16_VIETNAMESE_CI , 1,
|
||||
CS_TYPE_UTF8MB4_0900_AI_CI , 1,
|
||||
CS_TYPE_UTF8MB4_DE_PB_0900_AI_CI , 1,
|
||||
CS_TYPE_UTF8MB4_IS_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_LV_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_RO_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_SL_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_PL_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_ET_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_ES_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_SV_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_TR_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_CS_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_DA_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_LT_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_SK_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_ES_TRAD_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_LA_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_EO_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_HU_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_HR_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_VI_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_DE_PB_0900_AS_CS , 1,
|
||||
CS_TYPE_UTF8MB4_IS_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_LV_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_RO_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_SL_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_PL_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_ET_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_ES_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_SV_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_TR_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_CS_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_DA_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_LT_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_SK_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_ES_TRAD_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_LA_0900_AS_CS , 1,
|
||||
CS_TYPE_UTF8MB4_EO_0900_AS_CS , 1,
|
||||
CS_TYPE_UTF8MB4_HU_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_HR_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_VI_0900_AS_CS , 1,
|
||||
CS_TYPE_UTF8MB4_JA_0900_AS_CS , 1,
|
||||
CS_TYPE_UTF8MB4_JA_0900_AS_CS_KS , 1,
|
||||
CS_TYPE_UTF8MB4_0900_AS_CI, 1,
|
||||
CS_TYPE_UTF8MB4_RU_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_RU_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_ZH_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_0900_BIN, 1,
|
||||
CS_TYPE_UTF8MB4_NB_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_NB_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_NN_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_NN_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_SR_LATN_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_SR_LATN_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_BS_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_BS_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_BG_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_BG_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_GL_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_GL_0900_AS_CS, 1,
|
||||
CS_TYPE_UTF8MB4_MN_CYRL_0900_AI_CI, 1,
|
||||
CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS, 1,
|
||||
CS_TYPE_DEC8_SWEDISH_CI, 1,
|
||||
CS_TYPE_DEC8_BIN, 1> SupportedCollections;
|
||||
|
||||
// bool is_calc_with_end_space(ObObjType type1, ObObjType type2,
|
||||
// bool is_oracle_mode,
|
||||
|
File diff suppressed because it is too large
Load Diff
55
src/share/datum/ob_datum_funcs_compilation.ipp
Normal file
55
src/share/datum/ob_datum_funcs_compilation.ipp
Normal file
@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#ifndef OCEANBASE_STR_DATUM_FUNCS_IPP
|
||||
#define OCEANBASE_STR_DATUM_FUNCS_IPP
|
||||
|
||||
#include "ob_datum_funcs.h"
|
||||
#include "ob_datum_cmp_func_def.h"
|
||||
#include "common/object/ob_obj_funcs.h"
|
||||
#include "sql/engine/ob_serializable_function.h"
|
||||
#include "sql/engine/ob_bit_vector.h"
|
||||
#include "share/ob_cluster_version.h"
|
||||
#include "share/datum/ob_datum_funcs_impl.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
using namespace sql;
|
||||
namespace common
|
||||
{
|
||||
// Number of slices the function-table initialisation is split into; each
// DEF_STR_FUNC_INIT(unit_idx) instantiation handles one slice.
static const int COMPILATION_UNIT = 16;

// Declares three constexpr ints describing slice `unit_idx` of [0, max_val):
//   name##_unit_size : ceil(max_val / COMPILATION_UNIT)
//   name##_start     : first index of the slice, clamped to max_val
//   name##_end       : one past the last index of the slice, clamped to max_val
// Macro arguments are parenthesized so that expression arguments
// (e.g. `A + B`) are evaluated as a whole.
#define DEF_COMPILATION_VARS(name, max_val, unit_idx)                                            \
  constexpr int name##_unit_size =                                                               \
    (max_val) / COMPILATION_UNIT + ((max_val) % COMPILATION_UNIT == 0 ? 0 : 1);                  \
  constexpr int name##_start =                                                                   \
    (name##_unit_size * (unit_idx) < (max_val) ? name##_unit_size * (unit_idx) : (max_val));     \
  constexpr int name##_end =                                                                     \
    (name##_start + name##_unit_size >= (max_val) ? (max_val) : name##_start + name##_unit_size);
|
||||
|
||||
#define DEF_STR_FUNC_INIT(unit_idx) \
|
||||
void __init_str_func##unit_idx() \
|
||||
{ \
|
||||
DEF_COMPILATION_VARS(cs, CS_TYPE_MAX, unit_idx); \
|
||||
DEF_COMPILATION_VARS(ty, ObMaxType, unit_idx); \
|
||||
DEF_COMPILATION_VARS(tc, ObMaxTC, unit_idx); \
|
||||
DEF_COMPILATION_VARS(ty_basic, ObMaxType, unit_idx); \
|
||||
ObArrayConstIniter<cs_end, str_cmp_initer, cs_start>::init(); \
|
||||
Ob2DArrayConstIniter<cs_end, 2, str_basic_initer, cs_start, 0>::init(); \
|
||||
Ob2DArrayConstIniter<ty_end, ObMaxType, InitTypeCmpArray, ty_start, 0>::init(); \
|
||||
Ob2DArrayConstIniter<tc_end, ObMaxTC, InitTCCmpArray, tc_start, 0>::init(); \
|
||||
ObArrayConstIniter<ty_basic_end, InitBasicFuncArray, ty_basic_start>::init(); \
|
||||
}
|
||||
|
||||
} // end common
|
||||
} // end oceanbase
|
||||
#endif // OCEANBASE_STR_DATUM_FUNCS_IPP
|
21
src/share/datum/ob_datum_funcs_compilation_0.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_0.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
DEF_STR_FUNC_INIT(0);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_1.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_1.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
DEF_STR_FUNC_INIT(1);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_10.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_10.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
DEF_STR_FUNC_INIT(10);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_11.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_11.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
DEF_STR_FUNC_INIT(11);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_12.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_12.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
DEF_STR_FUNC_INIT(12);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_13.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_13.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 13 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(13);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_14.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_14.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 14 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(14);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_15.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_15.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 15 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(15);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_2.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_2.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 2 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(2);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_3.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_3.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 3 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(3);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_4.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_4.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 4 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(4);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_5.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_5.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 5 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(5);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_6.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_6.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 6 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(6);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_7.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_7.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 7 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(7);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_8.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_8.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 8 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(8);
|
||||
} // end common
|
||||
} // end oceanbase
|
21
src/share/datum/ob_datum_funcs_compilation_9.cpp
Normal file
21
src/share/datum/ob_datum_funcs_compilation_9.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_datum_funcs_compilation.ipp"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
// Expands DEF_STR_FUNC_INIT for index 9 inside oceanbase::common.
// The macro is not defined in this file; presumably it comes from
// ob_datum_funcs_compilation.ipp (the only include) -- confirm there what
// the index selects.
DEF_STR_FUNC_INIT(9);
|
||||
} // end common
|
||||
} // end oceanbase
|
1219
src/share/datum/ob_datum_funcs_impl.h
Normal file
1219
src/share/datum/ob_datum_funcs_impl.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -525,8 +525,9 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
|
||||
}
|
||||
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|
||||
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
|
||||
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI || cs == CS_TYPE_UTF16LE_GENERAL_CI
|
||||
|| cs == CS_TYPE_UTF16LE_BIN
|
||||
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
||||
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
|
||||
max_buf_len - to_len - safety_buf_size,
|
||||
is_mem, is_valid_uni);
|
||||
@ -577,8 +578,9 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
|
||||
}
|
||||
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|
||||
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
|
||||
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
||||
|| cs == CS_TYPE_UTF16LE_GENERAL_CI || cs == CS_TYPE_UTF16LE_BIN
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI
|
||||
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
||||
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
|
||||
max_buf_len - to_len - safty_buf_size,
|
||||
param.is_memcmp_, param.is_valid_uni_);
|
||||
@ -877,6 +879,8 @@ int ObOrderPerservingEncoder::encode_tails(unsigned char *to, int64_t max_buf_le
|
||||
|| cs == CS_TYPE_UTF16_GENERAL_CI
|
||||
|| cs == CS_TYPE_UTF16_BIN
|
||||
|| cs == CS_TYPE_GB18030_CHINESE_CI
|
||||
|| cs == CS_TYPE_UTF16LE_GENERAL_CI
|
||||
|| cs == CS_TYPE_UTF16LE_BIN
|
||||
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
||||
if (with_empty_str) {
|
||||
*to = 0x00;
|
||||
|
@ -358,8 +358,7 @@ int ObSchemaPrinter::print_table_definition_columns(const ObTableSchema &table_s
|
||||
oceanbase::common::ObCharsetType::CHARSET_BINARY == charset_type) {
|
||||
// observer performs no conversion of result sets or error messages; see the official MySQL documentation for more details
|
||||
} else {
|
||||
ObCollationType collation_type = ObCharset::get_default_collation(charset_type);
|
||||
if (OB_FAIL(ObCharset::charset_convert(allocator, default_value.get_string(), default_value.get_collation_type(), collation_type, out_str))) {
|
||||
if (OB_FAIL(ObCharset::charset_convert(allocator, default_value.get_string(), default_value.get_collation_type(), ObCharset::get_system_collation(), out_str))) {
|
||||
SHARE_SCHEMA_LOG(WARN, "fail to convert charset", K(ret));
|
||||
}
|
||||
}
|
||||
|
@ -1924,6 +1924,24 @@ int ObSysVarOnCheckFuncs::check_and_convert_charset(ObExecContext &ctx,
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_ERROR("invalid type", K(ret), K(in_val));
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
if (0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_CLIENT)
|
||||
|| 0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_CONNECTION)
|
||||
|| 0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_RESULTS)) {
|
||||
ObCollationType cstype = static_cast<ObCollationType>(out_val.get_int());
|
||||
if (!ObCharset::is_valid_collation(cstype)) {
|
||||
ret = OB_ERR_UNKNOWN_CHARSET;
|
||||
LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, in_val.get_string().length(), in_val.get_string().ptr());
|
||||
} else if(ObCharset::get_charset(cstype)->mbminlen > 1) {
|
||||
ret = OB_ERR_WRONG_VALUE_FOR_VAR;
|
||||
LOG_USER_ERROR(OB_ERR_WRONG_VALUE_FOR_VAR,
|
||||
set_var.var_name_.length(),
|
||||
set_var.var_name_.ptr(),
|
||||
in_val.get_string().length(),
|
||||
in_val.get_string().ptr());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -1997,6 +2015,23 @@ int ObSysVarOnCheckFuncs::check_and_convert_collation_not_null(ObExecContext &ct
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_ERROR("invalid type", K(ret), K(in_val));
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
if (0 == set_var.var_name_.case_compare(OB_SV_COLLATION_CONNECTION)) {
|
||||
ObCollationType cstype = static_cast<ObCollationType>(out_val.get_int());
|
||||
if (!ObCharset::is_valid_collation(cstype)) {
|
||||
ret = OB_ERR_UNKNOWN_CHARSET;
|
||||
LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, in_val.get_string().length(), in_val.get_string().ptr());
|
||||
} else if(ObCharset::get_charset(cstype)->mbminlen > 1) {
|
||||
ret = OB_ERR_WRONG_VALUE_FOR_VAR;
|
||||
LOG_USER_ERROR(OB_ERR_WRONG_VALUE_FOR_VAR,
|
||||
set_var.var_name_.length(),
|
||||
set_var.var_name_.ptr(),
|
||||
in_val.get_string().length(),
|
||||
in_val.get_string().ptr());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -220,7 +220,7 @@ struct VecTCHashCalc<VEC_TC_LOB, HashMethod, hash_v2>
|
||||
const uchar *key = reinterpret_cast<const uchar *>(char_data);
|
||||
const uchar *pos = key;
|
||||
int length = char_len;
|
||||
key = skip_trailing_space(key, char_len, 0);
|
||||
key = skip_trailing_space(&ob_charset_utf8mb4_bin, key, char_len);
|
||||
length = (int)(key - pos);
|
||||
res = HashMethod::hash((void*)pos, length, seed);
|
||||
}
|
||||
@ -419,7 +419,7 @@ struct VecTCHashCalc<VEC_TC_STRING, HashMethod, hash_v2> {
|
||||
const uchar *key = reinterpret_cast<const uchar *>(data);
|
||||
const uchar *pos = key;
|
||||
int length = len;
|
||||
key = skip_trailing_space(key, len, 0);
|
||||
key = skip_trailing_space(&ob_charset_utf8mb4_bin, key, len);
|
||||
length = (int)(key - pos);
|
||||
res = HashMethod::hash((void*)pos, length, seed);
|
||||
}
|
||||
|
@ -307,7 +307,7 @@ int ObDASDomainUtils::generate_spatial_index_rows(
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_ISNULL(helper)
|
||||
|| OB_UNLIKELY(ObCollationType::CS_TYPE_INVALID == type
|
||||
|| ObCollationType::CS_TYPE_EXTENDED_MARK < type)
|
||||
|| ObCollationType::CS_TYPE_PINYIN_BEGIN_MARK <= type)
|
||||
|| OB_UNLIKELY(!words_count.created())) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid arguments", K(ret), KPC(helper), K(type), K(words_count.created()));
|
||||
|
@ -213,6 +213,19 @@ public:
|
||||
ret = scan_proto<common::CHARSET_GB18030, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
case common::CHARSET_SJIS:
|
||||
ret = scan_proto<common::CHARSET_SJIS, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
case common::CHARSET_BIG5:
|
||||
ret = scan_proto<common::CHARSET_BIG5, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
case common::CHARSET_HKSCS:
|
||||
case common::CHARSET_HKSCS31:
|
||||
ret = scan_proto<common::CHARSET_HKSCS, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
break;
|
||||
default:
|
||||
ret = scan_proto<common::CHARSET_BINARY, handle_func, NEED_ESCAPED_RESULT>(
|
||||
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
|
||||
@ -332,6 +345,27 @@ inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_GBK>(const char *ptr, c
|
||||
return (0x81 <= c && c <= 0xFE) ? 2 : 1;
|
||||
}
|
||||
|
||||
// Byte length of the character that starts at `ptr`, specialized for
// CHARSET_SJIS.
// Returns 2 when the first byte is in 0x81..0x9f or 0xe0..0xfc, otherwise 1.
// Only the first byte is inspected; `end` is not consulted (marked UNUSED).
// NOTE(review): `ptr` is dereferenced unconditionally and the result may be 2
// even when fewer than two bytes remain before `end` -- presumably the caller
// guarantees ptr < end and bounds the returned length; confirm.
template<>
|
||||
inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_SJIS>(const char *ptr, const char *end) {
|
||||
UNUSED(end);
|
||||
unsigned char c = *ptr;
|
||||
return ((0x81 <= (c) && (c) <= 0x9f) || ((0xe0 <= (c)) && (c) <= 0xfc)) ? 2 : 1;
|
||||
}
|
||||
|
||||
// Byte length of the character that starts at `ptr`, specialized for
// CHARSET_BIG5.
// Returns 2 when the first byte is in 0xa1..0xf9, otherwise 1.
// Only the first byte is inspected; `end` is not consulted (marked UNUSED).
// NOTE(review): `ptr` is dereferenced unconditionally and the result may be 2
// even when fewer than two bytes remain before `end` -- presumably the caller
// guarantees ptr < end and bounds the returned length; confirm.
template<>
|
||||
inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_BIG5>(const char *ptr, const char *end) {
|
||||
UNUSED(end);
|
||||
unsigned char c = *ptr;
|
||||
return (0xa1 <= c && c <= 0xf9) ? 2 : 1;
|
||||
}
|
||||
|
||||
// Byte length of the character that starts at `ptr`, specialized for
// CHARSET_HKSCS.
// Returns 2 when the first byte is in 0x81..0xfe, otherwise 1.
// Only the first byte is inspected; `end` is not consulted (marked UNUSED).
// NOTE(review): `ptr` is dereferenced unconditionally and the result may be 2
// even when fewer than two bytes remain before `end` -- presumably the caller
// guarantees ptr < end and bounds the returned length; confirm.
template<>
|
||||
inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_HKSCS>(const char *ptr, const char *end) {
|
||||
UNUSED(end);
|
||||
unsigned char c = *ptr;
|
||||
return (0x81 <= c && c <= 0xfe) ? 2 : 1;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_GB18030>(const char *ptr, const char *end) {
|
||||
int mb_len = 1;
|
||||
|
@ -844,22 +844,6 @@ int ObVariableSetExecutor::check_and_convert_sys_var(ObExecContext &ctx,
|
||||
int ret = OB_SUCCESS;
|
||||
//OB_ASSERT(true == var_node.is_system_variable_);
|
||||
|
||||
// The value of collation_connection is restricted: it cannot be set to utf16
|
||||
if (OB_SUCC(ret)) {
|
||||
if ((0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_CLIENT)
|
||||
|| 0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_CONNECTION)
|
||||
|| 0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_RESULTS)
|
||||
|| 0 == set_var.var_name_.case_compare(OB_SV_COLLATION_CONNECTION))
|
||||
&& (in_val.get_string().prefix_match_ci("utf16"))) {
|
||||
ret = OB_ERR_WRONG_VALUE_FOR_VAR;
|
||||
LOG_USER_ERROR(OB_ERR_WRONG_VALUE_FOR_VAR,
|
||||
set_var.var_name_.length(),
|
||||
set_var.var_name_.ptr(),
|
||||
in_val.get_string().length(),
|
||||
in_val.get_string().ptr());
|
||||
}
|
||||
}
|
||||
|
||||
//check readonly
|
||||
if (is_set_stmt && sys_var.is_readonly()) {
|
||||
if (sys_var.is_with_upgrade() && GCONF.in_upgrade_mode()) {
|
||||
|
@ -656,8 +656,8 @@ int ObExprCast::get_cast_type(const bool enable_decimal_int,
|
||||
dst_type.set_udt_id(param_type2.get_udt_id());
|
||||
if (ob_is_collection_sql_type(obj_type)) {
|
||||
// recover subschema id
|
||||
dst_type.set_collation_type(static_cast<ObCollationType>(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX]));
|
||||
dst_type.set_collation_level(static_cast<ObCollationLevel>(parse_node.int16_values_[OB_NODE_CAST_CS_LEVEL_IDX]));
|
||||
dst_type.set_cs_type(static_cast<ObCollationType>(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX]));
|
||||
dst_type.set_cs_level(static_cast<ObCollationLevel>(parse_node.int16_values_[OB_NODE_CAST_CS_LEVEL_IDX]));
|
||||
}
|
||||
} else if (lib::is_mysql_mode() && ob_is_json(obj_type)) {
|
||||
dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN);
|
||||
|
@ -131,23 +131,27 @@ int ObExprFuncPartHash::calc_hash_value_with_seed(const ObObj &obj, int64_t seed
|
||||
ObObj obj_trimmed;
|
||||
int32_t val_len = obj.get_val_len();
|
||||
const char* obj1_str = obj.get_string_ptr();
|
||||
bool is_utf16 = ObCharset::charset_type_by_coll(obj.get_collation_type()) == CHARSET_UTF16;
|
||||
while (val_len >= (is_utf16 ? 2 : 1)) {
|
||||
if (is_utf16
|
||||
&& OB_PADDING_CHAR == *(obj1_str + val_len - 1)
|
||||
&& OB_PADDING_BINARY == *(obj1_str + val_len - 2)) {
|
||||
val_len -= 2;
|
||||
} else if (OB_PADDING_CHAR == *(obj1_str + val_len - 1)) {
|
||||
--val_len;
|
||||
char* real_end = NULL;
|
||||
// oracle hash test
|
||||
if (OB_FAIL(common::ObCharset::trim_end_of_str(obj1_str, val_len, real_end,
|
||||
ObCharset::charset_type_by_coll(obj.get_collation_type())))){
|
||||
LOG_WARN("fail to trim end of str", K(ret));
|
||||
} else if (OB_ISNULL(real_end)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected null ptr", K(ret));
|
||||
} else {
|
||||
val_len = real_end - obj1_str;
|
||||
if (val_len < 0) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected length", K(val_len));
|
||||
} else {
|
||||
break;
|
||||
obj_trimmed.set_collation_type(obj.get_collation_type());
|
||||
obj_trimmed.set_string(ObCharType, obj.get_string_ptr(), val_len);
|
||||
if (OB_FAIL(obj_trimmed.hash_murmur(res, seed))) {
|
||||
LOG_WARN("fail to do hash", K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
obj_trimmed.set_collation_type(obj.get_collation_type());
|
||||
obj_trimmed.set_string(ObCharType, obj.get_string_ptr(), val_len);
|
||||
if (OB_FAIL(obj_trimmed.hash_murmur(res, seed))) {
|
||||
LOG_WARN("fail to do hash", K(ret));
|
||||
}
|
||||
} else if (obj.is_decimal_int()) {
|
||||
ret = wide::PartitionHash<ObMurmurHash, ObObj>::calculate(obj, seed, res);
|
||||
} else {
|
||||
@ -338,22 +342,21 @@ int ObExprFuncPartHash::eval_oracle_part_hash(
|
||||
if (ObCharType == arg.datum_meta_.type_
|
||||
|| ObNCharType == arg.datum_meta_.type_) {
|
||||
ObDatum str = *d;
|
||||
const bool is_utf16 = CHARSET_UTF16 == ObCharset::charset_type_by_coll(
|
||||
arg.datum_meta_.cs_type_);
|
||||
const char *end = str.ptr_ + str.len_;
|
||||
while (end - str.ptr_ >= (is_utf16 ? 2 : 1)) {
|
||||
if (is_utf16 && OB_PADDING_CHAR == *(end - 1) && OB_PADDING_BINARY == *(end - 2)) {
|
||||
end -= 2;
|
||||
} else if (OB_PADDING_CHAR == *(end - 1)) {
|
||||
end -= 1;
|
||||
} else {
|
||||
break;
|
||||
char *end = NULL;
|
||||
if (OB_FAIL(common::ObCharset::trim_end_of_str(str.ptr_, str.len_, end, ObCharset::charset_type_by_coll(arg.datum_meta_.cs_type_)))) {
|
||||
LOG_WARN("failed to trim str end");
|
||||
} else if (OB_ISNULL(end)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("failed to get end of string", K(ret));
|
||||
} else {
|
||||
str.len_ = end - str.ptr_;
|
||||
if (str.len_ < 0) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("failed to get end of string", K(str.len_));
|
||||
} else if (OB_FAIL(arg.basic_funcs_->murmur_hash_(str, hash_val, hash_val))) {
|
||||
LOG_WARN("hash failed", K(ret));
|
||||
}
|
||||
}
|
||||
str.len_ = end - str.ptr_;
|
||||
if (OB_FAIL(arg.basic_funcs_->murmur_hash_(str, hash_val, hash_val))) {
|
||||
LOG_WARN("hash failed", K(ret));
|
||||
}
|
||||
} else if (arg.datum_meta_.type_ == ObDecimalIntType) {
|
||||
ret = wide::PartitionHash<ObMurmurHash, ObDatum>::calculate(*d, hash_val, hash_val);
|
||||
} else {
|
||||
|
@ -132,14 +132,14 @@ int ObExprNLSSort::eval_nlssort_inner(const ObExpr &expr,
|
||||
LOG_WARN("invalid cs", K(ret), K(coll_type));
|
||||
} else if (((ob_is_nchar(arg0_obj_type)) || (ob_is_char(arg0_obj_type, arg0_coll_type)))
|
||||
&& (OB_FAIL(ObCharsetUtils::remove_char_endspace(input_str,
|
||||
ObCharset::charset_type_by_coll(arg0_coll_type))))) {
|
||||
ObCharset::get_charset(arg0_coll_type))))) {
|
||||
LOG_WARN("remove char endspace failed", K(ret));
|
||||
} else if (OB_FAIL(convert_to_coll_code(ctx, arg0_coll_type, input_str, coll_type, out))) {
|
||||
LOG_WARN("convert to coll code failed", K(ret));
|
||||
} else {
|
||||
LOG_DEBUG("check coll type", K(coll_type), K(arg0_coll_type), K(expr),
|
||||
K(arg0_obj_type), K(out.length()));
|
||||
size_t buf_len = cs->coll->strnxfrmlen(cs, out.length());
|
||||
size_t buf_len = cs->coll->strnxfrmlen(cs, cs->mbmaxlen*out.length());
|
||||
char *buf = NULL;
|
||||
size_t result_len = 0;
|
||||
if (OB_ISNULL(buf = expr.get_str_res_mem(ctx, buf_len))) {
|
||||
|
@ -969,7 +969,7 @@ int ObSPIService::spi_calc_expr(ObPLExecCtx *ctx,
|
||||
} else {
|
||||
ObString res = result->get_string();
|
||||
OZ (ObCharsetUtils::remove_char_endspace( // this function only adjust res.data_length_
|
||||
res, ObCharset::charset_type_by_coll(result->get_collation_type())));
|
||||
res, ObCharset::get_charset(result->get_collation_type())));
|
||||
OX (result->val_len_ = res.length());
|
||||
}
|
||||
} else {
|
||||
|
@ -643,6 +643,12 @@ int ObSQLUtils::is_charset_data_version_valid(ObCharsetType charset_type, const
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
SQL_LOG(WARN, "charset not supported when data_version < 4_2_4_0 or between [430,433)",K(charset_type), K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.4 or between [430,433), charset is");
|
||||
} else if ((CHARSET_SJIS == charset_type || CHARSET_HKSCS == charset_type || CHARSET_HKSCS31 == charset_type
|
||||
|| CHARSET_DEC8 == charset_type || CHARSET_BIG5 == charset_type || CHARSET_UTF16LE == charset_type)
|
||||
&& ((data_version < MOCK_DATA_VERSION_4_2_5_0) || (DATA_VERSION_4_3_0_0 <= data_version && data_version < DATA_VERSION_4_3_4_0))) {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
SQL_LOG(WARN, "charset not supported when data_version < 4_2_5_0 or between [430,434)",K(charset_type), K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.5 or between [430,434), charset is");
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -655,12 +661,26 @@ int ObSQLUtils::is_collation_data_version_valid(ObCollationType collation_type,
|
||||
SQL_LOG(WARN, "failed to GET_MIN_DATA_VERSION", K(ret));
|
||||
} else if ((data_version < MOCK_DATA_VERSION_4_2_4_0
|
||||
|| (data_version >= DATA_VERSION_4_3_0_0 && data_version < DATA_VERSION_4_3_3_0))
|
||||
&& (CS_TYPE_UTF8MB4_CROATIAN_CI == collation_type
|
||||
&& (CS_TYPE_UTF8MB4_CROATIAN_UCA_CI == collation_type
|
||||
|| CS_TYPE_UTF8MB4_UNICODE_520_CI == collation_type
|
||||
|| CS_TYPE_UTF8MB4_CZECH_CI == collation_type)) {
|
||||
|| CS_TYPE_UTF8MB4_CZECH_UCA_CI == collation_type
|
||||
|| CS_TYPE_UTF8MB4_0900_AI_CI == collation_type)) {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_2_0", K(collation_type), K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.2, unicode collation is");
|
||||
SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_4_0 or between [430,433)", K(collation_type), K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Unicode collation not supported when data_version < 4_2_4_0 or between [430,433), unicode collation is");
|
||||
} else if ((data_version < MOCK_DATA_VERSION_4_2_5_0
|
||||
|| (data_version >= DATA_VERSION_4_3_0_0 && data_version < DATA_VERSION_4_3_4_0))
|
||||
&& (CS_TYPE_UTF8MB4_ZH_0900_AS_CS != collation_type &&
|
||||
CS_TYPE_UTF8MB4_CROATIAN_UCA_CI != collation_type &&
|
||||
CS_TYPE_UTF8MB4_UNICODE_520_CI != collation_type &&
|
||||
CS_TYPE_UTF8MB4_CZECH_UCA_CI != collation_type &&
|
||||
CS_TYPE_UTF8MB4_0900_AI_CI != collation_type &&
|
||||
((CS_TYPE_UTF8MB4_0900_AI_CI <= collation_type && collation_type <= CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS)
|
||||
|| (CS_TYPE_UTF16_ICELANDIC_UCA_CI <= collation_type && collation_type <= CS_TYPE_UTF16_VIETNAMESE_CI)
|
||||
|| (CS_TYPE_UTF8MB4_ICELANDIC_UCA_CI <= collation_type && collation_type <= CS_TYPE_UTF8MB4_VIETNAMESE_CI)))) {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_5_0 or between [430,434)", K(collation_type), K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Unicode collation not supported when data_version < 4_2_5_0 or between [430,434), unicode collation is");
|
||||
}
|
||||
#ifndef OB_BUILD_CLOSE_MODULES
|
||||
if (OB_SUCC(ret)) {
|
||||
@ -673,15 +693,6 @@ int ObSQLUtils::is_collation_data_version_valid(ObCollationType collation_type,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (OB_SUCC(ret)) {
|
||||
if ((CS_TYPE_UTF8MB4_0900_AI_CI == collation_type) &&
|
||||
((data_version < MOCK_DATA_VERSION_4_2_4_0) ||
|
||||
(DATA_VERSION_4_3_0_0 <= data_version && data_version < DATA_VERSION_4_3_3_0))) {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_4_0 or between [430,433)", K(collation_type), K(ret));
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.4 or between [430,433), collation is");
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1254,31 +1265,37 @@ int ObSQLUtils::check_and_convert_table_name(const ObCollationType cs_type,
|
||||
char origin_name[OB_MAX_USER_TABLE_NAME_LENGTH_ORACLE * OB_MAX_CHAR_LEN + 1] = {'\0'};
|
||||
MEMCPY(origin_name, name_str, name_len);
|
||||
if (!preserve_lettercase) {
|
||||
ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, name);
|
||||
}
|
||||
bool check_for_path_chars = false;
|
||||
int64_t max_ident_len = max_user_table_name_length;
|
||||
if ((stmt::T_SELECT == stmt_type || stmt::T_INSERT == stmt_type) && is_index_table) {
|
||||
// Index tables carry an extra prefix, so queries use OB_MAX_TABLE_NAME_LENGTH as the length limit
|
||||
max_ident_len = OB_MAX_TABLE_NAME_LENGTH;
|
||||
}
|
||||
if (OB_ERR_WRONG_IDENT_NAME == (ret = check_ident_name(CS_TYPE_UTF8MB4_GENERAL_CI,
|
||||
name,
|
||||
check_for_path_chars,
|
||||
max_ident_len))) {
|
||||
if (lib::is_oracle_mode()) {
|
||||
// It allows the last char of table name and index name is space in oracle mode
|
||||
ret = OB_SUCCESS;
|
||||
} else {
|
||||
size_t sz = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, name);
|
||||
if (sz == 0) {
|
||||
ret = OB_WRONG_TABLE_NAME;
|
||||
LOG_USER_ERROR(OB_WRONG_TABLE_NAME, (int)strlen(origin_name), origin_name);
|
||||
LOG_WARN("Incorrect table name", K(origin_name), K(ret));
|
||||
LOG_WARN("fail to convert table name to lower case", K(name), K(ret));
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
bool check_for_path_chars = false;
|
||||
int64_t max_ident_len = max_user_table_name_length;
|
||||
if ((stmt::T_SELECT == stmt_type || stmt::T_INSERT == stmt_type) && is_index_table) {
|
||||
// Index tables carry an extra prefix, so queries use OB_MAX_TABLE_NAME_LENGTH as the length limit
|
||||
max_ident_len = OB_MAX_TABLE_NAME_LENGTH;
|
||||
}
|
||||
if (OB_ERR_WRONG_IDENT_NAME == (ret = check_ident_name(CS_TYPE_UTF8MB4_GENERAL_CI,
|
||||
name,
|
||||
check_for_path_chars,
|
||||
max_ident_len))) {
|
||||
if (lib::is_oracle_mode()) {
|
||||
// It allows the last char of table name and index name is space in oracle mode
|
||||
ret = OB_SUCCESS;
|
||||
} else {
|
||||
ret = OB_WRONG_TABLE_NAME;
|
||||
LOG_USER_ERROR(OB_WRONG_TABLE_NAME, (int)strlen(origin_name), origin_name);
|
||||
LOG_WARN("Incorrect table name", K(origin_name), K(ret));
|
||||
}
|
||||
} else if (OB_ERR_TOO_LONG_IDENT == ret) {
|
||||
LOG_USER_ERROR(OB_ERR_TOO_LONG_IDENT, (int)strlen(origin_name), origin_name);
|
||||
LOG_WARN("table name is too long", K(origin_name), K(max_ident_len), K(ret), K(stmt_type), K(is_index_table));
|
||||
} else if (OB_FAIL(ret)) {
|
||||
LOG_WARN("fail to check ident name", K(origin_name), K(ret));
|
||||
}
|
||||
} else if (OB_ERR_TOO_LONG_IDENT == ret) {
|
||||
LOG_USER_ERROR(OB_ERR_TOO_LONG_IDENT, (int)strlen(origin_name), origin_name);
|
||||
LOG_WARN("table name is too long", K(origin_name), K(max_ident_len), K(ret), K(stmt_type), K(is_index_table));
|
||||
} else if (OB_FAIL(ret)) {
|
||||
LOG_WARN("fail to check ident name", K(origin_name), K(ret));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
@ -56,6 +56,8 @@ if (OB_BUILD_ORACLE_PARSER)
|
||||
sql_parser_oracle_single_byte_mode_lex.h
|
||||
sql_parser_oracle_single_byte_mode_tab.c
|
||||
sql_parser_oracle_single_byte_mode_tab.h
|
||||
sql_parser_oracle_hkscs_mode_lex.c
|
||||
sql_parser_oracle_hkscs_mode_tab.c
|
||||
)
|
||||
|
||||
set(ob_inner_sql_parser_object_list
|
||||
@ -63,6 +65,7 @@ if (OB_BUILD_ORACLE_PARSER)
|
||||
non_reserved_keywords_oracle_utf8_mode.c
|
||||
non_reserved_keywords_oracle_gbk_mode.c
|
||||
non_reserved_keywords_oracle_single_byte_mode.c
|
||||
non_reserved_keywords_oracle_hkscs_mode.c
|
||||
)
|
||||
endif()
|
||||
|
||||
@ -89,6 +92,7 @@ set(ob_inner_sql_parser_object_list
|
||||
ob_char_type.h
|
||||
ob_fast_parser.h
|
||||
ob_fast_parser.cpp
|
||||
ob_parser_charset_utils.cpp
|
||||
sql_parser_base.c
|
||||
sql_parser_base.h
|
||||
sql_parser_base.h
|
||||
|
@ -206,6 +206,57 @@ rm -f ../../../src/sql/parser/gbk.txt
|
||||
rm -f ../../../src/sql/parser/sql_parser_oracle_gbk_mode.l
|
||||
rm -f ../../../src/sql/parser/sql_parser_oracle_gbk_mode.y
|
||||
|
||||
# generate oracle hkscs sql_parser(support multi_byte_space、multi_byte_comma、multi_byte_left_parenthesis、multi_byte_right_parenthesis)
|
||||
##1.copy lex and yacc files
|
||||
cat ../../../src/sql/parser/sql_parser_oracle_mode.y > ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
|
||||
cat ../../../src/sql/parser/sql_parser_oracle_mode.l > ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
##2.replace name
|
||||
sed "s/obsql_oracle_yy/obsql_oracle_hkscs_yy/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
|
||||
sed "s/obsql_oracle_yy/obsql_oracle_hkscs_yy/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed "s/sql_parser_oracle_mode/sql_parser_oracle_hkscs_mode/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
|
||||
sed "s/sql_parser_oracle_mode/sql_parser_oracle_hkscs_mode/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed "s/obsql_oracle_parser_fatal_error/obsql_oracle_hkscs_parser_fatal_error/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
|
||||
sed "s/obsql_oracle_parser_fatal_error/obsql_oracle_hkscs_parser_fatal_error/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed "s/obsql_oracle_fast_parse/obsql_oracle_hkscs_fast_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
|
||||
sed "s/obsql_oracle_multi_fast_parse/obsql_oracle_hkscs_multi_fast_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
|
||||
sed "s/obsql_oracle_multi_values_parse/obsql_oracle_hkscs_multi_values_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
|
||||
##3.add multi_byte_space、multi_byte_comma、multi_byte_left_parenthesis、multi_byte_right_parenthesis code.
|
||||
sed "s/multi_byte_space \[\\\u3000\]/multi_byte_space ([\\\xa1][\\\x40])/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed "s/multi_byte_comma \[\\\uff0c\]/multi_byte_comma ([\\\xa1][\\\x41])/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed "s/multi_byte_left_parenthesis \[\\\uff08\]/multi_byte_left_parenthesis ([\\\xa1][\\\x5d])/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed "s/multi_byte_right_parenthesis \[\\\uff09\]/multi_byte_right_parenthesis ([\\\xa1][\\\x5e])/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
# Flex name definitions for HKSCS double-byte characters; written to hkscs.txt and
# spliced into sql_parser_oracle_hkscs_mode.l by the sed 'r' command that follows.
#   lead byte : 0x81-0xfe (HK_1, split into HK_1_1 / HK_1_2 / HK_1_3 around 0xa1)
#   trail byte: 0x40-0x7e (HK_2fb) or 0xa1-0xfe (HK_2sb)
# For lead byte 0xa1 the trail bytes 0x40, 0x41, 0x5d and 0x5e are left out of HK_CHAR,
# because those pairs are the multi-byte space, comma and parentheses.
# HK_2fb_2 must end at 0x7e so that it stays inside HK_2fb; ending it at 0xa1 would let
# the lexer accept the invalid trail bytes 0x7f-0xa0.
echo "HK_1 [\x81-\xfe]
HK_1_1 [\x81-\xa0]
HK_1_2 [\xa1]
HK_1_3 [\xa2-\xfe]
HK_2fb [\x40-\x7e]
HK_2fb_1 [\x42-\x5c]
HK_2fb_2 [\x5f-\x7e]
HK_2sb [\xa1-\xfe]
g_except_space_comma_parenthesis ({HK_1_2}{HK_2fb_1}|{HK_1_2}{HK_2fb_2})
HK_CHAR ({HK_1_1}{HK_2fb}|{HK_1_1}{HK_2sb}|{g_except_space_comma_parenthesis}|{HK_1_2}{HK_2sb}|{HK_1_3}{HK_2fb}|{HK_1_3}{HK_2sb})" > ../../../src/sql/parser/hkscs.txt
|
||||
sed '/following character status will be rewrite by gen_parse.sh according to connection character/d' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed '/multi_byte_connect_char \/\*According to connection character to set by gen_parse.sh\*\//r ../../../src/sql/parser/hkscs.txt' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed '/multi_byte_connect_char \/\*According to connection character to set by gen_parse.sh\*\//d' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed 's/space \[ \\t\\n\\r\\f\]/space (\[ \\t\\n\\r\\f\]|{multi_byte_space})/g' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
sed 's/multi_byte_connect_char/HK_CHAR/g' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
##4.generate oracle hkscs parser files
|
||||
bison_parser ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_tab.c
|
||||
flex -o ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_tab.h
|
||||
##5.replace other info
|
||||
sed "/Setup the input buffer state to scan the given bytes/,/}/{/int i/d}" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
|
||||
sed "/Setup the input buffer state to scan the given bytes/,/}/{/for ( i = 0; i < _yybytes_len; ++i )/d}" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
|
||||
sed "/Setup the input buffer state to scan the given bytes/,/}/{s/\tbuf\[i\] = yybytes\[i\]/memcpy(buf, yybytes, _yybytes_len)/g}" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
|
||||
sed "/obsql_oracle_hkscs_yylex_init is special because it creates the scanner itself/,/Initialization is the same as for the non-reentrant scanner/{s/return 1/return errno/g}" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
|
||||
cat ../../../src/sql/parser/non_reserved_keywords_oracle_mode.c > ../../../src/sql/parser/non_reserved_keywords_oracle_hkscs_mode.c
|
||||
sed '/#include "ob_non_reserved_keywords.h"/a\#include "sql/parser/sql_parser_oracle_hkscs_mode_tab.h\"' -i ../../../src/sql/parser/non_reserved_keywords_oracle_hkscs_mode.c
|
||||
sed "s/non_reserved_keywords_oracle_mode.c is for …/non_reserved_keywords_oracle_hkscs_mode.c is auto generated by gen_parser.sh/g" -i ../../../src/sql/parser/non_reserved_keywords_oracle_hkscs_mode.c
|
||||
##6.clean useless files
|
||||
rm -f ../../../src/sql/parser/hkscs.txt
|
||||
rm -f ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
|
||||
rm -f ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
|
||||
|
||||
|
||||
rm -rf ../../../src/sql/parser/sql_parser_oracle_mode.y
|
||||
rm -rf ../../../src/sql/parser/sql_parser_oracle_mode.l
|
||||
|
||||
|
@ -471,6 +471,8 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
|
||||
idf_pos = is_gbk_char(pos);
|
||||
} else if (charset_info_->mbmaxlen == 1) {
|
||||
idf_pos = is_single_byte_char(pos);
|
||||
} else if (CHARSET_HKSCS == charset_type_ || CHARSET_HKSCS31 == charset_type_) {
|
||||
idf_pos = is_hk_char(pos);
|
||||
}
|
||||
return idf_pos;
|
||||
}
|
||||
@ -1075,6 +1077,47 @@ inline int64_t ObFastParserBase::is_utf8_multi_byte_right_parenthesis(
|
||||
return idf_pos;
|
||||
}
|
||||
|
||||
// Checks whether the two bytes at str[pos], str[pos + 1] are 0xA1 0x40,
// the HKSCS multi-byte space.
// Returns pos + 2 on a match and -1 otherwise.
// No bounds check is done here; str[pos + 1] is only read once str[pos] matched.
inline int64_t ObFastParserBase::is_hk_multi_byte_space(const char *str, const int64_t pos)
{
  const bool matched = (static_cast<uint8_t>(str[pos]) == 0xa1)
                       && (static_cast<uint8_t>(str[pos + 1]) == 0x40);
  return matched ? pos + 2 : -1;
}
|
||||
// Checks whether the two bytes at str[pos], str[pos + 1] are 0xA1 0x41,
// the HKSCS multi-byte comma.
// Returns pos + 2 on a match and -1 otherwise.
// No bounds check is done here; str[pos + 1] is only read once str[pos] matched.
inline int64_t ObFastParserBase::is_hk_multi_byte_comma(const char *str, const int64_t pos)
{
  const bool matched = (static_cast<uint8_t>(str[pos]) == 0xa1)
                       && (static_cast<uint8_t>(str[pos + 1]) == 0x41);
  return matched ? pos + 2 : -1;
}
|
||||
|
||||
// Checks whether the two bytes at str[pos], str[pos + 1] are 0xA1 0x5D,
// the HKSCS multi-byte left parenthesis.
// Returns pos + 2 on a match and -1 otherwise.
// No bounds check is done here; str[pos + 1] is only read once str[pos] matched.
inline int64_t ObFastParserBase::is_hk_multi_byte_left_parenthesis(
    const char *str, const int64_t pos)
{
  const bool matched = (static_cast<uint8_t>(str[pos]) == 0xa1)
                       && (static_cast<uint8_t>(str[pos + 1]) == 0x5d);
  return matched ? pos + 2 : -1;
}
|
||||
|
||||
// Checks whether the two bytes at str[pos], str[pos + 1] are 0xA1 0x5E,
// the HKSCS multi-byte right parenthesis.
// Returns pos + 2 on a match and -1 otherwise.
// No bounds check is done here; str[pos + 1] is only read once str[pos] matched.
inline int64_t ObFastParserBase::is_hk_multi_byte_right_parenthesis(
    const char *str, const int64_t pos)
{
  const bool matched = (static_cast<uint8_t>(str[pos]) == 0xa1)
                       && (static_cast<uint8_t>(str[pos + 1]) == 0x5e);
  return matched ? pos + 2 : -1;
}
|
||||
|
||||
// ([\\\xa1][\\\xa1])
|
||||
inline int64_t ObFastParserBase::is_gbk_multi_byte_space(const char *str, const int64_t pos)
|
||||
{
|
||||
@ -1138,6 +1181,22 @@ inline int64_t ObFastParserBase::is_gbk_char(const int64_t pos)
|
||||
return idf_pos;
|
||||
}
|
||||
|
||||
inline int64_t ObFastParserBase::is_hk_char(const int64_t pos)
|
||||
{
|
||||
int64_t idf_pos = -1;
|
||||
if (is_oracle_mode_ &&
|
||||
pos + 2 < raw_sql_.raw_sql_len_ &&
|
||||
(-1 != is_hk_multi_byte_space(raw_sql_.raw_sql_, pos) ||
|
||||
-1 != is_hk_multi_byte_comma(raw_sql_.raw_sql_, pos) ||
|
||||
-1 != is_hk_multi_byte_left_parenthesis(raw_sql_.raw_sql_, pos) ||
|
||||
-1 != is_hk_multi_byte_right_parenthesis(raw_sql_.raw_sql_, pos))) {
|
||||
raw_sql_.scan(2);
|
||||
} else if (is_hk1(raw_sql_.char_at(pos)) && is_hk2(raw_sql_.char_at(pos + 1))) {
|
||||
idf_pos = pos + 2;
|
||||
}
|
||||
return idf_pos;
|
||||
}
|
||||
|
||||
int64_t ObFastParserBase::is_whitespace(int64_t pos)
|
||||
{
|
||||
int64_t ws_end_pos = -1;
|
||||
@ -1410,6 +1469,27 @@ char *ObFastParserBase::parse_strdup_with_replace_multi_byte_char(
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
} else if (
|
||||
charset_type_ == 152
|
||||
|| charset_type_ == 153) {
|
||||
if (i + 1 < dup_len) {
|
||||
if (str[i] == (char)0xa1 && str[i+1] == (char)0x40) {//hkscs multi byte space
|
||||
out_str[len++] = ' ';
|
||||
++i;
|
||||
} else if (str[i] == (char)0xa1 && str[i+1] == (char)0x5d) {
|
||||
//hkscs multi byte left parenthesis
|
||||
out_str[len++] = '(';
|
||||
++i;
|
||||
} else if (str[i] == (char)0xa1 && str[i+1] == (char)0x5e) {
|
||||
//hkscs multi byte right parenthesis
|
||||
out_str[len++] = ')';
|
||||
++i;
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
@ -1636,6 +1716,8 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
|
||||
idf_pos = is_gbk_char(pos);
|
||||
} else if (charset_info_->mbmaxlen == 1) {
|
||||
idf_pos = is_single_byte_char(pos);
|
||||
} else if (CHARSET_HKSCS == charset_type_ || CHARSET_HKSCS31 == charset_type_) {
|
||||
idf_pos = is_hk_char(pos);
|
||||
}
|
||||
return idf_pos;
|
||||
}
|
||||
|
@ -399,6 +399,11 @@ protected:
|
||||
// ([\\\xef\][\\\xbc\][\\\x89])
|
||||
int64_t is_utf8_multi_byte_right_parenthesis(const char *str, const int64_t start_pos);
|
||||
// is_gbk_char (declared below) matches {GB_1}{GB_2}; the is_hk_* helpers are the HKSCS equivalents.
|
||||
int64_t is_hk_char(const int64_t pos);
|
||||
int64_t is_hk_multi_byte_space(const char *str, const int64_t start_pos);
|
||||
int64_t is_hk_multi_byte_comma(const char *str, const int64_t start_pos);
|
||||
int64_t is_hk_multi_byte_left_parenthesis(const char *str, const int64_t start_pos);
|
||||
int64_t is_hk_multi_byte_right_parenthesis(const char *str, const int64_t start_pos);
|
||||
int64_t is_gbk_char(const int64_t pos);
|
||||
// ([\\\xa1][\\\xa1])
|
||||
int64_t is_gbk_multi_byte_space(const char *str, const int64_t start_pos);
|
||||
@ -438,12 +443,28 @@ protected:
|
||||
return is_valid_char(ch) &&
|
||||
static_cast<uint8_t>(ch) >= 0x81 && static_cast<uint8_t>(ch) <= 0xfe;
|
||||
}
|
||||
// [\x81-\xfe]
|
||||
inline bool is_hk1(char ch)
|
||||
{
|
||||
return is_valid_char(ch) &&
|
||||
static_cast<uint8_t>(ch) >= 0x81 && static_cast<uint8_t>(ch) <= 0xfe;
|
||||
}
|
||||
|
||||
// [\x40-\xfe]
|
||||
inline bool is_gb2(char ch)
|
||||
{
|
||||
return is_valid_char(ch) &&
|
||||
static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0xfe;
|
||||
}
|
||||
|
||||
// [\x81-\xfe]
|
||||
inline bool is_hk2(char ch)
|
||||
{
|
||||
return is_valid_char(ch) &&
|
||||
((static_cast<uint8_t>(ch) >= 0x40 && static_cast<uint8_t>(ch) <= 0x7e)
|
||||
|| (static_cast<uint8_t>(ch) >= 0xa1 && static_cast<uint8_t>(ch) <= 0xfe));
|
||||
}
|
||||
|
||||
inline bool notascii(char ch)
|
||||
{
|
||||
return is_valid_char(ch) &&
|
||||
|
100
src/sql/parser/ob_parser_charset_utils.cpp
Normal file
100
src/sql/parser/ob_parser_charset_utils.cpp
Normal file
@ -0,0 +1,100 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "ob_parser_charset_utils.h"
|
||||
#include "lib/charset/ob_charset.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
namespace oceanbase{
|
||||
|
||||
// Reports through *is_gb whether collation_type is one of the GBK, GB18030 or
// GB18030-2022 collations.
// @param collation_type  collation to classify
// @param is_gb           out: set to true for a GB-family collation, false otherwise
// @return OB_INVALID_ARGUMENT when is_gb is NULL, OB_SUCCESS otherwise
int obcharset_is_gb_charset_of_collation(ObCollationType collation_type, bool *is_gb) {
  int ret = OB_SUCCESS;
  if (OB_ISNULL(is_gb)) {
    // guard the out-parameter the same way obcharset_get_parser_type_by_coll does
    ret = OB_INVALID_ARGUMENT;
  } else {
    const bool is_gbk = (CS_TYPE_GBK_CHINESE_CI == collation_type
                         || CS_TYPE_GBK_BIN == collation_type);
    const bool is_gb18030 = (CS_TYPE_GB18030_CHINESE_CI == collation_type
                             || CS_TYPE_GB18030_BIN == collation_type
                             || CS_TYPE_GB18030_CHINESE_CS == collation_type);
    const bool is_gb18030_2022 = (collation_type >= CS_TYPE_GB18030_2022_BIN
                                  && collation_type <= CS_TYPE_GB18030_2022_STROKE_CS);
    *is_gb = is_gbk || is_gb18030 || is_gb18030_2022;
  }
  return ret;
}
|
||||
|
||||
// Reports through *is_single_byte whether collation_type is one of the latin1,
// ascii, tis620 or dec8 collations listed below.
// @param collation_type  collation to classify
// @param is_single_byte  out: set to true for a listed collation, false otherwise
// @return OB_INVALID_ARGUMENT when is_single_byte is NULL, OB_SUCCESS otherwise
int obcharset_is_single_byte_charset_of_collation(ObCollationType collation_type, bool *is_single_byte) {
  int ret = OB_SUCCESS;
  if (OB_ISNULL(is_single_byte)) {
    // guard the out-parameter the same way obcharset_get_parser_type_by_coll does
    ret = OB_INVALID_ARGUMENT;
  } else {
    switch (collation_type) {
      case CS_TYPE_LATIN1_SWEDISH_CI:
      case CS_TYPE_LATIN1_BIN:
      case CS_TYPE_ASCII_GENERAL_CI:
      case CS_TYPE_ASCII_BIN:
      case CS_TYPE_TIS620_BIN:
      case CS_TYPE_TIS620_THAI_CI:
      case CS_TYPE_DEC8_BIN:
      case CS_TYPE_DEC8_SWEDISH_CI:
        *is_single_byte = true;
        break;
      default:
        *is_single_byte = false;
        break;
    }
  }
  return ret;
}
|
||||
|
||||
// Reports through *is_utf8 whether collation_type is handled as utf8mb4:
// utf8mb4_general_ci, utf8mb4_bin, utf8mb4_unicode_ci, the range
// CS_TYPE_UTF8MB4_0900_AI_CI..CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS, and CS_TYPE_BINARY
// (binary is deliberately grouped with utf8mb4 here).
// @param collation_type  collation to classify
// @param is_utf8         out: set to true for a listed collation, false otherwise
// @return OB_INVALID_ARGUMENT when is_utf8 is NULL, OB_SUCCESS otherwise
int obcharset_is_utf8_charset_of_collation(ObCollationType collation_type, bool *is_utf8) {
  int ret = OB_SUCCESS;
  if (OB_ISNULL(is_utf8)) {
    // guard the out-parameter the same way obcharset_get_parser_type_by_coll does
    ret = OB_INVALID_ARGUMENT;
  } else {
    const bool is_named = (CS_TYPE_UTF8MB4_GENERAL_CI == collation_type
                           || CS_TYPE_UTF8MB4_BIN == collation_type
                           || CS_TYPE_UTF8MB4_UNICODE_CI == collation_type
                           || CS_TYPE_BINARY == collation_type);
    const bool is_0900 = (collation_type >= CS_TYPE_UTF8MB4_0900_AI_CI
                          && collation_type <= CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS);
    *is_utf8 = is_named || is_0900;
  }
  return ret;
}
|
||||
|
||||
int obcharset_get_parser_type_by_coll(const int collation_type, ObCharsetParserType *parser_type) {
|
||||
int ret = OB_SUCCESS;
|
||||
bool is_gb = false;
|
||||
bool is_single_byte = false;
|
||||
bool is_utf8 = false;
|
||||
ObCollationType coll_type = static_cast<ObCollationType>(collation_type);
|
||||
if (OB_ISNULL(parser_type)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
} else if (OB_FAIL(obcharset_is_gb_charset_of_collation(coll_type, &is_gb))) {
|
||||
/* do nothing */
|
||||
} else if (is_gb) {
|
||||
*parser_type = CHARSET_PARSER_TYPE_GB;
|
||||
} else if (coll_type == CS_TYPE_HKSCS_BIN || coll_type == CS_TYPE_HKSCS31_BIN) {
|
||||
*parser_type = CHARSET_PARSER_TYPE_HKSCS;
|
||||
} else if (OB_FAIL(obcharset_is_single_byte_charset_of_collation(coll_type, &is_single_byte))) {
|
||||
/* do nothing */
|
||||
} else if (is_single_byte) {
|
||||
*parser_type = CHARSET_PARSER_TYPE_SINGLE_BYTE;
|
||||
} else if (OB_FAIL(obcharset_is_utf8_charset_of_collation(coll_type, &is_utf8))){
|
||||
/* do nothing */
|
||||
} else if (is_utf8) {
|
||||
*parser_type = CHARSET_PARSER_TYPE_UTF8MB4;
|
||||
} else {
|
||||
ret = -1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
36
src/sql/parser/ob_parser_charset_utils.h
Normal file
36
src/sql/parser/ob_parser_charset_utils.h
Normal file
@ -0,0 +1,36 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#ifndef OCEANBASE_COMMON_OB_PARSER_CHARSET_UTILS_H
|
||||
#define OCEANBASE_COMMON_OB_PARSER_CHARSET_UTILS_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
|
||||
/* Parser family selected for a connection collation; see
 * obcharset_get_parser_type_by_coll(). Kept as a plain C enum because this
 * header is wrapped in extern "C". */
typedef enum ObCharsetParserType_ {
  CHARSET_PARSER_TYPE_NONE = 0,        /* no family selected */
  CHARSET_PARSER_TYPE_GB = 1,          /* GBK / GB18030 collations */
  CHARSET_PARSER_TYPE_SINGLE_BYTE = 2, /* single-byte charsets */
  CHARSET_PARSER_TYPE_UTF8MB4 = 3,     /* utf8mb4 collations and binary */
  CHARSET_PARSER_TYPE_HKSCS = 4,       /* HKSCS / HKSCS31 collations */
  CHARSET_PARSER_TYPE_MAX = 5,         /* one past the last family */
} ObCharsetParserType;
|
||||
|
||||
int obcharset_get_parser_type_by_coll(const int collation_type, ObCharsetParserType *parser_type);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif //OCEANBASE_COMMON_OB_PARSER_CHARSET_UTILS_H
|
@ -235,69 +235,78 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
|
||||
int64_t len = 0;
|
||||
int64_t dup_len = strlen(str);
|
||||
for (int64_t i = 0; i < dup_len; ++i) {
|
||||
switch (*connection_collation_) {
|
||||
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
|
||||
case 87/*CS_TYPE_GBK_BIN*/:
|
||||
case 216/*CS_TYPE_GB18030_2022_BIN*/:
|
||||
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
|
||||
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
|
||||
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
|
||||
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
|
||||
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
|
||||
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
|
||||
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
|
||||
case 249/*CS_TYPE_GB18030_BIN*/: {
|
||||
if (i + 1 < dup_len) {
|
||||
if (str[i] == (char)0xa1 && str[i+1] == (char)0xa1) {//gbk multi byte space
|
||||
out_str[len++] = ' ';
|
||||
++i;
|
||||
} else if (str[i] == (char)0xa3 && str[i+1] == (char)0xa8) {
|
||||
//gbk multi byte left parenthesis
|
||||
out_str[len++] = '(';
|
||||
++i;
|
||||
} else if (str[i] == (char)0xa3 && str[i+1] == (char)0xa9) {
|
||||
//gbk multi byte right parenthesis
|
||||
out_str[len++] = ')';
|
||||
++i;
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
if (*connection_collation_ == 28/*CS_TYPE_GBK_CHINESE_CI*/
|
||||
|| *connection_collation_ == 87/*CS_TYPE_GBK_BIN*/
|
||||
|| *connection_collation_ == 248/*CS_TYPE_GB18030_CHINESE_CI*/
|
||||
|| *connection_collation_ == 249/*CS_TYPE_GB18030_BIN*/
|
||||
|| (*connection_collation_ >= 216/*CS_TYPE_GB18030_2022_BIN*/
|
||||
&& *connection_collation_ <= 222/*CS_TYPE_GB18030_2022_STROKE_CS*/)) {
|
||||
if (i + 1 < dup_len) {
|
||||
if (str[i] == (char)0xa1 && str[i+1] == (char)0xa1) {//gbk multi byte space
|
||||
out_str[len++] = ' ';
|
||||
++i;
|
||||
} else if (str[i] == (char)0xa3 && str[i+1] == (char)0xa8) {
|
||||
//gbk multi byte left parenthesis
|
||||
out_str[len++] = '(';
|
||||
++i;
|
||||
} else if (str[i] == (char)0xa3 && str[i+1] == (char)0xa9) {
|
||||
//gbk multi byte right parenthesis
|
||||
out_str[len++] = ')';
|
||||
++i;
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
|
||||
case 46/*CS_TYPE_UTF8MB4_BIN*/:
|
||||
case 63/*CS_TYPE_BINARY*/:
|
||||
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
|
||||
case 245/*CS_TYPE_UTF8MB4_CROATIAN_CI*/:
|
||||
case 246/*CS_TYPE_UTF8MB4_UNICODE_520_CI*/:
|
||||
case 234/*CS_TYPE_UTF8MB4_CZECH_CI*/:
|
||||
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
|
||||
{
|
||||
if (i + 2 < dup_len) {
|
||||
if (str[i] == (char)0xe3 && str[i+1] == (char)0x80 && str[i+2] == (char)0x80) {
|
||||
//utf8 multi byte space
|
||||
out_str[len++] = ' ';
|
||||
i = i + 2;
|
||||
} else if (str[i] == (char)0xef && str[i+1] == (char)0xbc && str[i+2] == (char)0x88) {
|
||||
//utf8 multi byte left parenthesis
|
||||
out_str[len++] = '(';
|
||||
i = i + 2;
|
||||
} else if (str[i] == (char)0xef && str[i+1] == (char)0xbc && str[i+2] == (char)0x89) {
|
||||
//utf8 multi byte right parenthesis
|
||||
out_str[len++] = ')';
|
||||
i = i + 2;
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
} else if (
|
||||
*connection_collation_ == 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/
|
||||
|| *connection_collation_ == 46/*CS_TYPE_UTF8MB4_BIN*/
|
||||
|| *connection_collation_ == 63/*CS_TYPE_BINARY*/
|
||||
|| *connection_collation_ == 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/
|
||||
|| (*connection_collation_ >= 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/
|
||||
&& *connection_collation_ <= 247/*CS_TYPE_UTF8MB4_VIETNAMESE_CI*/)) {
|
||||
if (i + 2 < dup_len) {
|
||||
if (str[i] == (char)0xe3 && str[i+1] == (char)0x80 && str[i+2] == (char)0x80) {
|
||||
//utf8 multi byte space
|
||||
out_str[len++] = ' ';
|
||||
i = i + 2;
|
||||
} else if (str[i] == (char)0xef && str[i+1] == (char)0xbc && str[i+2] == (char)0x88) {
|
||||
//utf8 multi byte left parenthesis
|
||||
out_str[len++] = '(';
|
||||
i = i + 2;
|
||||
} else if (str[i] == (char)0xef && str[i+1] == (char)0xbc && str[i+2] == (char)0x89) {
|
||||
//utf8 multi byte right parenthesis
|
||||
out_str[len++] = ')';
|
||||
i = i + 2;
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
default:
|
||||
} else if (
|
||||
*connection_collation_ == 152
|
||||
|| *connection_collation_ == 153) {
|
||||
if (i + 1 < dup_len) {
|
||||
if (str[i] == (char)0xa1 && str[i+1] == (char)0x40) {//hkscs multi byte space
|
||||
out_str[len++] = ' ';
|
||||
++i;
|
||||
} else if (str[i] == (char)0xa1 && str[i+1] == (char)0x5d) {
|
||||
//hkscs multi byte left parenthesis
|
||||
out_str[len++] = '(';
|
||||
++i;
|
||||
} else if (str[i] == (char)0xa1 && str[i+1] == (char)0x5e) {
|
||||
//hkscs multi byte right parenthesis
|
||||
out_str[len++] = ')';
|
||||
++i;
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
} else {
|
||||
out_str[len++] = str[i];
|
||||
}
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
*/
|
||||
|
||||
#include "sql_parser_base.h"
|
||||
#include "ob_parser_charset_utils.h"
|
||||
|
||||
#define YY_EXTRA_TYPE void *
|
||||
#define yyconst const
|
||||
@ -28,6 +29,7 @@ extern YY_BUFFER_STATE obsql_mysql_yy_scan_bytes (yyconst char *bytes,int len ,y
|
||||
extern void obsql_mysql_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
|
||||
extern void obsql_mysql_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
|
||||
#ifdef OB_BUILD_ORACLE_PARSER
|
||||
|
||||
extern int obsql_oracle_single_byte_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
|
||||
extern int obsql_oracle_single_byte_yyparse(ParseResult *result);
|
||||
extern int obsql_oracle_single_byte_multi_fast_parse(ParseResult *p);
|
||||
@ -37,6 +39,7 @@ extern int obsql_oracle_single_byte_yylex_destroy (yyscan_t yyscanner );
|
||||
extern YY_BUFFER_STATE obsql_oracle_single_byte_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
|
||||
extern void obsql_oracle_single_byte_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
|
||||
extern void obsql_oracle_single_byte_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
|
||||
|
||||
extern int obsql_oracle_utf8_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
|
||||
extern int obsql_oracle_utf8_yyparse(ParseResult *result);
|
||||
extern int obsql_oracle_utf8_multi_fast_parse(ParseResult *p);
|
||||
@ -46,6 +49,7 @@ extern int obsql_oracle_utf8_yylex_destroy (yyscan_t yyscanner );
|
||||
extern YY_BUFFER_STATE obsql_oracle_utf8_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
|
||||
extern void obsql_oracle_utf8_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
|
||||
extern void obsql_oracle_utf8_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
|
||||
|
||||
extern int obsql_oracle_gbk_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
|
||||
extern int obsql_oracle_gbk_yyparse(ParseResult *result);
|
||||
extern int obsql_oracle_gbk_multi_fast_parse(ParseResult *p);
|
||||
@ -55,7 +59,21 @@ extern int obsql_oracle_gbk_yylex_destroy (yyscan_t yyscanner );
|
||||
extern YY_BUFFER_STATE obsql_oracle_gbk_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
|
||||
extern void obsql_oracle_gbk_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
|
||||
extern void obsql_oracle_gbk_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
|
||||
|
||||
extern int obsql_oracle_hkscs_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
|
||||
extern int obsql_oracle_hkscs_yyparse(ParseResult *result);
|
||||
extern int obsql_oracle_hkscs_multi_fast_parse(ParseResult *p);
|
||||
extern int obsql_oracle_hkscs_multi_values_parse(ParseResult *p);
|
||||
extern int obsql_oracle_hkscs_fast_parse(ParseResult *p);
|
||||
extern int obsql_oracle_hkscs_yylex_destroy (yyscan_t yyscanner );
|
||||
extern YY_BUFFER_STATE obsql_oracle_hkscs_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
|
||||
extern void obsql_oracle_hkscs_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
|
||||
extern void obsql_oracle_hkscs_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
|
||||
|
||||
extern int obcharset_get_parser_type_by_coll(const int collation_type, ObCharsetParserType *parser_type);
|
||||
|
||||
#endif
|
||||
|
||||
int parse_init(ParseResult *p)
|
||||
{
|
||||
int ret = 0; // can not include C++ file "ob_define.h"
|
||||
@ -71,40 +89,29 @@ int parse_init(ParseResult *p)
|
||||
if (OB_LIKELY( 0 == ret)) {
|
||||
#ifdef OB_BUILD_ORACLE_PARSER
|
||||
if (IS_ORACLE_COMPATIBLE) {
|
||||
switch (p->connection_collation_) {
|
||||
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
|
||||
case 87/*CS_TYPE_GBK_BIN*/:
|
||||
case 216/*CS_TYPE_GB18030_2022_BIN*/:
|
||||
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
|
||||
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
|
||||
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
|
||||
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
|
||||
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
|
||||
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
|
||||
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
|
||||
case 249/*CS_TYPE_GB18030_BIN*/:
|
||||
ret = obsql_oracle_gbk_yylex_init_extra(p, &(p->yyscan_info_));
|
||||
break;
|
||||
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
|
||||
case 46/*CS_TYPE_UTF8MB4_BIN*/:
|
||||
case 63/*CS_TYPE_BINARY*/:
|
||||
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
|
||||
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
|
||||
ret = obsql_oracle_utf8_yylex_init_extra(p, &(p->yyscan_info_));
|
||||
break;
|
||||
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
|
||||
case 47/*CS_TYPE_LATIN1_BIN*/:
|
||||
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
|
||||
case 65/*CS_TYPE_ASCII_BIN*/:
|
||||
case 18/*CS_TYPE_TIS620_THAI_CI*/:
|
||||
case 89/*CS_TYPE_TIS620_BIN*/:
|
||||
ret = obsql_oracle_single_byte_yylex_init_extra(p, &(p->yyscan_info_));
|
||||
break;
|
||||
default: {
|
||||
ret = -1;
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
|
||||
p->connection_collation_);
|
||||
break;
|
||||
ObCharsetParserType type = CHARSET_PARSER_TYPE_NONE;
|
||||
if (ret = obcharset_get_parser_type_by_coll(p->connection_collation_, &type),
|
||||
0 != ret) {
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get charset failed: %u",
|
||||
p->connection_collation_);
|
||||
} else {
|
||||
switch(type) {
|
||||
case CHARSET_PARSER_TYPE_GB:
|
||||
ret = obsql_oracle_gbk_yylex_init_extra(p, &(p->yyscan_info_));
|
||||
break;
|
||||
case CHARSET_PARSER_TYPE_SINGLE_BYTE:
|
||||
ret = obsql_oracle_single_byte_yylex_init_extra(p, &(p->yyscan_info_));
|
||||
break;
|
||||
case CHARSET_PARSER_TYPE_UTF8MB4:
|
||||
ret = obsql_oracle_utf8_yylex_init_extra(p, &(p->yyscan_info_));
|
||||
break;
|
||||
case CHARSET_PARSER_TYPE_HKSCS:
|
||||
ret = obsql_oracle_hkscs_yylex_init_extra(p, &(p->yyscan_info_));
|
||||
break;
|
||||
default:
|
||||
ret = -1;
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
|
||||
p->connection_collation_);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -132,40 +139,29 @@ int parse_terminate(ParseResult *p)
|
||||
if (OB_LIKELY(NULL != p->yyscan_info_)) {
|
||||
#ifdef OB_BUILD_ORACLE_PARSER
|
||||
if (IS_ORACLE_COMPATIBLE) {
|
||||
switch (p->connection_collation_) {
|
||||
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
|
||||
case 87/*CS_TYPE_GBK_BIN*/:
|
||||
case 216/*CS_TYPE_GB18030_2022_BIN*/:
|
||||
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
|
||||
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
|
||||
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
|
||||
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
|
||||
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
|
||||
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
|
||||
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
|
||||
case 249/*CS_TYPE_GB18030_BIN*/:
|
||||
ret = obsql_oracle_gbk_yylex_destroy(p->yyscan_info_);
|
||||
break;
|
||||
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
|
||||
case 46/*CS_TYPE_UTF8MB4_BIN*/:
|
||||
case 63/*CS_TYPE_BINARY*/:
|
||||
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
|
||||
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
|
||||
ret = obsql_oracle_utf8_yylex_destroy(p->yyscan_info_);
|
||||
break;
|
||||
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
|
||||
case 47/*CS_TYPE_LATIN1_BIN*/:
|
||||
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
|
||||
case 65/*CS_TYPE_ASCII_BIN*/:
|
||||
case 18/*CS_TYPE_TIS620_THAI_CI*/:
|
||||
case 89/*CS_TYPE_TIS620_BIN*/:
|
||||
ret = obsql_oracle_single_byte_yylex_destroy(p->yyscan_info_);
|
||||
break;
|
||||
default: {
|
||||
ret = -1;
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
|
||||
p->connection_collation_);
|
||||
break;
|
||||
ObCharsetParserType type = CHARSET_PARSER_TYPE_NONE;
|
||||
if (ret = obcharset_get_parser_type_by_coll(p->connection_collation_, &type),
|
||||
0 != ret) {
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get charset failed: %u",
|
||||
p->connection_collation_);
|
||||
} else {
|
||||
switch(type) {
|
||||
case CHARSET_PARSER_TYPE_GB:
|
||||
ret = obsql_oracle_gbk_yylex_destroy(p->yyscan_info_);
|
||||
break;
|
||||
case CHARSET_PARSER_TYPE_SINGLE_BYTE:
|
||||
ret = obsql_oracle_single_byte_yylex_destroy(p->yyscan_info_);
|
||||
break;
|
||||
case CHARSET_PARSER_TYPE_UTF8MB4:
|
||||
ret = obsql_oracle_utf8_yylex_destroy(p->yyscan_info_);
|
||||
break;
|
||||
case CHARSET_PARSER_TYPE_HKSCS:
|
||||
ret = obsql_oracle_hkscs_yylex_destroy(p->yyscan_info_);
|
||||
break;
|
||||
default:
|
||||
ret = -1;
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
|
||||
p->connection_collation_);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -233,113 +229,125 @@ int parse_sql(ParseResult *p, const char *buf, size_t input_len)
|
||||
} else {
|
||||
#ifdef OB_BUILD_ORACLE_PARSER
|
||||
if (IS_ORACLE_COMPATIBLE) {
|
||||
switch (p->connection_collation_) {
|
||||
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
|
||||
case 87/*CS_TYPE_GBK_BIN*/:
|
||||
case 216/*CS_TYPE_GB18030_2022_BIN*/:
|
||||
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
|
||||
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
|
||||
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
|
||||
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
|
||||
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
|
||||
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
|
||||
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
|
||||
case 249/*CS_TYPE_GB18030_BIN*/: {
|
||||
YY_BUFFER_STATE bp = obsql_oracle_gbk_yy_scan_bytes(buf, len, p->yyscan_info_);
|
||||
obsql_oracle_gbk_yy_switch_to_buffer(bp, p->yyscan_info_);
|
||||
int tmp_ret = -1;
|
||||
if (p->is_fp_) {
|
||||
tmp_ret = obsql_oracle_gbk_fast_parse(p);
|
||||
} else if (p->is_multi_query_) {
|
||||
tmp_ret = obsql_oracle_gbk_multi_fast_parse(p);
|
||||
} else if (p->is_multi_values_parser_) {
|
||||
tmp_ret = obsql_oracle_gbk_multi_values_parse(p);
|
||||
} else {
|
||||
tmp_ret = obsql_oracle_gbk_yyparse(p);
|
||||
}
|
||||
if (0 == tmp_ret) {
|
||||
ret = OB_PARSER_SUCCESS;
|
||||
} else if (2 == tmp_ret) {
|
||||
ret = OB_PARSER_ERR_NO_MEMORY;
|
||||
} else {
|
||||
if (0 != p->extra_errno_) {
|
||||
ret = p->extra_errno_;
|
||||
ObCharsetParserType type = CHARSET_PARSER_TYPE_NONE;
|
||||
if (ret = obcharset_get_parser_type_by_coll(p->connection_collation_, &type),
|
||||
0 != ret) {
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get charset failed: %u",
|
||||
p->connection_collation_);
|
||||
} else {
|
||||
switch(type) {
|
||||
case CHARSET_PARSER_TYPE_GB: {
|
||||
YY_BUFFER_STATE bp = obsql_oracle_gbk_yy_scan_bytes(buf, len, p->yyscan_info_);
|
||||
obsql_oracle_gbk_yy_switch_to_buffer(bp, p->yyscan_info_);
|
||||
int tmp_ret = -1;
|
||||
if (p->is_fp_) {
|
||||
tmp_ret = obsql_oracle_gbk_fast_parse(p);
|
||||
} else if (p->is_multi_query_) {
|
||||
tmp_ret = obsql_oracle_gbk_multi_fast_parse(p);
|
||||
} else if (p->is_multi_values_parser_) {
|
||||
tmp_ret = obsql_oracle_gbk_multi_values_parse(p);
|
||||
} else {
|
||||
ret = OB_PARSER_ERR_PARSE_SQL;
|
||||
tmp_ret = obsql_oracle_gbk_yyparse(p);
|
||||
}
|
||||
}
|
||||
obsql_oracle_gbk_yy_delete_buffer(bp, p->yyscan_info_);
|
||||
break;
|
||||
}
|
||||
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
|
||||
case 46/*CS_TYPE_UTF8MB4_BIN*/:
|
||||
case 63/*CS_TYPE_BINARY*/:
|
||||
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
|
||||
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
|
||||
{
|
||||
YY_BUFFER_STATE bp = obsql_oracle_utf8_yy_scan_bytes(buf, len, p->yyscan_info_);
|
||||
obsql_oracle_utf8_yy_switch_to_buffer(bp, p->yyscan_info_);
|
||||
int tmp_ret = -1;
|
||||
if (p->is_fp_) {
|
||||
tmp_ret = obsql_oracle_utf8_fast_parse(p);
|
||||
} else if (p->is_multi_query_) {
|
||||
tmp_ret = obsql_oracle_utf8_multi_fast_parse(p);
|
||||
} else if (p->is_multi_values_parser_) {
|
||||
tmp_ret = obsql_oracle_utf8_multi_values_parse(p);
|
||||
} else {
|
||||
tmp_ret = obsql_oracle_utf8_yyparse(p);
|
||||
}
|
||||
if (0 == tmp_ret) {
|
||||
ret = OB_PARSER_SUCCESS;
|
||||
} else if (2 == tmp_ret) {
|
||||
ret = OB_PARSER_ERR_NO_MEMORY;
|
||||
} else {
|
||||
if (0 != p->extra_errno_) {
|
||||
ret = p->extra_errno_;
|
||||
if (0 == tmp_ret) {
|
||||
ret = OB_PARSER_SUCCESS;
|
||||
} else if (2 == tmp_ret) {
|
||||
ret = OB_PARSER_ERR_NO_MEMORY;
|
||||
} else {
|
||||
ret = OB_PARSER_ERR_PARSE_SQL;
|
||||
if (0 != p->extra_errno_) {
|
||||
ret = p->extra_errno_;
|
||||
} else {
|
||||
ret = OB_PARSER_ERR_PARSE_SQL;
|
||||
}
|
||||
}
|
||||
obsql_oracle_gbk_yy_delete_buffer(bp, p->yyscan_info_);
|
||||
break;
|
||||
}
|
||||
obsql_oracle_utf8_yy_delete_buffer(bp, p->yyscan_info_);
|
||||
break;
|
||||
}
|
||||
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
|
||||
case 65/*CS_TYPE_ASCII_BIN*/:
|
||||
case 18/*CS_TYPE_TIS620_THAI_CI*/:
|
||||
case 89/*CS_TYPE_TIS620_BIN*/:
|
||||
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
|
||||
case 47/*CS_TYPE_LATIN1_BIN*/:{
|
||||
YY_BUFFER_STATE bp = obsql_oracle_single_byte_yy_scan_bytes(buf, len, p->yyscan_info_);
|
||||
obsql_oracle_single_byte_yy_switch_to_buffer(bp, p->yyscan_info_);
|
||||
int tmp_ret = -1;
|
||||
if (p->is_fp_) {
|
||||
tmp_ret = obsql_oracle_single_byte_fast_parse(p);
|
||||
} else if (p->is_multi_query_) {
|
||||
tmp_ret = obsql_oracle_single_byte_multi_fast_parse(p);
|
||||
} else if (p->is_multi_values_parser_) {
|
||||
tmp_ret = obsql_oracle_single_byte_multi_values_parse(p);
|
||||
} else {
|
||||
tmp_ret = obsql_oracle_single_byte_yyparse(p);
|
||||
}
|
||||
if (0 == tmp_ret) {
|
||||
ret = OB_PARSER_SUCCESS;
|
||||
} else if (2 == tmp_ret) {
|
||||
ret = OB_PARSER_ERR_NO_MEMORY;
|
||||
} else {
|
||||
if (0 != p->extra_errno_) {
|
||||
ret = p->extra_errno_;
|
||||
case CHARSET_PARSER_TYPE_SINGLE_BYTE: {
|
||||
YY_BUFFER_STATE bp = obsql_oracle_single_byte_yy_scan_bytes(buf, len, p->yyscan_info_);
|
||||
obsql_oracle_single_byte_yy_switch_to_buffer(bp, p->yyscan_info_);
|
||||
int tmp_ret = -1;
|
||||
if (p->is_fp_) {
|
||||
tmp_ret = obsql_oracle_single_byte_fast_parse(p);
|
||||
} else if (p->is_multi_query_) {
|
||||
tmp_ret = obsql_oracle_single_byte_multi_fast_parse(p);
|
||||
} else if (p->is_multi_values_parser_) {
|
||||
tmp_ret = obsql_oracle_single_byte_multi_values_parse(p);
|
||||
} else {
|
||||
ret = OB_PARSER_ERR_PARSE_SQL;
|
||||
tmp_ret = obsql_oracle_single_byte_yyparse(p);
|
||||
}
|
||||
if (0 == tmp_ret) {
|
||||
ret = OB_PARSER_SUCCESS;
|
||||
} else if (2 == tmp_ret) {
|
||||
ret = OB_PARSER_ERR_NO_MEMORY;
|
||||
} else {
|
||||
if (0 != p->extra_errno_) {
|
||||
ret = p->extra_errno_;
|
||||
} else {
|
||||
ret = OB_PARSER_ERR_PARSE_SQL;
|
||||
}
|
||||
}
|
||||
obsql_oracle_single_byte_yy_delete_buffer(bp, p->yyscan_info_);
|
||||
break;
|
||||
}
|
||||
obsql_oracle_single_byte_yy_delete_buffer(bp, p->yyscan_info_);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
ret = OB_PARSER_ERR_UNEXPECTED;
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support conn collation: %u",
|
||||
p->connection_collation_);
|
||||
break;
|
||||
case CHARSET_PARSER_TYPE_UTF8MB4: {
|
||||
YY_BUFFER_STATE bp = obsql_oracle_utf8_yy_scan_bytes(buf, len, p->yyscan_info_);
|
||||
obsql_oracle_utf8_yy_switch_to_buffer(bp, p->yyscan_info_);
|
||||
int tmp_ret = -1;
|
||||
if (p->is_fp_) {
|
||||
tmp_ret = obsql_oracle_utf8_fast_parse(p);
|
||||
} else if (p->is_multi_query_) {
|
||||
tmp_ret = obsql_oracle_utf8_multi_fast_parse(p);
|
||||
} else if (p->is_multi_values_parser_) {
|
||||
tmp_ret = obsql_oracle_utf8_multi_values_parse(p);
|
||||
} else {
|
||||
tmp_ret = obsql_oracle_utf8_yyparse(p);
|
||||
}
|
||||
if (0 == tmp_ret) {
|
||||
ret = OB_PARSER_SUCCESS;
|
||||
} else if (2 == tmp_ret) {
|
||||
ret = OB_PARSER_ERR_NO_MEMORY;
|
||||
} else {
|
||||
if (0 != p->extra_errno_) {
|
||||
ret = p->extra_errno_;
|
||||
} else {
|
||||
ret = OB_PARSER_ERR_PARSE_SQL;
|
||||
}
|
||||
}
|
||||
obsql_oracle_utf8_yy_delete_buffer(bp, p->yyscan_info_);
|
||||
break;
|
||||
}
|
||||
case CHARSET_PARSER_TYPE_HKSCS: {
|
||||
YY_BUFFER_STATE bp = obsql_oracle_hkscs_yy_scan_bytes(buf, len, p->yyscan_info_);
|
||||
obsql_oracle_hkscs_yy_switch_to_buffer(bp, p->yyscan_info_);
|
||||
int tmp_ret = -1;
|
||||
if (p->is_fp_) {
|
||||
tmp_ret = obsql_oracle_hkscs_fast_parse(p);
|
||||
} else if (p->is_multi_query_) {
|
||||
tmp_ret = obsql_oracle_hkscs_multi_fast_parse(p);
|
||||
} else if (p->is_multi_values_parser_) {
|
||||
tmp_ret = obsql_oracle_hkscs_multi_values_parse(p);
|
||||
} else {
|
||||
tmp_ret = obsql_oracle_hkscs_yyparse(p);
|
||||
}
|
||||
if (0 == tmp_ret) {
|
||||
ret = OB_PARSER_SUCCESS;
|
||||
} else if (2 == tmp_ret) {
|
||||
ret = OB_PARSER_ERR_NO_MEMORY;
|
||||
} else {
|
||||
if (0 != p->extra_errno_) {
|
||||
ret = p->extra_errno_;
|
||||
} else {
|
||||
ret = OB_PARSER_ERR_PARSE_SQL;
|
||||
}
|
||||
}
|
||||
obsql_oracle_hkscs_yy_delete_buffer(bp, p->yyscan_info_);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ret = -1;
|
||||
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
|
||||
p->connection_collation_);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -29,6 +29,8 @@
|
||||
#include "parse_malloc.h"
|
||||
#include "ob_non_reserved_keywords.h"
|
||||
#include "parse_define.h"
|
||||
#include "ob_parser_charset_utils.h"
|
||||
|
||||
|
||||
#define MAX_VARCHAR_LENGTH 4194303
|
||||
#define INT16NUM_OVERFLOW INT16_MAX
|
||||
|
@ -121,6 +121,12 @@ _ASCII { REPUT_TOKEN_NEG_SIGN(_ASCII); }
|
||||
_TIS620 { REPUT_TOKEN_NEG_SIGN(_TIS620); }
|
||||
_BINARY { REPUT_TOKEN_NEG_SIGN(_BINARY); }
|
||||
_UTF16 { REPUT_TOKEN_NEG_SIGN(_UTF16); }
|
||||
_UTF16LE { REPUT_TOKEN_NEG_SIGN(_UTF16LE); }
|
||||
_SJIS { REPUT_TOKEN_NEG_SIGN(_SJIS); }
|
||||
_BIG5 { REPUT_TOKEN_NEG_SIGN(_BIG5); }
|
||||
_HKSCS { REPUT_TOKEN_NEG_SIGN(_HKSCS); }
|
||||
_HKSCS31 { REPUT_TOKEN_NEG_SIGN(_HKSCS31); }
|
||||
_DEC8 { REPUT_TOKEN_NEG_SIGN(_DEC8); }
|
||||
NOT {
|
||||
int32_t token_ret = NOT; /*fast parameterize don't care NOT or NOT2*/
|
||||
if (!(IS_FAST_PARAMETERIZE)) {
|
||||
|
@ -208,7 +208,7 @@ DYNAMIC_SAMPLING
|
||||
NEG_SIGN
|
||||
|
||||
%token /*can not be relation name*/
|
||||
_BINARY _UTF8 _UTF8MB4 _UTF8MB3 _GBK _UTF16 _GB18030 _GB18030_2022 _LATIN1 _ASCII _TIS620 CNNOP
|
||||
_BINARY _UTF8 _UTF8MB4 _UTF8MB3 _GBK _UTF16 _GB18030 _GB18030_2022 _LATIN1 _ASCII _TIS620 _UTF16LE _SJIS _BIG5 _DEC8 _HKSCS _HKSCS31 CNNOP
|
||||
SELECT_HINT_BEGIN UPDATE_HINT_BEGIN DELETE_HINT_BEGIN INSERT_HINT_BEGIN REPLACE_HINT_BEGIN HINT_HINT_BEGIN HINT_END
|
||||
LOAD_DATA_HINT_BEGIN CREATE_HINT_BEGIN ALTER_HINT_BEGIN
|
||||
END_P SET_VAR DELIMITER
|
||||
@ -1182,6 +1182,60 @@ _UTF8
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
| _UTF16LE
|
||||
{
|
||||
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
|
||||
$$->str_value_ = parse_strdup("utf16le", result->malloc_pool_, &($$->str_len_));
|
||||
if (OB_UNLIKELY(NULL == $$->str_value_)) {
|
||||
yyerror(NULL, result, "no more space for mallocing string\n");
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
| _SJIS
|
||||
{
|
||||
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
|
||||
$$->str_value_ = parse_strdup("sjis", result->malloc_pool_, &($$->str_len_));
|
||||
if (OB_UNLIKELY(NULL == $$->str_value_)) {
|
||||
yyerror(NULL, result, "no more space for mallocing string\n");
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
| _BIG5
|
||||
{
|
||||
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
|
||||
$$->str_value_ = parse_strdup("big5", result->malloc_pool_, &($$->str_len_));
|
||||
if (OB_UNLIKELY(NULL == $$->str_value_)) {
|
||||
yyerror(NULL, result, "no more space for mallocing string\n");
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
| _HKSCS
|
||||
{
|
||||
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
|
||||
$$->str_value_ = parse_strdup("hkscs", result->malloc_pool_, &($$->str_len_));
|
||||
if (OB_UNLIKELY(NULL == $$->str_value_)) {
|
||||
yyerror(NULL, result, "no more space for mallocing string\n");
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
| _HKSCS31
|
||||
{
|
||||
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
|
||||
$$->str_value_ = parse_strdup("hkscs31", result->malloc_pool_, &($$->str_len_));
|
||||
if (OB_UNLIKELY(NULL == $$->str_value_)) {
|
||||
yyerror(NULL, result, "no more space for mallocing string\n");
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
| _DEC8
|
||||
{
|
||||
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
|
||||
$$->str_value_ = parse_strdup("dec8", result->malloc_pool_, &($$->str_len_));
|
||||
if (OB_UNLIKELY(NULL == $$->str_value_)) {
|
||||
yyerror(NULL, result, "no more space for mallocing string\n");
|
||||
YYABORT_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
literal:
|
||||
|
@ -272,9 +272,12 @@ int ObLoadDataResolver::resolve(const ParseNode &parse_tree)
|
||||
load_args.file_cs_type_ = CS_TYPE_UTF8MB4_BIN;
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
if (ObCharset::charset_type_by_coll(load_args.file_cs_type_) == CHARSET_UTF16) {
|
||||
int64_t mbminlen = 0;
|
||||
if (OB_FAIL(common::ObCharset::get_mbminlen_by_coll(load_args.file_cs_type_, mbminlen))) {
|
||||
LOG_WARN("unexpected error ", K(ret));
|
||||
} else if (mbminlen > 1) {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "utf16 encoded files are");
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "compatible with ascii files are");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -55,7 +55,11 @@ int ObSetNamesResolver::resolve(const ParseNode &parse_tree)
|
||||
// 目前支持gbk,utf16和utf8mb4,只有set names utf16不支持
|
||||
// 如果后续支持更多的字符集,这里需要考虑怎么实现形式更好,
|
||||
// 最好使用函数,目前没有必要
|
||||
if (0 == charset.case_compare("utf16")) {
|
||||
ObCollationType col_type = ObCharset::get_default_collation(ObCharset::charset_type(charset));
|
||||
if (!ObCharset::is_valid_collation(col_type)) {
|
||||
ret = OB_ERR_UNKNOWN_CHARSET;
|
||||
LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr());
|
||||
} else if (ObCharset::get_charset(col_type)->mbminlen > 1) {
|
||||
ret = OB_ERR_WRONG_VALUE_FOR_VAR;
|
||||
LOG_USER_ERROR(OB_ERR_WRONG_VALUE_FOR_VAR,
|
||||
static_cast<int>(strlen("character_set_client")), "character_set_client",
|
||||
|
@ -410,15 +410,22 @@ int ObDCLResolver::resolve_user_list_node(ParseNode *user_node,
|
||||
LOG_WARN("The child of user node should not be NULL", K(ret));
|
||||
} else {
|
||||
ParseNode *user_hostname_node = user_node;
|
||||
|
||||
user_name = ObString (user_hostname_node->children_[0]->str_len_, user_hostname_node->children_[0]->str_value_);
|
||||
if (NULL == user_hostname_node->children_[1]) {
|
||||
if (user_hostname_node->children_[0]->type_ != T_IDENT && OB_FAIL(ObSQLUtils::convert_sql_text_to_schema_for_storing(
|
||||
*allocator_, session_info_->get_dtc_params(), user_name))) {
|
||||
LOG_WARN("fail to convert user name to utf8", K(ret), K(user_name),
|
||||
KPHEX(user_name.ptr(), user_name.length()));
|
||||
} else if (NULL == user_hostname_node->children_[1]) {
|
||||
host_name.assign_ptr(OB_DEFAULT_HOST_NAME, static_cast<int32_t>(STRLEN(OB_DEFAULT_HOST_NAME)));
|
||||
} else {
|
||||
host_name.assign_ptr(user_hostname_node->children_[1]->str_value_,
|
||||
static_cast<int32_t>(user_hostname_node->children_[1]->str_len_));
|
||||
}
|
||||
if (OB_FAIL(schema_checker_->get_user_info(params_.session_info_->get_effective_tenant_id(),
|
||||
user_name, host_name, user_info))) {
|
||||
if (OB_FAIL(ret)) {
|
||||
LOG_WARN("failed to get user name", K(ret), K(user_name));
|
||||
} else if (OB_FAIL(schema_checker_->get_user_info(params_.session_info_->get_effective_tenant_id(),
|
||||
user_name, host_name, user_info))) {
|
||||
LOG_WARN("failed to get user info", K(ret), K(user_name));
|
||||
if (OB_USER_NOT_EXIST == ret) {
|
||||
// 跳过, RS统一处理, 兼容MySQL行为
|
||||
|
@ -174,9 +174,11 @@ int ObCreateTenantResolver::resolve(const ParseNode &parse_tree)
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
if (CHARSET_UTF16 == charset_type) {
|
||||
ObCollationType col_type = ObCharset::get_default_collation(charset_type);
|
||||
if (!ObCharset::is_valid_collation(col_type) ||
|
||||
ObCharset::get_charset(ObCharset::get_default_collation(charset_type))->mbminlen > 1) {
|
||||
ret = OB_NOT_SUPPORTED;
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Use utf16 as database charset");
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Use utf16 and utf16le as database charset");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1120,6 +1120,7 @@ int ObCreateViewResolver::print_star_expanded_view_stmt(common::ObString &expand
|
||||
LOG_WARN("failed to append comma", K(ret));
|
||||
} else {
|
||||
ObSqlString column_name;
|
||||
ObString column_name_copy;
|
||||
if (start_pos != end_pos && OB_FAIL(expanded_str.append(table_name))) {
|
||||
LOG_WARN("failed to append table_name", K(ret));
|
||||
} else if (OB_FAIL(column_name.append("\""))) {
|
||||
@ -1128,7 +1129,11 @@ int ObCreateViewResolver::print_star_expanded_view_stmt(common::ObString &expand
|
||||
LOG_WARN("failed to append column name", K(ret));
|
||||
} else if (OB_FAIL(column_name.append("\""))) {
|
||||
LOG_WARN("failed to append quote", K(ret));
|
||||
} else if (OB_FAIL(expanded_str.append(column_name.string()))) {
|
||||
} else if (OB_FAIL(ob_write_string(*allocator_, column_name.string(), column_name_copy, true))) {
|
||||
LOG_WARN("failed to write string", K(ret));
|
||||
} else if (OB_FAIL(ObSQLUtils::convert_sql_text_from_schema_for_resolve(*allocator_, session_info_->get_dtc_params(), column_name_copy))) {
|
||||
LOG_WARN("failed to convert sql text", K(ret));
|
||||
} else if (OB_FAIL(expanded_str.append(column_name_copy))) {
|
||||
LOG_WARN("failed to append column name", K(ret));
|
||||
}
|
||||
}
|
||||
|
@ -238,7 +238,7 @@ int ObFTParseHelper::segment(
|
||||
} else if (OB_ISNULL(allocator_)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("allocator ptr is nullptr", K(ret), KP_(allocator), K_(is_inited));
|
||||
} else if (OB_UNLIKELY(CS_TYPE_INVALID == type || type >= CS_TYPE_EXTENDED_MARK)) {
|
||||
} else if (OB_UNLIKELY(CS_TYPE_INVALID == type || type >= CS_TYPE_PINYIN_BEGIN_MARK)) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid argument", K(ret), K(type));
|
||||
} else if (OB_ISNULL(cs = common::ObCharset::get_charset(type))) {
|
||||
|
@ -1,9 +1,4 @@
|
||||
|
||||
let $cluster_is_ob = 1;
|
||||
let $cluster_is_mysql = 0;
|
||||
let $collation_count = query_get_value(select count(1) as cnt from information_schema.COLLATIONS, cnt, 1);
|
||||
if ($collation_count > 100) {
|
||||
let $cluster_is_ob = 0;
|
||||
let $cluster_is_mysql = 1;
|
||||
}
|
||||
|
||||
|
@ -992,13 +992,19 @@ select * from information_schema.character_sets;
|
||||
| CHARACTER_SET_NAME | DEFAULT_COLLATE_NAME | DESCRIPTION | MAXLEN |
|
||||
+--------------------+-------------------------+-----------------------+--------+
|
||||
| ascii | ascii_general_ci | US ASCII | 1 |
|
||||
| big5 | big5_chinese_ci | BIG5 | 2 |
|
||||
| binary | binary | Binary pseudo charset | 1 |
|
||||
| dec8 | dec8_swedish_ci | DEC West European | 1 |
|
||||
| gb18030 | gb18030_chinese_ci | GB18030 charset | 4 |
|
||||
| gb18030_2022 | gb18030_2022_chinese_ci | GB18030-2022 charset | 4 |
|
||||
| gbk | gbk_chinese_ci | GBK charset | 2 |
|
||||
| hkscs | hkscs_bin | HKSCS | 2 |
|
||||
| hkscs31 | hkscs31_bin | HKSCS-ISO UNICODE 31 | 2 |
|
||||
| latin1 | latin1_swedish_ci | cp1252 West European | 1 |
|
||||
| sjis | sjis_japanese_ci | SJIS | 2 |
|
||||
| tis620 | tis620_thai_ci | TIS620 Thai | 1 |
|
||||
| utf16 | utf16_general_ci | UTF-16 Unicode | 2 |
|
||||
| utf16 | utf16_general_ci | UTF-16 Unicode | 4 |
|
||||
| utf16le | utf16le_general_ci | UTF-16LE Unicode | 4 |
|
||||
| utf8mb4 | utf8mb4_general_ci | UTF-8 Unicode | 4 |
|
||||
+--------------------+-------------------------+-----------------------+--------+
|
||||
select * from information_schema.statistics where table_schema in ('oceanbase', 'mysql', 'information_schema') and TABLE_NAME not like "ob_all_proxy%" order by TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME;
|
||||
|
@ -13,8 +13,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 2
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
@ -26,23 +24,146 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
|
||||
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
|
||||
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
|
||||
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
ascii_general_ci ascii 11 Yes Yes 1
|
||||
ascii_bin ascii 65 Yes 1
|
||||
tis620_thai_ci tis620 18 Yes Yes 1
|
||||
tis620_bin tis620 89 Yes 1
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
|
||||
utf16le_general_ci utf16le 56 Yes Yes 1
|
||||
utf16le_bin utf16le 62 Yes 1
|
||||
sjis_japanese_ci sjis 13 Yes Yes 1
|
||||
sjis_bin sjis 88 Yes 1
|
||||
big5_chinese_ci big5 1 Yes Yes 1
|
||||
big5_bin big5 84 Yes 1
|
||||
hkscs_bin hkscs 152 Yes Yes 1
|
||||
hkscs31_bin hkscs31 153 Yes Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
utf16_icelandic_ci utf16 102 Yes 8
|
||||
utf16_latvian_ci utf16 103 Yes 8
|
||||
utf16_romanian_ci utf16 104 Yes 8
|
||||
utf16_slovenian_ci utf16 105 Yes 8
|
||||
utf16_polish_ci utf16 106 Yes 8
|
||||
utf16_estonian_ci utf16 107 Yes 8
|
||||
utf16_spanish_ci utf16 108 Yes 8
|
||||
utf16_swedish_ci utf16 109 Yes 8
|
||||
utf16_turkish_ci utf16 110 Yes 8
|
||||
utf16_czech_ci utf16 111 Yes 8
|
||||
utf16_danish_ci utf16 112 Yes 8
|
||||
utf16_lithuanian_ci utf16 113 Yes 8
|
||||
utf16_slovak_ci utf16 114 Yes 8
|
||||
utf16_spanish2_ci utf16 115 Yes 8
|
||||
utf16_roman_ci utf16 116 Yes 8
|
||||
utf16_persian_ci utf16 117 Yes 8
|
||||
utf16_esperanto_ci utf16 118 Yes 8
|
||||
utf16_hungarian_ci utf16 119 Yes 8
|
||||
utf16_sinhala_ci utf16 120 Yes 8
|
||||
utf16_german2_ci utf16 121 Yes 8
|
||||
utf16_croatian_ci utf16 122 Yes 8
|
||||
utf16_unicode_520_ci utf16 123 Yes 8
|
||||
utf16_vietnamese_ci utf16 124 Yes 8
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
|
||||
utf8mb4_latvian_ci utf8mb4 226 Yes 8
|
||||
utf8mb4_romanian_ci utf8mb4 227 Yes 8
|
||||
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
|
||||
utf8mb4_polish_ci utf8mb4 229 Yes 8
|
||||
utf8mb4_estonian_ci utf8mb4 230 Yes 8
|
||||
utf8mb4_spanish_ci utf8mb4 231 Yes 8
|
||||
utf8mb4_swedish_ci utf8mb4 232 Yes 8
|
||||
utf8mb4_turkish_ci utf8mb4 233 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
utf8mb4_danish_ci utf8mb4 235 Yes 8
|
||||
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
|
||||
utf8mb4_slovak_ci utf8mb4 237 Yes 8
|
||||
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
|
||||
utf8mb4_roman_ci utf8mb4 239 Yes 8
|
||||
utf8mb4_persian_ci utf8mb4 240 Yes 8
|
||||
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
|
||||
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
|
||||
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
|
||||
utf8mb4_german2_ci utf8mb4 244 Yes 8
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
|
||||
dec8_swedish_ci dec8 3 Yes Yes 8
|
||||
dec8_bin dec8 69 Yes 8
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
|
||||
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
|
||||
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
|
||||
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
|
||||
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
|
||||
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
|
||||
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
|
||||
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
|
||||
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
|
||||
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
|
||||
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
|
||||
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
|
||||
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
|
||||
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
|
||||
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
|
||||
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
|
||||
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
|
||||
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
|
||||
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
|
||||
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
|
||||
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
|
||||
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
|
||||
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
|
||||
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
|
||||
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
|
||||
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
|
||||
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
|
||||
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
|
||||
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
|
||||
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
|
||||
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
|
||||
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
|
||||
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
|
||||
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
|
||||
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
|
||||
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
|
||||
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
|
||||
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
|
||||
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
|
||||
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
|
||||
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
|
||||
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
|
||||
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
|
||||
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
|
||||
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
|
||||
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
|
||||
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
|
||||
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
|
||||
utf8mb4_0900_bin utf8mb4 309 Yes 1
|
||||
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
|
||||
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
|
||||
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
|
||||
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
|
||||
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
|
||||
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
|
||||
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
|
||||
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
|
||||
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
|
||||
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
|
||||
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
|
||||
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
|
||||
SHOW CHARACTER SET;
|
||||
Charset Description Default collation Maxlen
|
||||
binary Binary pseudo charset binary 1
|
||||
utf8mb4 UTF-8 Unicode utf8mb4_general_ci 4
|
||||
gbk GBK charset gbk_chinese_ci 2
|
||||
utf16 UTF-16 Unicode utf16_general_ci 2
|
||||
utf16 UTF-16 Unicode utf16_general_ci 4
|
||||
gb18030 GB18030 charset gb18030_chinese_ci 4
|
||||
latin1 cp1252 West European latin1_swedish_ci 1
|
||||
gb18030_2022 GB18030-2022 charset gb18030_2022_chinese_ci 4
|
||||
ascii US ASCII ascii_general_ci 1
|
||||
tis620 TIS620 Thai tis620_thai_ci 1
|
||||
utf16le UTF-16LE Unicode utf16le_general_ci 4
|
||||
sjis SJIS sjis_japanese_ci 2
|
||||
big5 BIG5 big5_chinese_ci 2
|
||||
hkscs HKSCS hkscs_bin 2
|
||||
hkscs31 HKSCS-ISO UNICODE 31 hkscs31_bin 2
|
||||
dec8 DEC West European dec8_swedish_ci 1
|
||||
SET NAMES latin1;
|
||||
|
@ -7,8 +7,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 2
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
@ -20,14 +18,131 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
|
||||
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
|
||||
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
|
||||
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
ascii_general_ci ascii 11 Yes Yes 1
|
||||
ascii_bin ascii 65 Yes 1
|
||||
tis620_thai_ci tis620 18 Yes Yes 1
|
||||
tis620_bin tis620 89 Yes 1
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
|
||||
utf16le_general_ci utf16le 56 Yes Yes 1
|
||||
utf16le_bin utf16le 62 Yes 1
|
||||
sjis_japanese_ci sjis 13 Yes Yes 1
|
||||
sjis_bin sjis 88 Yes 1
|
||||
big5_chinese_ci big5 1 Yes Yes 1
|
||||
big5_bin big5 84 Yes 1
|
||||
hkscs_bin hkscs 152 Yes Yes 1
|
||||
hkscs31_bin hkscs31 153 Yes Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
utf16_icelandic_ci utf16 102 Yes 8
|
||||
utf16_latvian_ci utf16 103 Yes 8
|
||||
utf16_romanian_ci utf16 104 Yes 8
|
||||
utf16_slovenian_ci utf16 105 Yes 8
|
||||
utf16_polish_ci utf16 106 Yes 8
|
||||
utf16_estonian_ci utf16 107 Yes 8
|
||||
utf16_spanish_ci utf16 108 Yes 8
|
||||
utf16_swedish_ci utf16 109 Yes 8
|
||||
utf16_turkish_ci utf16 110 Yes 8
|
||||
utf16_czech_ci utf16 111 Yes 8
|
||||
utf16_danish_ci utf16 112 Yes 8
|
||||
utf16_lithuanian_ci utf16 113 Yes 8
|
||||
utf16_slovak_ci utf16 114 Yes 8
|
||||
utf16_spanish2_ci utf16 115 Yes 8
|
||||
utf16_roman_ci utf16 116 Yes 8
|
||||
utf16_persian_ci utf16 117 Yes 8
|
||||
utf16_esperanto_ci utf16 118 Yes 8
|
||||
utf16_hungarian_ci utf16 119 Yes 8
|
||||
utf16_sinhala_ci utf16 120 Yes 8
|
||||
utf16_german2_ci utf16 121 Yes 8
|
||||
utf16_croatian_ci utf16 122 Yes 8
|
||||
utf16_unicode_520_ci utf16 123 Yes 8
|
||||
utf16_vietnamese_ci utf16 124 Yes 8
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
|
||||
utf8mb4_latvian_ci utf8mb4 226 Yes 8
|
||||
utf8mb4_romanian_ci utf8mb4 227 Yes 8
|
||||
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
|
||||
utf8mb4_polish_ci utf8mb4 229 Yes 8
|
||||
utf8mb4_estonian_ci utf8mb4 230 Yes 8
|
||||
utf8mb4_spanish_ci utf8mb4 231 Yes 8
|
||||
utf8mb4_swedish_ci utf8mb4 232 Yes 8
|
||||
utf8mb4_turkish_ci utf8mb4 233 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
utf8mb4_danish_ci utf8mb4 235 Yes 8
|
||||
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
|
||||
utf8mb4_slovak_ci utf8mb4 237 Yes 8
|
||||
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
|
||||
utf8mb4_roman_ci utf8mb4 239 Yes 8
|
||||
utf8mb4_persian_ci utf8mb4 240 Yes 8
|
||||
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
|
||||
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
|
||||
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
|
||||
utf8mb4_german2_ci utf8mb4 244 Yes 8
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
|
||||
dec8_swedish_ci dec8 3 Yes Yes 8
|
||||
dec8_bin dec8 69 Yes 8
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
|
||||
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
|
||||
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
|
||||
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
|
||||
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
|
||||
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
|
||||
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
|
||||
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
|
||||
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
|
||||
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
|
||||
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
|
||||
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
|
||||
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
|
||||
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
|
||||
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
|
||||
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
|
||||
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
|
||||
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
|
||||
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
|
||||
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
|
||||
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
|
||||
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
|
||||
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
|
||||
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
|
||||
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
|
||||
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
|
||||
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
|
||||
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
|
||||
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
|
||||
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
|
||||
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
|
||||
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
|
||||
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
|
||||
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
|
||||
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
|
||||
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
|
||||
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
|
||||
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
|
||||
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
|
||||
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
|
||||
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
|
||||
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
|
||||
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
|
||||
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
|
||||
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
|
||||
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
|
||||
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
|
||||
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
|
||||
utf8mb4_0900_bin utf8mb4 309 Yes 1
|
||||
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
|
||||
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
|
||||
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
|
||||
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
|
||||
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
|
||||
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
|
||||
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
|
||||
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
|
||||
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
|
||||
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
|
||||
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
|
||||
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
|
||||
show collation;
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
|
||||
@ -37,8 +152,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 2
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
@ -50,14 +163,131 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
|
||||
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
|
||||
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
|
||||
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
ascii_general_ci ascii 11 Yes Yes 1
|
||||
ascii_bin ascii 65 Yes 1
|
||||
tis620_thai_ci tis620 18 Yes Yes 1
|
||||
tis620_bin tis620 89 Yes 1
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
|
||||
utf16le_general_ci utf16le 56 Yes Yes 1
|
||||
utf16le_bin utf16le 62 Yes 1
|
||||
sjis_japanese_ci sjis 13 Yes Yes 1
|
||||
sjis_bin sjis 88 Yes 1
|
||||
big5_chinese_ci big5 1 Yes Yes 1
|
||||
big5_bin big5 84 Yes 1
|
||||
hkscs_bin hkscs 152 Yes Yes 1
|
||||
hkscs31_bin hkscs31 153 Yes Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
utf16_icelandic_ci utf16 102 Yes 8
|
||||
utf16_latvian_ci utf16 103 Yes 8
|
||||
utf16_romanian_ci utf16 104 Yes 8
|
||||
utf16_slovenian_ci utf16 105 Yes 8
|
||||
utf16_polish_ci utf16 106 Yes 8
|
||||
utf16_estonian_ci utf16 107 Yes 8
|
||||
utf16_spanish_ci utf16 108 Yes 8
|
||||
utf16_swedish_ci utf16 109 Yes 8
|
||||
utf16_turkish_ci utf16 110 Yes 8
|
||||
utf16_czech_ci utf16 111 Yes 8
|
||||
utf16_danish_ci utf16 112 Yes 8
|
||||
utf16_lithuanian_ci utf16 113 Yes 8
|
||||
utf16_slovak_ci utf16 114 Yes 8
|
||||
utf16_spanish2_ci utf16 115 Yes 8
|
||||
utf16_roman_ci utf16 116 Yes 8
|
||||
utf16_persian_ci utf16 117 Yes 8
|
||||
utf16_esperanto_ci utf16 118 Yes 8
|
||||
utf16_hungarian_ci utf16 119 Yes 8
|
||||
utf16_sinhala_ci utf16 120 Yes 8
|
||||
utf16_german2_ci utf16 121 Yes 8
|
||||
utf16_croatian_ci utf16 122 Yes 8
|
||||
utf16_unicode_520_ci utf16 123 Yes 8
|
||||
utf16_vietnamese_ci utf16 124 Yes 8
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
|
||||
utf8mb4_latvian_ci utf8mb4 226 Yes 8
|
||||
utf8mb4_romanian_ci utf8mb4 227 Yes 8
|
||||
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
|
||||
utf8mb4_polish_ci utf8mb4 229 Yes 8
|
||||
utf8mb4_estonian_ci utf8mb4 230 Yes 8
|
||||
utf8mb4_spanish_ci utf8mb4 231 Yes 8
|
||||
utf8mb4_swedish_ci utf8mb4 232 Yes 8
|
||||
utf8mb4_turkish_ci utf8mb4 233 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
utf8mb4_danish_ci utf8mb4 235 Yes 8
|
||||
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
|
||||
utf8mb4_slovak_ci utf8mb4 237 Yes 8
|
||||
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
|
||||
utf8mb4_roman_ci utf8mb4 239 Yes 8
|
||||
utf8mb4_persian_ci utf8mb4 240 Yes 8
|
||||
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
|
||||
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
|
||||
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
|
||||
utf8mb4_german2_ci utf8mb4 244 Yes 8
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
|
||||
dec8_swedish_ci dec8 3 Yes Yes 8
|
||||
dec8_bin dec8 69 Yes 8
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
|
||||
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
|
||||
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
|
||||
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
|
||||
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
|
||||
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
|
||||
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
|
||||
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
|
||||
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
|
||||
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
|
||||
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
|
||||
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
|
||||
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
|
||||
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
|
||||
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
|
||||
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
|
||||
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
|
||||
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
|
||||
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
|
||||
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
|
||||
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
|
||||
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
|
||||
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
|
||||
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
|
||||
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
|
||||
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
|
||||
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
|
||||
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
|
||||
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
|
||||
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
|
||||
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
|
||||
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
|
||||
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
|
||||
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
|
||||
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
|
||||
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
|
||||
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
|
||||
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
|
||||
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
|
||||
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
|
||||
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
|
||||
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
|
||||
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
|
||||
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
|
||||
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
|
||||
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
|
||||
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
|
||||
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
|
||||
utf8mb4_0900_bin utf8mb4 309 Yes 1
|
||||
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
|
||||
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
|
||||
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
|
||||
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
|
||||
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
|
||||
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
|
||||
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
|
||||
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
|
||||
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
|
||||
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
|
||||
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
|
||||
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
|
||||
show collation test;
|
||||
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your OceanBase version for the right syntax to use near 'test' at line 1
|
||||
SHOW CHARACTER SET;
|
||||
@ -65,23 +295,35 @@ Charset Description Default collation Maxlen
|
||||
binary Binary pseudo charset binary 1
|
||||
utf8mb4 UTF-8 Unicode utf8mb4_general_ci 4
|
||||
gbk GBK charset gbk_chinese_ci 2
|
||||
utf16 UTF-16 Unicode utf16_general_ci 2
|
||||
utf16 UTF-16 Unicode utf16_general_ci 4
|
||||
gb18030 GB18030 charset gb18030_chinese_ci 4
|
||||
latin1 cp1252 West European latin1_swedish_ci 1
|
||||
gb18030_2022 GB18030-2022 charset gb18030_2022_chinese_ci 4
|
||||
ascii US ASCII ascii_general_ci 1
|
||||
tis620 TIS620 Thai tis620_thai_ci 1
|
||||
utf16le UTF-16LE Unicode utf16le_general_ci 4
|
||||
sjis SJIS sjis_japanese_ci 2
|
||||
big5 BIG5 big5_chinese_ci 2
|
||||
hkscs HKSCS hkscs_bin 2
|
||||
hkscs31 HKSCS-ISO UNICODE 31 hkscs31_bin 2
|
||||
dec8 DEC West European dec8_swedish_ci 1
|
||||
SHOW CHARACTER SET;
|
||||
Charset Description Default collation Maxlen
|
||||
binary Binary pseudo charset binary 1
|
||||
utf8mb4 UTF-8 Unicode utf8mb4_general_ci 4
|
||||
gbk GBK charset gbk_chinese_ci 2
|
||||
utf16 UTF-16 Unicode utf16_general_ci 2
|
||||
utf16 UTF-16 Unicode utf16_general_ci 4
|
||||
gb18030 GB18030 charset gb18030_chinese_ci 4
|
||||
latin1 cp1252 West European latin1_swedish_ci 1
|
||||
gb18030_2022 GB18030-2022 charset gb18030_2022_chinese_ci 4
|
||||
ascii US ASCII ascii_general_ci 1
|
||||
tis620 TIS620 Thai tis620_thai_ci 1
|
||||
utf16le UTF-16LE Unicode utf16le_general_ci 4
|
||||
sjis SJIS sjis_japanese_ci 2
|
||||
big5 BIG5 big5_chinese_ci 2
|
||||
hkscs HKSCS hkscs_bin 2
|
||||
hkscs31 HKSCS-ISO UNICODE 31 hkscs31_bin 2
|
||||
dec8 DEC West European dec8_swedish_ci 1
|
||||
SHOW CHARACTER SET test;
|
||||
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your OceanBase version for the right syntax to use near 'test' at line 1
|
||||
set names utf8;
|
||||
|
@ -4,33 +4,51 @@ CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN
|
||||
binary binary Binary pseudo charset 1
|
||||
utf8mb4 utf8mb4_general_ci UTF-8 Unicode 4
|
||||
gbk gbk_chinese_ci GBK charset 2
|
||||
utf16 utf16_general_ci UTF-16 Unicode 2
|
||||
utf16 utf16_general_ci UTF-16 Unicode 4
|
||||
gb18030 gb18030_chinese_ci GB18030 charset 4
|
||||
latin1 latin1_swedish_ci cp1252 West European 1
|
||||
gb18030_2022 gb18030_2022_chinese_ci GB18030-2022 charset 4
|
||||
ascii ascii_general_ci US ASCII 1
|
||||
tis620 tis620_thai_ci TIS620 Thai 1
|
||||
utf16le utf16le_general_ci UTF-16LE Unicode 4
|
||||
sjis sjis_japanese_ci SJIS 2
|
||||
big5 big5_chinese_ci BIG5 2
|
||||
hkscs hkscs_bin HKSCS 2
|
||||
hkscs31 hkscs31_bin HKSCS-ISO UNICODE 31 2
|
||||
dec8 dec8_swedish_ci DEC West European 1
|
||||
select character_set_name, default_collate_name, description, maxlen from character_sets;
|
||||
character_set_name default_collate_name description maxlen
|
||||
binary binary Binary pseudo charset 1
|
||||
utf8mb4 utf8mb4_general_ci UTF-8 Unicode 4
|
||||
gbk gbk_chinese_ci GBK charset 2
|
||||
utf16 utf16_general_ci UTF-16 Unicode 2
|
||||
utf16 utf16_general_ci UTF-16 Unicode 4
|
||||
gb18030 gb18030_chinese_ci GB18030 charset 4
|
||||
latin1 latin1_swedish_ci cp1252 West European 1
|
||||
gb18030_2022 gb18030_2022_chinese_ci GB18030-2022 charset 4
|
||||
ascii ascii_general_ci US ASCII 1
|
||||
tis620 tis620_thai_ci TIS620 Thai 1
|
||||
utf16le utf16le_general_ci UTF-16LE Unicode 4
|
||||
sjis sjis_japanese_ci SJIS 2
|
||||
big5 big5_chinese_ci BIG5 2
|
||||
hkscs hkscs_bin HKSCS 2
|
||||
hkscs31 hkscs31_bin HKSCS-ISO UNICODE 31 2
|
||||
dec8 dec8_swedish_ci DEC West European 1
|
||||
select maxlen from character_sets;
|
||||
maxlen
|
||||
1
|
||||
4
|
||||
2
|
||||
4
|
||||
4
|
||||
1
|
||||
4
|
||||
1
|
||||
1
|
||||
4
|
||||
2
|
||||
2
|
||||
2
|
||||
2
|
||||
4
|
||||
1
|
||||
4
|
||||
1
|
||||
1
|
||||
select * from character_sets where character_set_name like '%binary%';
|
||||
CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN
|
||||
|
@ -8,8 +8,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 2
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
@ -21,14 +19,131 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
|
||||
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
|
||||
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
|
||||
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
ascii_general_ci ascii 11 Yes Yes 1
|
||||
ascii_bin ascii 65 Yes 1
|
||||
tis620_thai_ci tis620 18 Yes Yes 1
|
||||
tis620_bin tis620 89 Yes 1
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
|
||||
utf16le_general_ci utf16le 56 Yes Yes 1
|
||||
utf16le_bin utf16le 62 Yes 1
|
||||
sjis_japanese_ci sjis 13 Yes Yes 1
|
||||
sjis_bin sjis 88 Yes 1
|
||||
big5_chinese_ci big5 1 Yes Yes 1
|
||||
big5_bin big5 84 Yes 1
|
||||
hkscs_bin hkscs 152 Yes Yes 1
|
||||
hkscs31_bin hkscs31 153 Yes Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
utf16_icelandic_ci utf16 102 Yes 8
|
||||
utf16_latvian_ci utf16 103 Yes 8
|
||||
utf16_romanian_ci utf16 104 Yes 8
|
||||
utf16_slovenian_ci utf16 105 Yes 8
|
||||
utf16_polish_ci utf16 106 Yes 8
|
||||
utf16_estonian_ci utf16 107 Yes 8
|
||||
utf16_spanish_ci utf16 108 Yes 8
|
||||
utf16_swedish_ci utf16 109 Yes 8
|
||||
utf16_turkish_ci utf16 110 Yes 8
|
||||
utf16_czech_ci utf16 111 Yes 8
|
||||
utf16_danish_ci utf16 112 Yes 8
|
||||
utf16_lithuanian_ci utf16 113 Yes 8
|
||||
utf16_slovak_ci utf16 114 Yes 8
|
||||
utf16_spanish2_ci utf16 115 Yes 8
|
||||
utf16_roman_ci utf16 116 Yes 8
|
||||
utf16_persian_ci utf16 117 Yes 8
|
||||
utf16_esperanto_ci utf16 118 Yes 8
|
||||
utf16_hungarian_ci utf16 119 Yes 8
|
||||
utf16_sinhala_ci utf16 120 Yes 8
|
||||
utf16_german2_ci utf16 121 Yes 8
|
||||
utf16_croatian_ci utf16 122 Yes 8
|
||||
utf16_unicode_520_ci utf16 123 Yes 8
|
||||
utf16_vietnamese_ci utf16 124 Yes 8
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
|
||||
utf8mb4_latvian_ci utf8mb4 226 Yes 8
|
||||
utf8mb4_romanian_ci utf8mb4 227 Yes 8
|
||||
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
|
||||
utf8mb4_polish_ci utf8mb4 229 Yes 8
|
||||
utf8mb4_estonian_ci utf8mb4 230 Yes 8
|
||||
utf8mb4_spanish_ci utf8mb4 231 Yes 8
|
||||
utf8mb4_swedish_ci utf8mb4 232 Yes 8
|
||||
utf8mb4_turkish_ci utf8mb4 233 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
utf8mb4_danish_ci utf8mb4 235 Yes 8
|
||||
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
|
||||
utf8mb4_slovak_ci utf8mb4 237 Yes 8
|
||||
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
|
||||
utf8mb4_roman_ci utf8mb4 239 Yes 8
|
||||
utf8mb4_persian_ci utf8mb4 240 Yes 8
|
||||
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
|
||||
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
|
||||
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
|
||||
utf8mb4_german2_ci utf8mb4 244 Yes 8
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
|
||||
dec8_swedish_ci dec8 3 Yes Yes 8
|
||||
dec8_bin dec8 69 Yes 8
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
|
||||
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
|
||||
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
|
||||
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
|
||||
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
|
||||
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
|
||||
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
|
||||
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
|
||||
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
|
||||
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
|
||||
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
|
||||
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
|
||||
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
|
||||
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
|
||||
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
|
||||
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
|
||||
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
|
||||
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
|
||||
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
|
||||
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
|
||||
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
|
||||
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
|
||||
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
|
||||
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
|
||||
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
|
||||
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
|
||||
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
|
||||
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
|
||||
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
|
||||
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
|
||||
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
|
||||
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
|
||||
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
|
||||
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
|
||||
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
|
||||
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
|
||||
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
|
||||
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
|
||||
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
|
||||
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
|
||||
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
|
||||
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
|
||||
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
|
||||
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
|
||||
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
|
||||
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
|
||||
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
|
||||
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
|
||||
utf8mb4_0900_bin utf8mb4 309 Yes 1
|
||||
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
|
||||
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
|
||||
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
|
||||
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
|
||||
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
|
||||
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
|
||||
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
|
||||
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
|
||||
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
|
||||
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
|
||||
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
|
||||
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
|
||||
select collation_name, character_set_name, id, is_default, is_compiled, sortlen from collations;
|
||||
collation_name character_set_name id is_default is_compiled sortlen
|
||||
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
|
||||
@ -38,8 +153,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 2
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
@ -51,23 +164,222 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
|
||||
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
|
||||
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
|
||||
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
ascii_general_ci ascii 11 Yes Yes 1
|
||||
ascii_bin ascii 65 Yes 1
|
||||
tis620_thai_ci tis620 18 Yes Yes 1
|
||||
tis620_bin tis620 89 Yes 1
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
|
||||
utf16le_general_ci utf16le 56 Yes Yes 1
|
||||
utf16le_bin utf16le 62 Yes 1
|
||||
sjis_japanese_ci sjis 13 Yes Yes 1
|
||||
sjis_bin sjis 88 Yes 1
|
||||
big5_chinese_ci big5 1 Yes Yes 1
|
||||
big5_bin big5 84 Yes 1
|
||||
hkscs_bin hkscs 152 Yes Yes 1
|
||||
hkscs31_bin hkscs31 153 Yes Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 8
|
||||
utf16_icelandic_ci utf16 102 Yes 8
|
||||
utf16_latvian_ci utf16 103 Yes 8
|
||||
utf16_romanian_ci utf16 104 Yes 8
|
||||
utf16_slovenian_ci utf16 105 Yes 8
|
||||
utf16_polish_ci utf16 106 Yes 8
|
||||
utf16_estonian_ci utf16 107 Yes 8
|
||||
utf16_spanish_ci utf16 108 Yes 8
|
||||
utf16_swedish_ci utf16 109 Yes 8
|
||||
utf16_turkish_ci utf16 110 Yes 8
|
||||
utf16_czech_ci utf16 111 Yes 8
|
||||
utf16_danish_ci utf16 112 Yes 8
|
||||
utf16_lithuanian_ci utf16 113 Yes 8
|
||||
utf16_slovak_ci utf16 114 Yes 8
|
||||
utf16_spanish2_ci utf16 115 Yes 8
|
||||
utf16_roman_ci utf16 116 Yes 8
|
||||
utf16_persian_ci utf16 117 Yes 8
|
||||
utf16_esperanto_ci utf16 118 Yes 8
|
||||
utf16_hungarian_ci utf16 119 Yes 8
|
||||
utf16_sinhala_ci utf16 120 Yes 8
|
||||
utf16_german2_ci utf16 121 Yes 8
|
||||
utf16_croatian_ci utf16 122 Yes 8
|
||||
utf16_unicode_520_ci utf16 123 Yes 8
|
||||
utf16_vietnamese_ci utf16 124 Yes 8
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
|
||||
utf8mb4_latvian_ci utf8mb4 226 Yes 8
|
||||
utf8mb4_romanian_ci utf8mb4 227 Yes 8
|
||||
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
|
||||
utf8mb4_polish_ci utf8mb4 229 Yes 8
|
||||
utf8mb4_estonian_ci utf8mb4 230 Yes 8
|
||||
utf8mb4_spanish_ci utf8mb4 231 Yes 8
|
||||
utf8mb4_swedish_ci utf8mb4 232 Yes 8
|
||||
utf8mb4_turkish_ci utf8mb4 233 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
utf8mb4_danish_ci utf8mb4 235 Yes 8
|
||||
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
|
||||
utf8mb4_slovak_ci utf8mb4 237 Yes 8
|
||||
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
|
||||
utf8mb4_roman_ci utf8mb4 239 Yes 8
|
||||
utf8mb4_persian_ci utf8mb4 240 Yes 8
|
||||
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
|
||||
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
|
||||
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
|
||||
utf8mb4_german2_ci utf8mb4 244 Yes 8
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
|
||||
dec8_swedish_ci dec8 3 Yes Yes 8
|
||||
dec8_bin dec8 69 Yes 8
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
|
||||
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
|
||||
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
|
||||
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
|
||||
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
|
||||
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
|
||||
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
|
||||
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
|
||||
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
|
||||
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
|
||||
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
|
||||
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
|
||||
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
|
||||
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
|
||||
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
|
||||
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
|
||||
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
|
||||
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
|
||||
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
|
||||
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
|
||||
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
|
||||
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
|
||||
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
|
||||
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
|
||||
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
|
||||
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
|
||||
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
|
||||
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
|
||||
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
|
||||
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
|
||||
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
|
||||
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
|
||||
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
|
||||
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
|
||||
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
|
||||
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
|
||||
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
|
||||
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
|
||||
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
|
||||
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
|
||||
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
|
||||
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
|
||||
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
|
||||
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
|
||||
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
|
||||
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
|
||||
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
|
||||
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
|
||||
utf8mb4_0900_bin utf8mb4 309 Yes 1
|
||||
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
|
||||
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
|
||||
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
|
||||
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
|
||||
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
|
||||
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
|
||||
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
|
||||
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
|
||||
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
|
||||
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
|
||||
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
|
||||
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
|
||||
select * from collations where collation_name like '%utf8%';
|
||||
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
|
||||
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
|
||||
utf8mb4_bin utf8mb4 46 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 8
|
||||
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
|
||||
utf8mb4_latvian_ci utf8mb4 226 Yes 8
|
||||
utf8mb4_romanian_ci utf8mb4 227 Yes 8
|
||||
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
|
||||
utf8mb4_polish_ci utf8mb4 229 Yes 8
|
||||
utf8mb4_estonian_ci utf8mb4 230 Yes 8
|
||||
utf8mb4_spanish_ci utf8mb4 231 Yes 8
|
||||
utf8mb4_swedish_ci utf8mb4 232 Yes 8
|
||||
utf8mb4_turkish_ci utf8mb4 233 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
utf8mb4_danish_ci utf8mb4 235 Yes 8
|
||||
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
|
||||
utf8mb4_slovak_ci utf8mb4 237 Yes 8
|
||||
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
|
||||
utf8mb4_roman_ci utf8mb4 239 Yes 8
|
||||
utf8mb4_persian_ci utf8mb4 240 Yes 8
|
||||
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
|
||||
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
|
||||
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
|
||||
utf8mb4_german2_ci utf8mb4 244 Yes 8
|
||||
utf8mb4_croatian_ci utf8mb4 245 Yes 8
|
||||
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
|
||||
utf8mb4_czech_ci utf8mb4 234 Yes 8
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
|
||||
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
|
||||
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
|
||||
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
|
||||
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
|
||||
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
|
||||
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
|
||||
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
|
||||
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
|
||||
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
|
||||
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
|
||||
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
|
||||
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
|
||||
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
|
||||
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
|
||||
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
|
||||
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
|
||||
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
|
||||
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
|
||||
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
|
||||
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
|
||||
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
|
||||
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
|
||||
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
|
||||
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
|
||||
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
|
||||
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
|
||||
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
|
||||
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
|
||||
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
|
||||
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
|
||||
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
|
||||
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
|
||||
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
|
||||
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
|
||||
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
|
||||
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
|
||||
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
|
||||
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
|
||||
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
|
||||
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
|
||||
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
|
||||
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
|
||||
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
|
||||
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
|
||||
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
|
||||
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
|
||||
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
|
||||
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
|
||||
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
|
||||
utf8mb4_0900_bin utf8mb4 309 Yes 1
|
||||
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
|
||||
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
|
||||
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
|
||||
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
|
||||
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
|
||||
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
|
||||
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
|
||||
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
|
||||
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
|
||||
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
|
||||
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
|
||||
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
|
||||
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
|
||||
show create table collations;
|
||||
View Create View character_set_client collation_connection
|
||||
COLLATIONS CREATE VIEW `COLLATIONS` AS select collation as COLLATION_NAME, charset as CHARACTER_SET_NAME, id as ID, `is_default` as IS_DEFAULT, is_compiled as IS_COMPILED, sortlen as SORTLEN from oceanbase.__tenant_virtual_collation utf8mb4 utf8mb4_general_ci
|
||||
|
@ -51,7 +51,7 @@ int segment_and_calc_word_count(
|
||||
int64_t doc_length = 0;
|
||||
if (OB_ISNULL(helper)
|
||||
|| OB_UNLIKELY(ObCollationType::CS_TYPE_INVALID == type
|
||||
|| ObCollationType::CS_TYPE_EXTENDED_MARK < type)
|
||||
|| ObCollationType::CS_TYPE_PINYIN_BEGIN_MARK <= type)
|
||||
|| OB_UNLIKELY(!words_count.created())) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("invalid arguments", K(ret), KPC(helper), K(type), K(words_count.created()));
|
||||
@ -510,7 +510,7 @@ TEST_F(ObTestFTParseHelper, test_parse_fulltext)
|
||||
|
||||
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_INVALID, ObTestAddWord::TEST_FULLTEXT,
|
||||
std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, ft_word_map));
|
||||
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_EXTENDED_MARK, ObTestAddWord::TEST_FULLTEXT,
|
||||
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_PINYIN_BEGIN_MARK, ObTestAddWord::TEST_FULLTEXT,
|
||||
std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, ft_word_map));
|
||||
|
||||
ASSERT_EQ(OB_INIT_TWICE, parse_helper_.init(&allocator_, plugin_name_));
|
||||
@ -681,7 +681,7 @@ TEST_F(ObTestNgramFTParseHelper, test_parse_fulltext)
|
||||
|
||||
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_INVALID, ObTestAddWord::TEST_FULLTEXT,
|
||||
std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words));
|
||||
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_EXTENDED_MARK, ObTestAddWord::TEST_FULLTEXT,
|
||||
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_PINYIN_BEGIN_MARK, ObTestAddWord::TEST_FULLTEXT,
|
||||
std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words));
|
||||
|
||||
ASSERT_EQ(OB_INIT_TWICE, parse_helper_.init(&allocator_, plugin_name_));
|
||||
|
Loading…
x
Reference in New Issue
Block a user