[CP] [FEAT MERGE]字符集从42xrelease分支 patch 合入master分支

This commit is contained in:
qingzhu521 2024-09-26 09:17:10 +00:00 committed by ob-robot
parent e2ca6caa6a
commit 0dc70f6600
87 changed files with 41030 additions and 2306 deletions

5
.gitignore vendored
View File

@ -148,6 +148,11 @@ src/sql/parser/sql_parser_oracle_gbk_mode_lex.c
src/sql/parser/sql_parser_oracle_gbk_mode_lex.h
src/sql/parser/sql_parser_oracle_gbk_mode_tab.c
src/sql/parser/sql_parser_oracle_gbk_mode_tab.h
src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
src/sql/parser/sql_parser_oracle_hkscs_mode_lex.h
src/sql/parser/sql_parser_oracle_hkscs_mode_tab.c
src/sql/parser/sql_parser_oracle_hkscs_mode_tab.h
src/sql/parser/non_reserved_keywords_oracle_hkscs_mode.c
src/sql/parser/sql_parser_oracle_utf8_mode_lex.c
src/sql/parser/sql_parser_oracle_utf8_mode_lex.h
src/sql/parser/sql_parser_oracle_utf8_mode_tab.c

View File

@ -1297,7 +1297,7 @@ inline int obj_print_plain_str<ObHexStringType>(const ObObj &obj, char *buffer,
PRINT_META(); \
BUF_PRINTO(ob_obj_type_str(obj.get_type())); \
J_COLON(); \
if (obj.is_binary() || src_type == CHARSET_UTF16) { \
if (obj.is_binary() || src_type == CHARSET_UTF16 || src_type == CHARSET_UTF16LE) { \
hex_print(obj.get_string_ptr(), obj.get_string_len(), buf, buf_len, pos); \
} else { \
BUF_PRINTO(obj.get_varchar()); \

View File

@ -303,15 +303,15 @@ public:
OB_INLINE bool is_decimal_int() const { return type_ == static_cast<uint8_t>(ObDecimalIntType); }
OB_INLINE bool is_varchar() const
{
return ((type_ == static_cast<uint8_t>(ObVarcharType)) && (CS_TYPE_BINARY != cs_type_));
return ((type_ == static_cast<uint8_t>(ObVarcharType)) && (CS_TYPE_BINARY != get_collation_type()));
}
OB_INLINE bool is_char() const
{
return ((type_ == static_cast<uint8_t>(ObCharType)) && (CS_TYPE_BINARY != cs_type_));
return ((type_ == static_cast<uint8_t>(ObCharType)) && (CS_TYPE_BINARY != get_collation_type()));
}
OB_INLINE bool is_varbinary() const
{
return (type_ == static_cast<uint8_t>(ObVarcharType) && CS_TYPE_BINARY == cs_type_);
return (type_ == static_cast<uint8_t>(ObVarcharType) && CS_TYPE_BINARY == get_collation_type());
}
static bool is_binary(const ObObjType type, const ObCollationType cs_type)
{
@ -319,11 +319,11 @@ public:
}
OB_INLINE bool is_binary() const
{
return is_binary(static_cast<ObObjType>(type_), static_cast<ObCollationType>(cs_type_));
return is_binary(static_cast<ObObjType>(type_), get_collation_type());
}
OB_INLINE bool is_cs_collation_free() const
{
return cs_type_ == CS_TYPE_UTF8MB4_GENERAL_CI || cs_type_ == CS_TYPE_UTF8MB4_BIN;
return get_collation_type() == CS_TYPE_UTF8MB4_GENERAL_CI || get_collation_type() == CS_TYPE_UTF8MB4_BIN;
}
OB_INLINE bool is_hex_string() const { return type_ == static_cast<uint8_t>(ObHexStringType); }
OB_INLINE bool is_raw() const { return type_ == static_cast<uint8_t>(ObRawType); }
@ -337,23 +337,23 @@ public:
|| type_ == static_cast<uint8_t>(ObSetType); }
OB_INLINE bool is_text() const
{
return (ob_is_text_tc(get_type()) && CS_TYPE_BINARY != cs_type_);
return (ob_is_text_tc(get_type()) && CS_TYPE_BINARY != get_collation_type());
}
/*OB_INLINE bool is_oracle_clob() const
{
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY != cs_type_);
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY != get_collation_type());
}*/
OB_INLINE bool is_clob() const
{
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY != cs_type_);
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY != get_collation_type());
}
/*OB_INLINE bool is_oracle_blob() const
{
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY == cs_type_);
return (lib::is_oracle_mode() && ObLongTextType == get_type() && CS_TYPE_BINARY == get_collation_type());
}*/
OB_INLINE bool is_blob() const
{
return (ob_is_text_tc(get_type()) && CS_TYPE_BINARY == cs_type_);
return (ob_is_text_tc(get_type()) && CS_TYPE_BINARY == get_collation_type());
}
OB_INLINE bool is_lob_storage() const
{ return ob_is_large_text(get_type())
@ -416,24 +416,46 @@ public:
OB_INLINE bool is_oracle_decimal() const { return ObNumberType == type_ || ObFloatType == type_ || ObDoubleType == type_ || ObDecimalIntType == type_; }
OB_INLINE bool is_urowid() const { return ObURowIDType == type_; }
OB_INLINE bool is_blob_locator() const { return (ObLobType == type_ && CS_TYPE_BINARY == cs_type_); }
OB_INLINE bool is_clob_locator() const { return (ObLobType == type_ && CS_TYPE_BINARY != cs_type_); }
OB_INLINE bool is_blob_locator() const { return (ObLobType == type_ && CS_TYPE_BINARY == get_collation_type()); }
OB_INLINE bool is_clob_locator() const { return (ObLobType == type_ && CS_TYPE_BINARY != get_collation_type()); }
OB_INLINE bool is_lob_locator() const { return ObLobType == type_; }
OB_INLINE bool is_interval_type() const { return is_interval_ds() || is_interval_ym(); }
OB_INLINE bool is_oracle_temporal_type() const { return is_datetime() || is_otimestamp_type() || is_interval_type(); }
OB_INLINE void set_collation_level(ObCollationLevel cs_level) { cs_level_ = cs_level; }
OB_INLINE void set_collation_type(ObCollationType cs_type) { cs_type_ = cs_type; }
OB_INLINE ObCollationType get_collation_type() {
// ObUserDefinedSQLType reused cs_type as part of sub schema id, therefore always return CS_TYPE_BINARY
return (is_user_defined_sql_type() || is_collection_sql_type()) ? CS_TYPE_BINARY : static_cast<ObCollationType>(cs_type_);
// Raw setter: overwrites the entire cs_level_ byte with the given value, no masking.
// Note that set_collation_type()/get_collation_type() in this class keep bits 8..11
// of the collation type in the upper nibble of cs_level_, so this replaces those too.
OB_INLINE void set_cs_level(uint8_t cs_level) {
cs_level_ = cs_level;
}
// Raw getter: returns the cs_level_ byte as stored, without masking off the upper
// nibble (compare get_collation_level(), which returns only the low 4 bits).
OB_INLINE uint8_t get_cs_level() {
return cs_level_;
}
// Raw setter: overwrites the cs_type_ byte with the given value. Unlike
// set_collation_type(), it does not touch cs_level_.
OB_INLINE void set_cs_type(uint8_t cs_type) {
cs_type_ = cs_type;
}
// Raw getter: returns the cs_type_ byte as stored. Unlike get_collation_type(),
// it neither merges in the bits kept in cs_level_ nor special-cases any object type.
OB_INLINE uint8_t get_cs_type() {
return cs_type_;
}
// Stores the collation level in the low 4 bits of cs_level_.
// The upper 4 bits of cs_level_ are preserved as they are.
OB_INLINE void set_collation_level(ObCollationLevel cs_level) {
  const int kept_high_bits = cs_level_ & 0xF0;
  const int new_low_bits = cs_level & 0xF;
  cs_level_ = kept_high_bits | new_low_bits;
}
// Stores a collation type split across two fields:
//   - bits 0..7  go into cs_type_
//   - bits 8..11 go into the upper nibble of cs_level_
// The low nibble of cs_level_ is left unchanged.
OB_INLINE void set_collation_type(ObCollationType cs_type) {
  const int low_byte = cs_type & 0xFF;
  const int high_nibble = (cs_type & 0xF00) >> 4;
  cs_type_ = low_byte;
  cs_level_ = (cs_level_ & 0xF) | high_nibble;
}
// Non-const overload. Rebuilds the collation type from cs_type_ (bits 0..7) and the
// upper nibble of cs_level_ (bits 8..11). User-defined SQL types and collection SQL
// types always report CS_TYPE_BINARY instead.
OB_INLINE ObCollationType get_collation_type() {
  if (is_user_defined_sql_type() || is_collection_sql_type()) {
    return CS_TYPE_BINARY;
  }
  const uint16_t low_byte = (uint16_t)cs_type_;
  const uint16_t high_bits = ((uint16_t)cs_level_ & 0xF0) << 4;
  return static_cast<ObCollationType>(low_byte | high_bits);
}
OB_INLINE void set_default_collation_type() { set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); }
OB_INLINE ObCollationLevel get_collation_level() const { return static_cast<ObCollationLevel>(cs_level_); }
// Returns the collation level, i.e. only the low 4 bits of cs_level_.
OB_INLINE ObCollationLevel get_collation_level() const {
  const int level_bits = cs_level_ & 0x0F;
  return static_cast<ObCollationLevel>(level_bits);
}
OB_INLINE ObCollationType get_collation_type() const {
// ObUserDefinedSQLType reused cs_type as part of sub schema id, therefore always return CS_TYPE_BINARY
return (is_user_defined_sql_type() || is_collection_sql_type()) ? CS_TYPE_BINARY : static_cast<ObCollationType>(cs_type_);
return (is_user_defined_sql_type() || is_collection_sql_type()) ? CS_TYPE_BINARY :
static_cast<ObCollationType>((uint16_t)cs_type_ | (((uint16_t)cs_level_ & 0xF0) << 4) );
}
OB_INLINE ObCharsetType get_charset_type() const {
return ObCharset::charset_type_by_coll(get_collation_type());

View File

@ -35,6 +35,10 @@ ob_set_subtarget(oblib_lib charset
charset/uca900_zh2_tbls.cc
charset/uca900_zh3_tbls.cc
charset/ob_charset.cpp
charset/ob_ctype_sjis.cc
charset/ob_ctype_big5.cc
charset/ob_ctype_hkscs.cc
charset/ob_ctype_extra.cc
)
ob_set_subtarget(oblib_lib common

View File

@ -16,6 +16,7 @@
#include <stdint.h>
#include "lib/charset/ob_template_helper.h"
#include <netinet/in.h>
#include <string.h>
/*
Functions for big-endian loads and stores. These are safe to use

File diff suppressed because it is too large Load Diff

View File

@ -40,6 +40,12 @@ enum ObCharsetType
CHARSET_GB18030_2022 = 7,
CHARSET_ASCII = 8,
CHARSET_TIS620 = 9,
CHARSET_UTF16LE = 10,
CHARSET_SJIS = 11,
CHARSET_BIG5 = 12,
CHARSET_HKSCS = 13,
CHARSET_HKSCS31 = 14,
CHARSET_DEC8 = 15,
CHARSET_MAX,
};
@ -51,24 +57,34 @@ enum ObCharsetType
*there is no possibly to reach AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_UTF8MB4] and so on
*/
static const int AGGREGATE_2CHARSET[CHARSET_MAX][CHARSET_MAX] = {
//CHARSET_INVALI,CHARSET_UTF8MB4...
{0,0,0,0,0,0,0,0,0,0},//CHARSET_INVALI
{0,0,0,0,0,0,0,0,0,0},//CHARSET_BINARY
{0,0,0,1,2,1,1,1,1,1},//CHARSET_UTF8MB4
{0,0,2,0,2,0,1,0,1,0},//CHARSET_GBK
{0,0,1,1,0,1,1,1,1,1},//CHARSET_UTF16
{0,0,2,0,2,0,1,0,1,0},//CHARSET_GB18030
{0,0,2,2,2,2,0,2,1,0},//CHARSET_LATIN1
{0,0,2,0,2,0,1,0,1,0}, //CHARSET_GB18030_2022
{0,0,2,2,2,2,2,2,0,2},//CHARSET_ASCII
{0,0,2,0,2,0,0,0,1,0},//CHARSET_TIS620
//CHARSET_INVALID,CHARSET_BINARY,CHARSET_UTF8MB4...
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},//CHARSET_INVALID
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},//CHARSET_BINARY
{0,0,0,1,2,1,1,1,1,1,1,1,1,1,1,1},//CHARSET_UTF8MB4
{0,0,2,0,2,0,1,0,1,0,2,0,0,0,0,0},//CHARSET_GBK
{0,0,1,1,0,1,1,1,1,1,0,1,1,1,1,1},//CHARSET_UTF16
{0,0,2,0,2,0,1,0,1,0,2,0,0,0,0,0},//CHARSET_GB18030
{0,0,2,2,2,2,0,2,1,0,2,0,0,0,0,0},//CHARSET_LATIN1
{0,0,2,0,2,0,1,0,1,0,2,0,0,0,0,0},//CHARSET_GB18030_2022
{0,0,2,2,2,2,2,2,0,2,2,2,2,2,2,2},//CHARSET_ASCII
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0},//CHARSET_TIS620
{0,0,2,1,0,1,1,1,1,1,0,1,1,1,1,1}, // UTF16LE
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0}, // SJIS
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0}, // BIG5
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0}, // HKSCS
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0}, // HKSCS31
{0,0,2,0,2,0,0,0,1,0,2,0,0,0,0,0},// DEC8
};
enum ObCollationType
{
CS_TYPE_INVALID = 0,
CS_TYPE_BIG5_CHINESE_CI = 1,
CS_TYPE_DEC8_SWEDISH_CI = 3,
CS_TYPE_LATIN1_SWEDISH_CI = 8,
CS_TYPE_ASCII_GENERAL_CI = 11,
CS_TYPE_SJIS_JAPANESE_CI = 13,
CS_TYPE_TIS620_THAI_CI = 18,
CS_TYPE_GBK_CHINESE_CI = 28,
CS_TYPE_UTF8MB4_GENERAL_CI = 45,
@ -76,13 +92,44 @@ enum ObCollationType
CS_TYPE_LATIN1_BIN = 47,
CS_TYPE_UTF16_GENERAL_CI = 54,
CS_TYPE_UTF16_BIN = 55,
CS_TYPE_UTF16LE_GENERAL_CI = 56,
CS_TYPE_UTF16LE_BIN = 62,
CS_TYPE_BINARY = 63,
CS_TYPE_ASCII_BIN = 65,
CS_TYPE_DEC8_BIN = 69,
CS_TYPE_BIG5_BIN = 84,
CS_TYPE_GBK_BIN = 87,
CS_TYPE_SJIS_BIN = 88,
CS_TYPE_TIS620_BIN = 89,
CS_TYPE_COLLATION_FREE = 100, // mysql中间没有使用这个
CS_TYPE_UTF16_UNICODE_CI = 101,
CS_TYPE_UTF16_ICELANDIC_UCA_CI = 102,
CS_TYPE_UTF16_LATVIAN_UCA_CI = 103,
CS_TYPE_UTF16_ROMANIAN_UCA_CI = 104,
CS_TYPE_UTF16_SLOVENIAN_UCA_CI = 105,
CS_TYPE_UTF16_POLISH_UCA_CI = 106,
CS_TYPE_UTF16_ESTONIAN_UCA_CI = 107,
CS_TYPE_UTF16_SPANISH_UCA_CI = 108,
CS_TYPE_UTF16_SWEDISH_UCA_CI = 109,
CS_TYPE_UTF16_TURKISH_UCA_CI = 110,
CS_TYPE_UTF16_CZECH_UCA_CI = 111,
CS_TYPE_UTF16_DANISH_UCA_CI = 112,
CS_TYPE_UTF16_LITHUANIAN_UCA_CI = 113,
CS_TYPE_UTF16_SLOVAK_UCA_CI = 114,
CS_TYPE_UTF16_SPANISH2_UCA_CI = 115,
CS_TYPE_UTF16_ROMAN_UCA_CI = 116,
CS_TYPE_UTF16_PERSIAN_UCA_CI = 117,
CS_TYPE_UTF16_ESPERANTO_UCA_CI = 118,
CS_TYPE_UTF16_HUNGARIAN_UCA_CI = 119,
CS_TYPE_UTF16_SINHALA_UCA_CI = 120,
CS_TYPE_UTF16_GERMAN2_UCA_CI = 121,
CS_TYPE_UTF16_CROATIAN_UCA_CI = 122,
CS_TYPE_UTF16_UNICODE_520_CI = 123,
CS_TYPE_UTF16_VIETNAMESE_CI = 124,
CS_TYPE_ANY = 125, // unused in mysql
CS_TYPE_HKSCS_BIN = 152,
CS_TYPE_HKSCS31_BIN = 153,
CS_TYPE_GB18030_2022_BIN = 216, // unused in mysql
CS_TYPE_GB18030_2022_PINYIN_CI = 217, // unused in mysql
CS_TYPE_GB18030_2022_PINYIN_CS = 218, // unused in mysql
@ -91,20 +138,98 @@ enum ObCollationType
CS_TYPE_GB18030_2022_STROKE_CI = 221, // unused in mysql
CS_TYPE_GB18030_2022_STROKE_CS = 222, // unused in mysql
CS_TYPE_UTF8MB4_UNICODE_CI = 224,
CS_TYPE_UTF8MB4_CZECH_CI = 234,
CS_TYPE_UTF8MB4_CROATIAN_CI = 245,
CS_TYPE_UTF8MB4_UNICODE_520_CI = 246,
CS_TYPE_UTF8MB4_ICELANDIC_UCA_CI,
CS_TYPE_UTF8MB4_LATVIAN_UCA_CI ,
CS_TYPE_UTF8MB4_ROMANIAN_UCA_CI ,
CS_TYPE_UTF8MB4_SLOVENIAN_UCA_CI,
CS_TYPE_UTF8MB4_POLISH_UCA_CI ,
CS_TYPE_UTF8MB4_ESTONIAN_UCA_CI ,
CS_TYPE_UTF8MB4_SPANISH_UCA_CI ,
CS_TYPE_UTF8MB4_SWEDISH_UCA_CI ,
CS_TYPE_UTF8MB4_TURKISH_UCA_CI ,
CS_TYPE_UTF8MB4_CZECH_UCA_CI ,
CS_TYPE_UTF8MB4_DANISH_UCA_CI ,
CS_TYPE_UTF8MB4_LITHUANIAN_UCA_CI,
CS_TYPE_UTF8MB4_SLOVAK_UCA_CI ,
CS_TYPE_UTF8MB4_SPANISH2_UCA_CI,
CS_TYPE_UTF8MB4_ROMAN_UCA_CI,
CS_TYPE_UTF8MB4_PERSIAN_UCA_CI ,
CS_TYPE_UTF8MB4_ESPERANTO_UCA_CI,
CS_TYPE_UTF8MB4_HUNGARIAN_UCA_CI,
CS_TYPE_UTF8MB4_SINHALA_UCA_CI ,
CS_TYPE_UTF8MB4_GERMAN2_UCA_CI ,
CS_TYPE_UTF8MB4_CROATIAN_UCA_CI,
CS_TYPE_UTF8MB4_UNICODE_520_CI ,
CS_TYPE_UTF8MB4_VIETNAMESE_CI ,
CS_TYPE_GB18030_CHINESE_CI = 248,
CS_TYPE_GB18030_BIN = 249,
CS_TYPE_GB18030_CHINESE_CS = 251,
CS_TYPE_UTF8MB4_0900_AI_CI = 255,
CS_TYPE_EXTENDED_MARK = 256, //the cs types below can not used for storing
CS_TYPE_UTF8MB4_0900_BIN, //309 in mysql 8.0
CS_TYPE_UTF8MB4_DE_PB_0900_AI_CI ,
CS_TYPE_UTF8MB4_IS_0900_AI_CI ,
CS_TYPE_UTF8MB4_LV_0900_AI_CI ,
CS_TYPE_UTF8MB4_RO_0900_AI_CI ,
CS_TYPE_UTF8MB4_SL_0900_AI_CI ,
CS_TYPE_UTF8MB4_PL_0900_AI_CI ,
CS_TYPE_UTF8MB4_ET_0900_AI_CI ,
CS_TYPE_UTF8MB4_ES_0900_AI_CI ,
CS_TYPE_UTF8MB4_SV_0900_AI_CI ,
CS_TYPE_UTF8MB4_TR_0900_AI_CI ,
CS_TYPE_UTF8MB4_CS_0900_AI_CI ,
CS_TYPE_UTF8MB4_DA_0900_AI_CI ,
CS_TYPE_UTF8MB4_LT_0900_AI_CI ,
CS_TYPE_UTF8MB4_SK_0900_AI_CI ,
CS_TYPE_UTF8MB4_ES_TRAD_0900_AI_CI,
CS_TYPE_UTF8MB4_LA_0900_AI_CI ,
CS_TYPE_UTF8MB4_EO_0900_AI_CI = 273 ,
CS_TYPE_UTF8MB4_HU_0900_AI_CI ,
CS_TYPE_UTF8MB4_HR_0900_AI_CI ,
CS_TYPE_UTF8MB4_VI_0900_AI_CI = 277 ,
CS_TYPE_UTF8MB4_0900_AS_CS ,
CS_TYPE_UTF8MB4_DE_PB_0900_AS_CS ,
CS_TYPE_UTF8MB4_IS_0900_AS_CS ,
CS_TYPE_UTF8MB4_LV_0900_AS_CS ,
CS_TYPE_UTF8MB4_RO_0900_AS_CS ,
CS_TYPE_UTF8MB4_SL_0900_AS_CS ,
CS_TYPE_UTF8MB4_PL_0900_AS_CS ,
CS_TYPE_UTF8MB4_ET_0900_AS_CS ,
CS_TYPE_UTF8MB4_ES_0900_AS_CS ,
CS_TYPE_UTF8MB4_SV_0900_AS_CS ,
CS_TYPE_UTF8MB4_TR_0900_AS_CS ,
CS_TYPE_UTF8MB4_CS_0900_AS_CS ,
CS_TYPE_UTF8MB4_DA_0900_AS_CS ,
CS_TYPE_UTF8MB4_LT_0900_AS_CS ,
CS_TYPE_UTF8MB4_SK_0900_AS_CS ,
CS_TYPE_UTF8MB4_ES_TRAD_0900_AS_CS,
CS_TYPE_UTF8MB4_LA_0900_AS_CS ,
CS_TYPE_UTF8MB4_EO_0900_AS_CS = 296,
CS_TYPE_UTF8MB4_HU_0900_AS_CS ,
CS_TYPE_UTF8MB4_HR_0900_AS_CS ,
CS_TYPE_UTF8MB4_VI_0900_AS_CS = 300,
CS_TYPE_UTF8MB4_JA_0900_AS_CS = 303,
CS_TYPE_UTF8MB4_JA_0900_AS_CS_KS ,
CS_TYPE_UTF8MB4_0900_AS_CI ,
CS_TYPE_UTF8MB4_RU_0900_AI_CI ,
CS_TYPE_UTF8MB4_RU_0900_AS_CS ,
CS_TYPE_UTF8MB4_ZH_0900_AS_CS = 308 ,
CS_TYPE_UTF8MB4_0900_BIN,
CS_TYPE_UTF8MB4_NB_0900_AI_CI ,
CS_TYPE_UTF8MB4_NB_0900_AS_CS ,
CS_TYPE_UTF8MB4_NN_0900_AI_CI ,
CS_TYPE_UTF8MB4_NN_0900_AS_CS ,
CS_TYPE_UTF8MB4_SR_LATN_0900_AI_CI,
CS_TYPE_UTF8MB4_SR_LATN_0900_AS_CS,
CS_TYPE_UTF8MB4_BS_0900_AI_CI ,
CS_TYPE_UTF8MB4_BS_0900_AS_CS ,
CS_TYPE_UTF8MB4_BG_0900_AI_CI ,
CS_TYPE_UTF8MB4_BG_0900_AS_CS ,
CS_TYPE_UTF8MB4_GL_0900_AI_CI ,
CS_TYPE_UTF8MB4_GL_0900_AS_CS ,
CS_TYPE_UTF8MB4_MN_CYRL_0900_AI_CI,
CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS,
//pinyin order (occupied)
CS_TYPE_PINYIN_BEGIN_MARK,
CS_TYPE_UTF8MB4_ZH_0900_AS_CS, //308 in mysql 8.0
CS_TYPE_UTF8MB4_ZH_0900_AS_CS_CPY, //308 in mysql 8.0
CS_TYPE_GBK_ZH_0900_AS_CS,
CS_TYPE_UTF16_ZH_0900_AS_CS,
CS_TYPE_GB18030_ZH_0900_AS_CS,
@ -112,6 +237,12 @@ enum ObCollationType
CS_TYPE_GB18030_2022_ZH_0900_AS_CS,
CS_TYPE_ASCII_ZH_0900_AS_CS,
CS_TYPE_TIS620_ZH_0900_AS_CS,
CS_TYPE_UTF16LE_ZH_0900_AS_CS,
CS_TYPE_SJIS_ZH_0900_AS_CS,
CS_TYPE_BIG5_ZH_0900_AS_CS,
CS_TYPE_HKSCS_ZH_0900_AS_CS,
CS_TYPE_HKSCS31_ZH_0900_AS_CS,
CS_TYPE_DEC8_ZH_0900_AS_CS,
//radical-stroke order
CS_TYPE_RADICAL_BEGIN_MARK,
@ -123,7 +254,12 @@ enum ObCollationType
CS_TYPE_GB18030_2022_ZH2_0900_AS_CS,
CS_TYPE_ASCII_ZH2_0900_AS_CS,
CS_TYPE_TIS620_ZH2_0900_AS_CS,
CS_TYPE_UTF16LE_ZH2_0900_AS_CS,
CS_TYPE_SJIS_ZH2_0900_AS_CS,
CS_TYPE_BIG5_ZH2_0900_AS_CS,
CS_TYPE_HKSCS_ZH2_0900_AS_CS,
CS_TYPE_HKSCS31_ZH2_0900_AS_CS,
CS_TYPE_DEC8_ZH2_0900_AS_CS,
//stroke order
CS_TYPE_STROKE_BEGIN_MARK,
CS_TYPE_UTF8MB4_ZH3_0900_AS_CS,
@ -134,7 +270,12 @@ enum ObCollationType
CS_TYPE_GB18030_2022_ZH3_0900_AS_CS,
CS_TYPE_ASCII_ZH3_0900_AS_CS,
CS_TYPE_TIS620_ZH3_0900_AS_CS,
CS_TYPE_UTF16LE_ZH3_0900_AS_CS,
CS_TYPE_SJIS_ZH3_0900_AS_CS,
CS_TYPE_BIG5_ZH3_0900_AS_CS,
CS_TYPE_HKSCS_ZH3_0900_AS_CS,
CS_TYPE_HKSCS31_ZH3_0900_AS_CS,
CS_TYPE_DEC8_ZH3_0900_AS_CS,
CS_TYPE_MAX
};
@ -149,9 +290,12 @@ enum ObNlsCharsetId
CHARSET_ZHS16GBK_ID = 852,
CHARSET_ZHS32GB18030_ID = 854,
CHARSET_ZHS32GB18030_2022_ID = 859, // not used in oracle
CHARSET_ZHT16HKSCS_ID = 868,
CHARSET_UTF8_ID = 871,
CHARSET_AL32UTF8_ID = 873,
CHARSET_ZHT16HKSCS31_ID = 992,
CHARSET_AL16UTF16_ID = 2000,
CHARSET_AL16UTF16LE_ID = 2002,
CHARSET_MAX_ID,
};
@ -233,10 +377,8 @@ public:
static const int32_t MAX_CASE_MULTIPLY = 4;
//比如latin1 1byte ,utf8mb4 4byte,转换因子为4,也可以理解为最多使用4字节存储一个字符
static const int32_t CharConvertFactorNum = 4;
static const int64_t VALID_CHARSET_TYPES = 9;
static const int64_t VALID_COLLATION_TYPES = 31;
static const int64_t VALID_CHARSET_TYPES = 15;
static const int64_t VALID_COLLATION_TYPES = 143;
static int init_charset();
// strntodv2 is an enhanced version of strntod,
// which handles nan/infinity values in oracle mode.
@ -407,19 +549,7 @@ public:
static ObCollationType collation_type(const ObString &cs_name);
static bool is_valid_collation(ObCharsetType charset_type, ObCollationType coll_type);
static bool is_valid_collation(int64_t coll_type_int);
static bool is_valid_charset(int64_t cs_type_int)
{
ObCharsetType charset_type = static_cast<ObCharsetType>(cs_type_int);
return CHARSET_BINARY == charset_type
|| CHARSET_UTF8MB4 == charset_type
|| CHARSET_GBK == charset_type
|| CHARSET_UTF16 == charset_type
|| CHARSET_GB18030 == charset_type
|| CHARSET_GB18030_2022 == charset_type
|| CHARSET_LATIN1 == charset_type
|| CHARSET_ASCII == charset_type
|| CHARSET_TIS620 == charset_type;
}
static bool is_valid_charset(int64_t cs_type_int);
static bool is_gb18030_2022(int64_t coll_type_int) {
ObCollationType coll_type = static_cast<ObCollationType>(coll_type_int);
return CS_TYPE_GB18030_2022_BIN <= coll_type && coll_type <= CS_TYPE_GB18030_2022_STROKE_CS;
@ -588,15 +718,20 @@ public:
static int get_nls_charset_id_by_charset_type(ObCharsetType charset_type);
static ObNlsCharsetId charset_type_to_ora_charset_id(ObCharsetType cs_type);
static ObCharsetType ora_charset_type_to_charset_type(ObNlsCharsetId charset_id);
static int trim_end_of_str(const char *buf, int length, char *&trim_end, ObCharsetType ctype);
static bool is_valid_nls_collation(ObNLSCollation nls_collation);
static bool is_valid_ora_charset_id(ObNlsCharsetId charset_id);
static ObCollationType ora_charset_type_to_coll_type(ObNlsCharsetId charset_id);
static ObCollationType get_coll_type_by_nlssort_param(ObCharsetType charset_type,
const ObString &nlssort_param);
private:
static int init_charset_and_arr();
static int init_charset_info_coll_info(ObCharsetInfo *cs, ObCharsetLoader& loader);
static bool is_argument_valid(const ObCharsetInfo *charset_info, const char *str, int64_t str_len);
static bool is_argument_valid(const ObCollationType collation_type, const char *str1, int64_t str_len1, const char *str2, int64_t str_len2);
static int copy_zh_cs(ObCharsetInfo *from_cs, ObCollationType to_coll_type, ObCharsetInfo *&to_cs);
static int copy_zh_cs(ObCharsetInfo *from_cs, ObCharsetType charset_type, ObCharsetInfo *&to_cs);
private:
// disallow copy
DISALLOW_COPY_AND_ASSIGN(ObCharset);
@ -604,6 +739,7 @@ private:
static const ObCharsetWrapper charset_wrap_arr_[VALID_CHARSET_TYPES];
static const ObCollationWrapper collation_wrap_arr_[VALID_COLLATION_TYPES];
static ObCharsetInfo *charset_arr[CS_TYPE_MAX]; // CHARSET_INFO *
static ObCharsetType collation_charset_map[CS_TYPE_MAX];
static ObCharsetType default_charset_type_;
static ObCollationType default_collation_type_;
};
@ -657,7 +793,7 @@ public:
}
static int remove_char_endspace(ObString &str,
const ObCharsetType &charset_type);
const ObCharsetInfo *charsetInfo);
private:
static ObString const_str_for_ascii_[CHARSET_MAX][INT8_MAX + 1];
};

View File

@ -700,6 +700,36 @@ public:
foreach_char_prototype<CHARSET_UTF16, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
: foreach_char_prototype<CHARSET_UTF16, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
break;
case CHARSET_UTF16LE:
ret = convert_unicode ?
foreach_char_prototype<CHARSET_UTF16LE, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
: foreach_char_prototype<CHARSET_UTF16LE, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
break;
case CHARSET_SJIS:
ret = convert_unicode ?
foreach_char_prototype<CHARSET_SJIS, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
: foreach_char_prototype<CHARSET_SJIS, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
break;
case CHARSET_HKSCS:
ret = convert_unicode ?
foreach_char_prototype<CHARSET_HKSCS, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
: foreach_char_prototype<CHARSET_HKSCS, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
break;
case CHARSET_HKSCS31:
ret = convert_unicode ?
foreach_char_prototype<CHARSET_HKSCS31, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
: foreach_char_prototype<CHARSET_HKSCS31, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
break;
case CHARSET_DEC8:
ret = convert_unicode ?
foreach_char_prototype<CHARSET_DEC8, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
: foreach_char_prototype<CHARSET_DEC8, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
break;
case CHARSET_BIG5:
ret = convert_unicode ?
foreach_char_prototype<CHARSET_BIG5, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)
: foreach_char_prototype<CHARSET_BIG5, HANDLE_FUNC, false>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len);
break;
case CHARSET_LATIN1:
ret = convert_unicode ?
foreach_char_prototype<CHARSET_LATIN1, HANDLE_FUNC, true>(str, func, ignore_convert_failed, stop_when_truncated, truncated_len)

View File

@ -26,10 +26,13 @@
#define OB_UTF8MB4_0900_AI_CI OB_UTF8MB4 "_0900_ai_ci"
#define OB_UTF16 "utf16"
#define OB_UTF16LE "utf16le"
#define OB_UTF16_GENERAL_CI OB_UTF16 "_general_ci"
#define OB_UTF16_BIN OB_UTF16 "_bin"
#define OB_UTF16_UNICODE_CI OB_UTF16 "_unicode_ci"
#define OB_UTF16LE_GENERAL_CI OB_UTF16LE "_general_ci"
#define OB_UTF16LE_BIN OB_UTF16LE "_bin"
#define OB_LATIN1 "latin1"
#define OB_LATIN1_SWEDISH_CI OB_LATIN1 "_swedish_ci"
@ -262,6 +265,7 @@ typedef struct ObCharsetHandler
char **endptr, int *error);
size_t (*scan)(const struct ObCharsetInfo *, const char *b,
const char *e, int sq);
const unsigned char * (*skip_trailing_space)(const struct ObCharsetInfo *,const unsigned char *ptr,size_t len);
} ObCharsetHandler;
static const int HASH_BUFFER_LENGTH = 128;
@ -444,6 +448,7 @@ extern ObUniCtype ob_uni_ctype[256];
//=============================================================================
extern ObUnicaseInfo ob_unicase_default;
extern ObUnicaseInfo ob_unicase_turkish;
extern ObUnicaseInfo ob_unicase_unicode520;
//=============================================================================
@ -455,6 +460,8 @@ extern ObCharsetInfo ob_charset_gbk_chinese_ci;
extern ObCharsetInfo ob_charset_gbk_bin;
extern ObCharsetInfo ob_charset_utf16_general_ci;
extern ObCharsetInfo ob_charset_utf16_bin;
extern ObCharsetInfo ob_charset_utf16le_general_ci;
extern ObCharsetInfo ob_charset_utf16le_bin;
extern ObCharsetInfo ob_charset_gb18030_chinese_ci;
extern ObCharsetInfo ob_charset_gb18030_chinese_cs;
extern ObCharsetInfo ob_charset_gb18030_bin;
@ -481,12 +488,24 @@ extern ObCharsetInfo ob_charset_ascii;
extern ObCharsetInfo ob_charset_ascii_bin;
extern ObCharsetInfo ob_charset_tis620_thai_ci;
extern ObCharsetInfo ob_charset_tis620_bin;
extern ObCharsetInfo ob_charset_sjis_japanese_ci;
extern ObCharsetInfo ob_charset_sjis_bin;
extern ObCollationHandler ob_collation_mb_bin_handler;
extern ObCharsetHandler ob_charset_utf8mb4_handler;
extern ObCharsetHandler ob_charset_utf16_handler;
extern ObCharsetHandler ob_charset_utf16le_handler;
extern ObCollationHandler ob_collation_binary_handler;
extern ObCollationHandler ob_collation_8bit_bin_handler;
extern ObCollationHandler ob_collation_8bit_simple_ci_handler;
extern ObCharsetInfo ob_charset_big5_chinese_ci;
extern ObCharsetInfo ob_charset_big5_bin;
extern ObCharsetInfo ob_charset_hkscs_bin;
extern ObCharsetInfo ob_charset_hkscs31_bin;
extern ObCharsetInfo ob_charset_dec8_swedish_ci;
extern ObCharsetInfo ob_charset_dec8_bin;
extern ObCharsetInfo *uca900_collations[];
extern ObCharsetInfo *euro_collations[];
//=============================================================================
void ob_fill_8bit(const ObCharsetInfo *cs, char* to, size_t l, int fill);
@ -569,36 +588,11 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,
ulong *nr1, ulong *nr2,
const bool calc_end_space, hash_algo hash_algo);
inline const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len, bool is_utf16 /*false*/)
{
const static unsigned SPACE_INT = 0x20202020;
const unsigned char *end= ptr + len;
if (len > 20 && !is_utf16) {
const unsigned char *end_words= (const unsigned char *)(int_ptr)
(((ulonglong)(int_ptr)end) / SIZEOF_INT * SIZEOF_INT);
const unsigned char *start_words= (const unsigned char *)(int_ptr)
((((ulonglong)(int_ptr)ptr) + SIZEOF_INT - 1) / SIZEOF_INT * SIZEOF_INT);
ob_charset_assert(((ulonglong)(int_ptr)ptr) >= SIZEOF_INT);
if (end_words > ptr) {
while (end > end_words && end[-1] == 0x20) {
end--;
}
if (end[-1] == 0x20 && start_words < end_words) {
while (end > start_words && ((unsigned *)end)[-1] == SPACE_INT) {
end -= SIZEOF_INT;
}
}
}
}
if (is_utf16) {
while (end - 1 > ptr && end[-2] == 0x00 && end[-1] == 0x20)
end-=2;
} else {
while (end > ptr && end[-1] == 0x20)
end--;
}
return (end);
}
int ob_strcasecmp_mb(const ObCharsetInfo *cs, const char *s, const char *t);
const unsigned char *skip_trailing_space(const struct ObCharsetInfo *, const unsigned char *ptr,size_t len);
const unsigned char *skip_trailing_space_utf16(const struct ObCharsetInfo *, const unsigned char *ptr,size_t len);
const unsigned char *skip_trailing_space_utf16le(const struct ObCharsetInfo *, const unsigned char *ptr,size_t len);
size_t ob_numchars_mb(const ObCharsetInfo *cs __attribute__((unused)), const char *pos, const char *end);
@ -749,6 +743,10 @@ unsigned int ob_ismbchar_8bit(const ObCharsetInfo *cs __attribute__((unused)), c
extern "C" void right_to_die_or_duty_to_live_c();
// Writes a 16-bit code into s[0] and s[1], high byte first.
// The caller must provide at least two writable bytes at s.
static inline void OB_PUT_MB2(unsigned char *s, uint16 code) {
  const unsigned char high_byte = static_cast<unsigned char>(code >> 8);
  const unsigned char low_byte = static_cast<unsigned char>(code & 0xFF);
  s[0] = high_byte;
  s[1] = low_byte;
}
#endif /* OCEANBASE_LIB_OBMYSQL_OB_CTYPE_ */

View File

@ -43,7 +43,9 @@ static ObCharsetHandler ob_charset_ascii_handler = {
ob_strntod_8bit,
//ob_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit};
ob_scan_8bit,
skip_trailing_space
};
ObCharsetInfo ob_charset_ascii = {
11,0,0,

View File

@ -0,0 +1,426 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "lib/charset/ob_ctype.h"
#include "ob_ctype_big5_tab.h"
/*
  Byte-level helpers for Big5 double-byte codes.
  NOTE: these are macros; hasbig5head/hasbig5tail evaluate their argument more
  than once, so do not pass expressions with side effects.
*/

/* True when byte c lies in the lead-byte range 0xA1..0xF9. */
#define hasbig5head(c) (0xa1 <= (unsigned char)(c) && \
                        (unsigned char)(c) <= 0xf9)

/* True when byte c lies in a trail-byte range: 0x40..0x7E or 0xA1..0xFE. */
#define hasbig5tail(c)                 \
  ((0x40 <= (unsigned char)(c) &&      \
    (unsigned char)(c) <= 0x7e) ||     \
   (0xa1 <= (unsigned char)(c) &&      \
    (unsigned char)(c) <= 0xfe))

/* True when (c, d) is a lead byte followed by a trail byte. */
#define isbig5code(c, d) (hasbig5head(c) && hasbig5tail(d))

/* Combines lead byte c and trail byte d into one 16-bit value (c in the high byte). */
#define big5code(c, d) (((unsigned char)(c) << 8) | (unsigned char)(d))

/*
  Extract the high / low byte of a 16-bit code.
  The argument is parenthesized so that compound expressions such as
  getbig5head(x | y) are shifted/masked as a whole.
*/
#define getbig5head(e) ((unsigned char)((e) >> 8))
#define getbig5tail(e) ((unsigned char)((e) & 0xff))
/*
  Maps a 16-bit Big5 code to a representative code for the group it belongs to.
  Every code that falls into the same listed range(s) below yields the same
  return value, so codes within one group become indistinguishable after the
  mapping. Any code not matched by a branch returns 0xA140.
  Per the original inline comment, the grouping follows stroke order.
*/
static uint16 big5strokexfrm(uint16 i) {
// stroke order
if ((i == 0xA440) || (i == 0xA441))
return 0xA440;
else if (((i >= 0xA442) && (i <= 0xA453)) || ((i >= 0xC940) && (i <= 0xC944)))
return 0xA442;
else if (((i >= 0xA454) && (i <= 0xA47E)) || ((i >= 0xC945) && (i <= 0xC94C)))
return 0xA454;
else if (((i >= 0xA4A1) && (i <= 0xA4FD)) || ((i >= 0xC94D) && (i <= 0xC962)))
return 0xA4A1;
else if (((i >= 0xA4FE) && (i <= 0xA5DF)) || ((i >= 0xC963) && (i <= 0xC9AA)))
return 0xA4FE;
else if (((i >= 0xA5E0) && (i <= 0xA6E9)) || ((i >= 0xC9AB) && (i <= 0xCA59)))
return 0xA5E0;
else if (((i >= 0xA6EA) && (i <= 0xA8C2)) || ((i >= 0xCA5A) && (i <= 0xCBB0)))
return 0xA6EA;
else if ((i == 0xA260) || ((i >= 0xA8C3) && (i <= 0xAB44)) ||
((i >= 0xCBB1) && (i <= 0xCDDC)))
return 0xA8C3;
else if ((i == 0xA259) || (i == 0xF9DA) || ((i >= 0xAB45) && (i <= 0xADBB)) ||
((i >= 0xCDDD) && (i <= 0xD0C7)))
return 0xAB45;
else if ((i == 0xA25A) || ((i >= 0xADBC) && (i <= 0xB0AD)) ||
((i >= 0xD0C8) && (i <= 0xD44A)))
return 0xADBC;
else if ((i == 0xA25B) || (i == 0xA25C) || ((i >= 0xB0AE) && (i <= 0xB3C2)) ||
((i >= 0xD44B) && (i <= 0xD850)))
return 0xB0AE;
else if ((i == 0xF9DB) || ((i >= 0xB3C3) && (i <= 0xB6C2)) ||
((i >= 0xD851) && (i <= 0xDCB0)))
return 0xB3C3;
else if ((i == 0xA25D) || (i == 0xA25F) || (i == 0xC6A1) || (i == 0xF9D6) ||
(i == 0xF9D8) || ((i >= 0xB6C3) && (i <= 0xB9AB)) ||
((i >= 0xDCB1) && (i <= 0xE0EF)))
return 0xB6C3;
else if ((i == 0xF9DC) || ((i >= 0xB9AC) && (i <= 0xBBF4)) ||
((i >= 0xE0F0) && (i <= 0xE4E5)))
return 0xB9AC;
else if ((i == 0xA261) || ((i >= 0xBBF5) && (i <= 0xBEA6)) ||
((i >= 0xE4E6) && (i <= 0xE8F3)))
return 0xBBF5;
else if ((i == 0xA25E) || (i == 0xF9D7) || (i == 0xF9D9) ||
((i >= 0xBEA7) && (i <= 0xC074)) || ((i >= 0xE8F4) && (i <= 0xECB8)))
return 0xBEA7;
else if (((i >= 0xC075) && (i <= 0xC24E)) || ((i >= 0xECB9) && (i <= 0xEFB6)))
return 0xC075;
else if (((i >= 0xC24F) && (i <= 0xC35E)) || ((i >= 0xEFB7) && (i <= 0xF1EA)))
return 0xC24F;
else if (((i >= 0xC35F) && (i <= 0xC454)) || ((i >= 0xF1EB) && (i <= 0xF3FC)))
return 0xC35F;
else if (((i >= 0xC455) && (i <= 0xC4D6)) || ((i >= 0xF3FD) && (i <= 0xF5BF)))
return 0xC455;
else if (((i >= 0xC4D7) && (i <= 0xC56A)) || ((i >= 0xF5C0) && (i <= 0xF6D5)))
return 0xC4D7;
else if (((i >= 0xC56B) && (i <= 0xC5C7)) || ((i >= 0xF6D6) && (i <= 0xF7CF)))
return 0xC56B;
else if (((i >= 0xC5C8) && (i <= 0xC5F0)) || ((i >= 0xF7D0) && (i <= 0xF8A4)))
return 0xC5C8;
else if (((i >= 0xC5F1) && (i <= 0xC654)) || ((i >= 0xF8A5) && (i <= 0xF8ED)))
return 0xC5F1;
else if (((i >= 0xC655) && (i <= 0xC664)) || ((i >= 0xF8EE) && (i <= 0xF96A)))
return 0xC655;
else if (((i >= 0xC665) && (i <= 0xC66B)) || ((i >= 0xF96B) && (i <= 0xF9A1)))
return 0xC665;
else if (((i >= 0xC66C) && (i <= 0xC675)) || ((i >= 0xF9A2) && (i <= 0xF9B9)))
return 0xC66C;
else if (((i >= 0xC676) && (i <= 0xC678)) || ((i >= 0xF9BA) && (i <= 0xF9C5)))
return 0xC676;
else if (((i >= 0xC679) && (i <= 0xC67C)) || ((i >= 0xF9C7) && (i <= 0xF9CB)))
return 0xC679;
else if ((i == 0xC67D) || ((i >= 0xF9CC) && (i <= 0xF9CF)))
return 0xC67D;
else if (i == 0xF9D0)
return 0xF9D0;
else if ((i == 0xC67E) || (i == 0xF9D1))
return 0xC67E;
else if ((i == 0xF9C6) || (i == 0xF9D2))
return 0xF9C6;
else if (i == 0xF9D3)
return 0xF9D3;
else if (i == 0xF9D4)
return 0xF9D4;
else if (i == 0xF9D5)
return 0xF9D5;
// Fallback for every code not covered by the ranges above.
return 0xA140;
}
/*
  Compares the first `length` bytes of *a_res and *b_res.

  At each position: if at least two bytes remain and both sides hold a Big5
  lead/trail pair, the two 16-bit codes are compared; otherwise a single byte
  from each side is compared through sort_order_big5.

  Returns the (non-zero) difference at the first mismatch; in that case
  *a_res / *b_res are NOT updated. Returns 0 when all `length` bytes compare
  equal, after advancing *a_res / *b_res past the compared bytes.
*/
static int ob_strnncoll_big5_internal(const unsigned char **a_res, const unsigned char **b_res,
                                      size_t length) {
  const unsigned char *lhs = *a_res;
  const unsigned char *rhs = *b_res;
  size_t remaining = length;

  while (remaining > 0) {
    /* Only look at the second byte when there really is one. */
    const bool both_double_byte =
        remaining > 1 && isbig5code(lhs[0], lhs[1]) && isbig5code(rhs[0], rhs[1]);
    if (both_double_byte) {
      if (lhs[0] != rhs[0] || lhs[1] != rhs[1]) {
        return ((int)big5code(lhs[0], lhs[1]) - (int)big5code(rhs[0], rhs[1]));
      }
      lhs += 2;
      rhs += 2;
      remaining -= 2;
    } else {
      if (sort_order_big5[*lhs] != sort_order_big5[*rhs]) {
        return ((int)sort_order_big5[*lhs] - (int)sort_order_big5[*rhs]);
      }
      ++lhs;
      ++rhs;
      --remaining;
    }
  }

  *a_res = lhs;
  *b_res = rhs;
  return 0;
}
/* Compare strings */
extern "C" {
/*
  Compares two Big5 strings.

  The common prefix (min of both lengths) is compared with
  ob_strnncoll_big5_internal(). If it differs, that difference is returned.
  Otherwise the result is the length difference; when b_is_prefix is set,
  the length of `a` is treated as the common length for that subtraction.
*/
static int ob_strnncoll_big5(const ObCharsetInfo *cs [[maybe_unused]],
                             const unsigned char *a, size_t a_length, const unsigned char *b,
                             size_t b_length, bool b_is_prefix) {
  const size_t common_len = std::min(a_length, b_length);
  const int diff = ob_strnncoll_big5_internal(&a, &b, common_len);
  if (diff) {
    return diff;
  }
  const size_t effective_a_len = b_is_prefix ? common_len : a_length;
  return (int)(effective_a_len - b_length);
}
/*
  Compare two BIG5 strings, treating trailing spaces as insignificant.

  The common prefix is compared first.  If it is equal and the lengths
  differ, the extra bytes of the longer string are scanned: the first byte
  that is not a space decides the result (a byte below ' ' makes the longer
  string the smaller one).  If only spaces remain the strings are equal.

  diff_if_only_endspace_difference is not consulted by this implementation.
*/
static int ob_strnncollsp_big5(const ObCharsetInfo *cs [[maybe_unused]],
                               const unsigned char *a, size_t a_length, const unsigned char *b,
                               size_t b_length, bool diff_if_only_endspace_difference) {
  const size_t common_len = std::min(a_length, b_length);
  const int diff = ob_strnncoll_big5_internal(&a, &b, common_len);
  if (diff != 0 || a_length == b_length) {
    return diff;
  }
  /* After an equal prefix, a and b both point just past the common part. */
  const unsigned char *tail = a;
  size_t longer_len = a_length;
  int sign = 1;
  if (a_length < b_length) {
    /* The extra bytes belong to b, so the sign of the outcome is flipped. */
    tail = b;
    longer_len = b_length;
    sign = -1;
  }
  const unsigned char *const tail_end = tail + (longer_len - common_len);
  while (tail < tail_end) {
    const unsigned char ch = *tail++;
    if (ch != ' ') {
      return (ch < ' ') ? -sign : sign;
    }
  }
  return 0;
}
static size_t ob_strnxfrm_big5(const ObCharsetInfo *cs, unsigned char *dst,
size_t dstlen, uint nweights, const unsigned char *src,
size_t srclen, uint flags, bool *is_valid_unicode) {
unsigned char *d0 = dst;
unsigned char *de = dst + dstlen;
const unsigned char *se = src + srclen;
const unsigned char *sort_order = cs->sort_order;
for (; dst < de && src < se && nweights; nweights--) {
if (cs->cset->ismbchar(cs, (const char *)src, (const char *)se)) {
/*
Note, it is safe not to check (src < se)
in the code below, because ismbchar() would
not return TRUE if src was too short
*/
uint16 e = big5strokexfrm((uint16)big5code(*src, *(src + 1)));
*dst++ = getbig5head(e);
if (dst < de) *dst++ = getbig5tail(e);
src += 2;
} else
*dst++ = sort_order ? sort_order[*src++] : *src++;
}
return ob_strxfrm_pad(cs, d0, dst, de, nweights, flags);
}
/*
  Return 2 when [p, e) starts with a BIG5 double-byte character (valid head
  byte, at least two bytes available, valid tail byte); otherwise return 0.
*/
static unsigned int ismbchar_big5(const ObCharsetInfo *cs [[maybe_unused]],
                                  const char *p, const char *e) {
  if (!hasbig5head(*p)) {
    return 0;
  }
  if (e - p <= 1) {
    return 0;
  }
  return hasbig5tail(p[1]) ? 2 : 0;
}
/*
  Byte length of a character judged from its first byte alone:
  2 for a BIG5 head byte, 1 for anything else.
*/
static unsigned int mbcharlen_big5(const ObCharsetInfo *cs [[maybe_unused]], uint c) {
  if (hasbig5head(c)) {
    return 2;
  }
  return 1;
}
/*
  Length in bytes of the well-formed prefix of the BIG5 string [b, e).

  At most `pos` characters are examined.  Bytes below 128 count as one
  single-byte character; a pair accepted by isbig5code counts as one
  double-byte character.  Scanning stops at the first byte sequence that is
  neither, and *error is set to 1 (it is 0 otherwise).
  The original note states that CP950 and HKSCS additional characters are
  accepted as well.
*/
static size_t ob_well_formed_len_big5(const ObCharsetInfo *cs [[maybe_unused]],
                                      const char *b, const char *e, size_t pos,
                                      int *error) {
  const char *cur = b;
  const char *const last_mb_start = e - 1; /* a double-byte char must start before this */
  *error = 0;
  for (; pos > 0 && cur < e; --pos) {
    const unsigned char lead = (unsigned char)cur[0];
    if (lead < 128) {
      /* plain ASCII byte */
      cur += 1;
      continue;
    }
    if (cur < last_mb_start && isbig5code(lead, (unsigned char)cur[1])) {
      /* valid head/tail pair */
      cur += 2;
      continue;
    }
    /* malformed sequence: report and stop */
    *error = 1;
    break;
  }
  return (size_t)(cur - b);
}
}
/*
  Case-conversion descriptor shared by both big5 collations below.
  It pairs the value 0xFFFF with ob_caseinfo_pages_big5 (defined elsewhere).
  NOTE(review): presumably {highest code covered, per-page case tables} --
  confirm against the ObUnicaseInfo declaration.
*/
static ObUnicaseInfo ob_caseinfo_big5 = {0xFFFF, ob_caseinfo_pages_big5};
/*
  Translate one two-byte BIG5 code into the value stored for it in the
  tab_big5_uni0 / tab_big5_uni1 tables.  Codes outside both covered ranges
  yield 0.
*/
static int func_big5_uni_onechar(int code) {
  int unicode = 0;
  if (code >= 0xA140 && code <= 0xC7FC) {
    unicode = tab_big5_uni0[code - 0xA140];
  } else if (code >= 0xC940 && code <= 0xF9DC) {
    unicode = tab_big5_uni1[code - 0xC940];
  }
  return unicode;
}
extern "C" {
/*
  Translate one code point into the value stored for it in the matching
  tab_uni_big5N table.  Each table covers one contiguous range; a code that
  falls in none of them yields 0.
*/
static int func_uni_big5_onechar(int code) {
  const auto within = [code](int first, int last) {
    return first <= code && code <= last;
  };
  if (within(0x00A2, 0x00F7)) {
    return tab_uni_big50[code - 0x00A2];
  }
  if (within(0x02C7, 0x0451)) {
    return tab_uni_big51[code - 0x02C7];
  }
  if (within(0x2013, 0x22BF)) {
    return tab_uni_big52[code - 0x2013];
  }
  if (within(0x2460, 0x2642)) {
    return tab_uni_big53[code - 0x2460];
  }
  if (within(0x3000, 0x3129)) {
    return tab_uni_big54[code - 0x3000];
  }
  if (within(0x32A3, 0x32A3)) {
    /* single-entry range */
    return tab_uni_big55[code - 0x32A3];
  }
  if (within(0x338E, 0x33D5)) {
    return tab_uni_big56[code - 0x338E];
  }
  if (within(0x4E00, 0x9483)) {
    return tab_uni_big57[code - 0x4E00];
  }
  if (within(0x9577, 0x9FA4)) {
    return tab_uni_big58[code - 0x9577];
  }
  if (within(0xFA0C, 0xFA0D)) {
    return tab_uni_big59[code - 0xFA0C];
  }
  if (within(0xFE30, 0xFFFD)) {
    return tab_uni_big510[code - 0xFE30];
  }
  return 0;
}
/*
  Encode one wide character as BIG5 into [s, e).

  Returns 1 after writing a single byte (value below 0x80 after the int
  cast), 2 after writing a head/tail pair, OB_CS_ILUNI when
  func_uni_big5_onechar has no mapping, and OB_CS_TOOSMALL when the output
  range is empty or cannot hold the two bytes.
*/
static int ob_wc_mb_big5(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t wc,
                         unsigned char *s, unsigned char *e) {
  if (s >= e) {
    return OB_CS_TOOSMALL;
  }
  if ((int)wc < 0x80) {
    *s = (unsigned char)wc;
    return 1;
  }
  const int big5 = func_uni_big5_onechar(wc);
  if (big5 == 0) {
    return OB_CS_ILUNI;
  }
  if (s + 2 > e) {
    return OB_CS_TOOSMALL;
  }
  s[0] = big5 >> 8;   /* head byte */
  s[1] = big5 & 0xFF; /* tail byte */
  return 2;
}
/*
  Decode one BIG5 character from [s, e) into pwc[0].

  Returns 1 for a byte below 0x80, 2 for a mapped double-byte code,
  OB_CS_TOOSMALL when the input is empty, OB_CS_TOOSMALL2 when a second byte
  is needed but missing, and -2 when the two-byte code has no mapping (pwc[0]
  is set to 0 in that case).
*/
static int ob_mb_wc_big5(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t *pwc,
                         const unsigned char *s, const unsigned char *e) {
  if (s >= e) {
    return OB_CS_TOOSMALL;
  }
  const int lead = s[0];
  if (lead < 0x80) {
    pwc[0] = lead;
    return 1;
  }
  if (s + 2 > e) {
    return OB_CS_TOOSMALL2;
  }
  pwc[0] = func_big5_uni_onechar((lead << 8) + s[1]);
  if (!pwc[0]) {
    return -2;
  }
  return 2;
}
}
/*
  Collation function table used by big5_chinese_ci.
  Comparison and sort-key generation use the BIG5-specific routines in this
  file; pattern matching, case-insensitive comparison and substring search
  use the generic multi-byte helpers, and hashing/propagation use the
  "simple" helpers.  Entries are positional and follow the field order of
  ObCollationHandler, which is declared elsewhere.
*/
static ObCollationHandler ob_collation_big5_chinese_ci_handler = {
NULL, /* init */
NULL,
ob_strnncoll_big5,
ob_strnncollsp_big5,
ob_strnxfrm_big5,
ob_strnxfrmlen_simple,
NULL,
ob_like_range_mb,
ob_wildcmp_mb,
ob_strcasecmp_mb,
ob_instr_mb,
ob_hash_sort_simple,
ob_propagate_simple};
/*
  Character-set function table shared by both big5 collations.
  Character classification, well-formedness and Unicode conversion use the
  BIG5-specific routines in this file; counting, positioning and case
  mapping use the generic multi-byte helpers; numeric parsing, fill and scan
  use the 8-bit helpers.  Entries are positional and follow the field order
  of ObCharsetHandler, which is declared elsewhere.
*/
static ObCharsetHandler ob_charset_big5_handler = {NULL,
ismbchar_big5,
mbcharlen_big5,
ob_numchars_mb,
ob_charpos_mb,
ob_max_bytes_charpos_mb, /* max_bytes charpos */
ob_well_formed_len_big5,
ob_lengthsp_8bit,
ob_mb_wc_big5, /* mb_wc */
ob_wc_mb_big5, /* wc_mb */
ob_mb_ctype_mb,
ob_caseup_mb,
ob_casedn_mb,
ob_fill_8bit,
ob_strntol_8bit,
ob_strntoul_8bit,
ob_strntoll_8bit,
ob_strntoull_8bit,
ob_strntod_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit,
skip_trailing_space};
/*
  Descriptor of the big5_chinese_ci collation (flagged as the primary,
  compiled-in collation of "big5" and as using strnxfrm).
  Characters are one or two bytes, the pad character is a space, and
  comparison goes through ob_collation_big5_chinese_ci_handler with
  sort_order_big5 as the single-byte weight table.
*/
ObCharsetInfo ob_charset_big5_chinese_ci = {
1,
0,
0, /* number */
OB_CS_COMPILED | OB_CS_PRIMARY | OB_CS_STRNXFRM, /* state */
"big5", /* cs name */
"big5_chinese_ci", /* m_coll_name */
"Big5 Traditional Chinese", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_big5,
to_lower_big5,
to_upper_big5,
sort_order_big5,
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&ob_caseinfo_big5, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
1, /* mbmaxlenlen */
0, /* min_sort_char */
0xF9D5, /* max_sort_char */
' ', /* pad char */
true, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_big5_handler,
&ob_collation_big5_chinese_ci_handler,
PAD_SPACE};
/*
  Descriptor of the big5_bin collation (compiled-in, binary sort order).
  It shares the ctype/case tables and the charset handler with
  big5_chinese_ci, but has no sort_order table and compares through the
  generic ob_collation_mb_bin_handler.
*/
ObCharsetInfo ob_charset_big5_bin = {
84,
0,
0, /* number */
OB_CS_COMPILED | OB_CS_BINSORT, /* state */
"big5", /* cs name */
"big5_bin", /* m_coll_name */
"Big5 Traditional Chinese", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_big5,
to_lower_big5,
to_upper_big5,
NULL,
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&ob_caseinfo_big5, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
1, /* mbmaxlenlen */
0, /* min_sort_char */
0xF9FE, /* max_sort_char */
' ', /* pad char */
true, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_big5_handler,
&ob_collation_mb_bin_handler,
PAD_SPACE};

File diff suppressed because it is too large Load Diff

View File

@ -343,14 +343,14 @@ loop:
}
return 0;
}
void ob_hash_sort_8bit_bin(const ObCharsetInfo *cs __attribute__((unused)),
void ob_hash_sort_8bit_bin(const ObCharsetInfo *cs,
const uchar *key, size_t len, ulong *nr1, ulong *nr2, const bool calc_end_space, hash_algo hash_algo)
{
const uchar *pos = key;
key += len;
//trailing space to make 'A ' == 'A'
if (!calc_end_space) {
key = skip_trailing_space(pos, len, 0);
key = cs->cset->skip_trailing_space(cs, pos, len);
}
if (NULL == hash_algo)
{
@ -413,7 +413,8 @@ static ObCharsetHandler ob_charset_handler=
ob_strntoull_8bit,
ob_strntod_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
ob_scan_8bit,
skip_trailing_space
};
ObCollationHandler ob_collation_8bit_bin_handler =

View File

@ -0,0 +1,322 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "lib/charset/ob_ctype.h"
static unsigned char ctype_dec8_swedish_ci[] = {
0x00,
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x28,0x28,0x28,0x28,0x28,0x20,0x20,
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
0x48,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x10,0x10,0x10,0x10,0x10,0x10,
0x10,0x81,0x81,0x81,0x81,0x81,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x10,0x10,0x10,0x10,0x10,
0x10,0x82,0x82,0x82,0x82,0x82,0x82,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x10,0x10,0x10,0x10,0x20,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x48,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x10,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02,
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x10,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02
};
static unsigned char to_lower_dec8_swedish_ci[] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xD7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xDF,
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
static unsigned char to_upper_dec8_swedish_ci[] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xF7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xFF
};
static unsigned char sort_order_dec8_swedish_ci[] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
0x41,0x41,0x41,0x41,0x5C,0x5B,0x5C,0x43,0x45,0x45,0x45,0x45,0x49,0x49,0x49,0x49,
0x44,0x4E,0x4F,0x4F,0x4F,0x4F,0x5D,0xD7,0xD8,0x55,0x55,0x55,0x59,0x59,0xDE,0xDF,
0x41,0x41,0x41,0x41,0x5C,0x5B,0x5C,0x43,0x45,0x45,0x45,0x45,0x49,0x49,0x49,0x49,
0x44,0x4E,0x4F,0x4F,0x4F,0x4F,0x5D,0xF7,0xD8,0x55,0x55,0x55,0x59,0x59,0xDE,0xFF
};
static uint16 to_uni_dec8_swedish_ci[] = {
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
0x00A0,0x00A1,0x00A2,0x00A3,0x0000,0x00A5,0x0000,0x00A7,
0x00A4,0x00A9,0x00AA,0x00AB,0x0000,0x0000,0x0000,0x0000,
0x00B0,0x00B1,0x00B2,0x00B3,0x0000,0x00B5,0x00B6,0x00B7,
0x0000,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x0000,0x00BF,
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
0x0000,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x0152,
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x0178,0x0000,0x00DF,
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
0x0000,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x0153,
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FF,0x0000,0x0000
};
static unsigned char ctype_dec8_bin[] = {
0x00,
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x28,0x28,0x28,0x28,0x28,0x20,0x20,
0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
0x48,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x10,0x10,0x10,0x10,0x10,0x10,
0x10,0x81,0x81,0x81,0x81,0x81,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x10,0x10,0x10,0x10,0x10,
0x10,0x82,0x82,0x82,0x82,0x82,0x82,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x10,0x10,0x10,0x10,0x20,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x48,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x10,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02,
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x10,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02
};
static unsigned char to_lower_dec8_bin[] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xD7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xDF,
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
static unsigned char to_upper_dec8_bin[] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xF7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xFF
};
static uint16 to_uni_dec8_bin[] = {
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
0x00A0,0x00A1,0x00A2,0x00A3,0x0000,0x00A5,0x0000,0x00A7,
0x00A4,0x00A9,0x00AA,0x00AB,0x0000,0x0000,0x0000,0x0000,
0x00B0,0x00B1,0x00B2,0x00B3,0x0000,0x00B5,0x00B6,0x00B7,
0x0000,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x0000,0x00BF,
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
0x0000,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x0152,
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x0178,0x0000,0x00DF,
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
0x0000,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x0153,
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FF,0x0000,0x0000
};
/*
  Character-set function table used by both dec8 collations in this file.
  Every populated slot points at a generic 8-bit helper; the second slot is
  NULL.  The commented-out names mark helpers that have no slot in this
  table.  Entries are positional and follow the field order of
  ObCharsetHandler, which is declared elsewhere.
*/
static ObCharsetHandler ob_charset_8bit_handler = {
ob_cset_init_8bit,
NULL,
ob_mbcharlen_8bit,
ob_numchars_8bit,
ob_charpos_8bit,
ob_max_bytes_charpos_8bit,
ob_well_formed_len_8bit,
ob_lengthsp_8bit,
//ob_numcells_8bit,
ob_mb_wc_8bit,
ob_wc_mb_8bit,
ob_mb_ctype_8bit,
//ob_caseup_str_8bit,
//ob_casedn_str_8bit,
ob_caseup_8bit,
ob_casedn_8bit,
//ob_snprintf_8bit,
//ob_long10_to_str_8bit,
//ob_longlong10_to_str_8bit,
ob_fill_8bit,
ob_strntol_8bit,
ob_strntoul_8bit,
ob_strntoll_8bit,
ob_strntoull_8bit,
ob_strntod_8bit,
//ob_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit,
skip_trailing_space
};
/*
  Descriptor of the dec8_swedish_ci collation (flagged as the primary,
  compiled-in collation of "dec8").  It is a single-byte charset: the
  ctype/case/sort/to-Unicode tables are the *_dec8_swedish_ci arrays above,
  and comparison goes through ob_collation_8bit_simple_ci_handler.
*/
ObCharsetInfo ob_charset_dec8_swedish_ci = {
3,0,0,
OB_CS_COMPILED|OB_CS_PRIMARY,
"dec8", /* csname */
"dec8_swedish_ci", /* m_collname */
"DEC West European", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_dec8_swedish_ci, /* ctype */
to_lower_dec8_swedish_ci, /* to_lower */
to_upper_dec8_swedish_ci, /* to_upper */
sort_order_dec8_swedish_ci, /* sort_order */
NULL, /* uca */
to_uni_dec8_swedish_ci, /* to_uni */
NULL, /* from_uni */
&ob_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
1, /* mbmaxlenlen */
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad_char */
false, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_8bit_handler,
&ob_collation_8bit_simple_ci_handler,
PAD_SPACE /* pad_attribute */
};
/*
  Descriptor of the dec8_bin collation (compiled-in, binary sort order).
  It uses the *_dec8_bin tables above, has no sort_order table, and compares
  through ob_collation_8bit_bin_handler.
*/
ObCharsetInfo ob_charset_dec8_bin = {
69,0,0,
OB_CS_COMPILED|OB_CS_BINSORT,
"dec8", /* csname */
"dec8_bin", /* m_collname */
"DEC West European", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_dec8_bin, /* ctype */
to_lower_dec8_bin, /* to_lower */
to_upper_dec8_bin, /* to_upper */
NULL, /* sort_order */
NULL, /* uca */
to_uni_dec8_bin, /* to_uni */
NULL, /* from_uni */
&ob_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
1, /* mbmaxlenlen */
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad_char */
false, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_8bit_handler,
&ob_collation_8bit_bin_handler,
PAD_SPACE /* pad_attribute */
};

View File

@ -989,7 +989,9 @@ static ObCharsetHandler ob_charset_gb18030_handler = {
ob_strntod_8bit,
// my_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit};
ob_scan_8bit,
skip_trailing_space
};
ObCharsetInfo ob_charset_gb18030_chinese_ci = {
oceanbase::common::CS_TYPE_GB18030_CHINESE_CI,
@ -1620,7 +1622,8 @@ static ObCharsetHandler ob_charset_gb18030_2022_handler =
ob_strntoull_8bit,
ob_strntod_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
ob_scan_8bit,
skip_trailing_space
};
ObCharsetInfo ob_charset_gb18030_2022_bin =

View File

@ -368,7 +368,8 @@ static ObCharsetHandler ob_charset_gbk_handler=
ob_strntoull_8bit,
ob_strntod_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
ob_scan_8bit,
skip_trailing_space
};

View File

@ -0,0 +1,349 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "lib/charset/ob_ctype.h"
#include "ob_ctype_hkscs_tab.h"
#include "ob_ctype_hkscs31_tab.h"
#include "ob_template_helper.h"
/*
  Byte-level helpers for HKSCS double-byte sequences.
  These ranges differ from the MySQL Big5 ones: they are widened to admit
  the characters that HKSCS adds.
    head byte : 0x81..0xFE
    tail byte : 0x40..0x7E or 0xA1..0xFE
  Every macro argument is parenthesized so that compound expressions
  (e.g. `a | b`) are evaluated as a whole before the shift/mask is applied.
*/
#define ishkscshead(c) (0x81 <= (unsigned char)(c) && (unsigned char)(c) <= 0xfe)
#define ishkscstail(c) \
  ((0x40 <= (unsigned char)(c) && (unsigned char)(c) <= 0x7e) || \
   (0xa1 <= (unsigned char)(c) && (unsigned char)(c) <= 0xfe))
/* true when (c, d) is a valid head/tail pair */
#define ishkscscode(c, d) (ishkscshead(c) && ishkscstail(d))
/* combine head byte c and tail byte d into one 16-bit code */
#define hkscscode(c, d) (((unsigned char)(c) << 8) | (unsigned char)(d))
/* split a 16-bit code back into its head / tail byte */
#define hkscshead(e) ((unsigned char)((e) >> 8))
#define hkscstail(e) ((unsigned char)((e) & 0xff))
extern "C" {
/*
  Return 2 when [p, e) starts with an HKSCS double-byte character (valid
  head byte, at least two bytes available, valid tail byte); otherwise 0.
*/
static unsigned int ismbchar_hkscs(const ObCharsetInfo *cs [[maybe_unused]],
                                   const char *p, const char *e) {
  if (!ishkscshead(*p)) {
    return 0;
  }
  if (e - p <= 1) {
    return 0;
  }
  return ishkscstail(p[1]) ? 2 : 0;
}
/*
  Byte length of a character judged from its first byte alone:
  2 for an HKSCS head byte, 1 for anything else.
*/
static unsigned int mbcharlen_hkscs(const ObCharsetInfo *cs [[maybe_unused]], uint c) {
  if (ishkscshead(c)) {
    return 2;
  }
  return 1;
}
/*
  Length in bytes of the well-formed prefix of the HKSCS string [b, e).

  At most `pos` characters are examined.  Bytes below 128 count as one
  single-byte character; a pair accepted by ishkscscode counts as one
  double-byte character.  Scanning stops at the first byte sequence that is
  neither, and *error is set to 1 (it is 0 otherwise).
*/
static size_t ob_well_formed_len_hkscs(const ObCharsetInfo *cs [[maybe_unused]],
                                       const char *b, const char *e, size_t pos,
                                       int *error) {
  const char *cur = b;
  const char *const last_mb_start = e - 1; /* a double-byte char must start before this */
  *error = 0;
  for (; pos > 0 && cur < e; --pos) {
    const unsigned char lead = (unsigned char)cur[0];
    if (lead < 128) {
      /* plain ASCII byte */
      cur += 1;
      continue;
    }
    if (cur < last_mb_start && ishkscscode(lead, (unsigned char)cur[1])) {
      /* valid head/tail pair */
      cur += 2;
      continue;
    }
    /* malformed sequence: report and stop */
    *error = 1;
    break;
  }
  return (size_t)(cur - b);
}
}
/*
  Case-conversion descriptor shared by the hkscs and hkscs31 collations.
  It pairs the value 0xFFFF with ob_caseinfo_pages_hkscs (defined elsewhere).
  NOTE(review): presumably {highest code covered, per-page case tables} --
  confirm against the ObUnicaseInfo declaration.
*/
static ObUnicaseInfo ob_caseinfo_hk = {0xFFFF, ob_caseinfo_pages_hkscs};
/* Look up `code` in hkscs_to_uni_map; returns the mapped value, or 0 when absent. */
static int func_hkscs_uni_onechar(int code) {
  const auto hit = hkscs_to_uni_map.find(code);
  if (hit == hkscs_to_uni_map.end()) {
    return 0;
  }
  return hit->second;
}
/* Look up `code` in uni_to_hkscs_map; returns the mapped value, or 0 when absent. */
static int func_uni_hkscs_onechar(int code) {
  const auto hit = uni_to_hkscs_map.find(code);
  if (hit == uni_to_hkscs_map.end()) {
    return 0;
  }
  return hit->second;
}
/* Look up `code` in hkscs31_to_uni_map; returns the mapped value, or 0 when absent. */
static int func_hkscs31_uni_onechar(int code) {
  const auto hit = hkscs31_to_uni_map.find(code);
  if (hit == hkscs31_to_uni_map.end()) {
    return 0;
  }
  return hit->second;
}
/* Look up `code` in uni_to_hkscs31_map; returns the mapped value, or 0 when absent. */
static int func_uni_hkscs31_onechar(int code) {
  const auto hit = uni_to_hkscs31_map.find(code);
  if (hit == uni_to_hkscs31_map.end()) {
    return 0;
  }
  return hit->second;
}
extern "C" {
/*
  Unicode -> hkscs: encode one wide character into [s, e).

  Returns 1 after writing a single byte (value below 0x80 after the int
  cast), 2 after writing a head/tail pair, OB_CS_ILUNI when
  func_uni_hkscs_onechar has no mapping, and OB_CS_TOOSMALL when the output
  range is empty or cannot hold the two bytes.
*/
static int ob_wc_mb_hkscs(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t wc,
                          unsigned char *s, unsigned char *e) {
  if (s >= e) {
    return OB_CS_TOOSMALL;
  }
  if ((int)wc < 0x80) {
    *s = (uchar)wc;
    return 1;
  }
  const int mapped = func_uni_hkscs_onechar(wc);
  if (mapped == 0) {
    return OB_CS_ILUNI;
  }
  if (s + 2 > e) {
    return OB_CS_TOOSMALL;
  }
  s[0] = mapped >> 8;   /* head byte */
  s[1] = mapped & 0xFF; /* tail byte */
  return 2;
}
/*
  hkscs -> Unicode: decode one character from [s, e) into pwc[0].

  Returns 1 for a byte below 0x80, 2 for a mapped double-byte code,
  OB_CS_TOOSMALL when the input is empty, OB_CS_TOOSMALL2 when a second byte
  is needed but missing, and -2 when the two-byte code has no mapping (pwc[0]
  is set to 0 in that case).
*/
static int ob_mb_wc_hkscs(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t *pwc,
                          const unsigned char *s, const unsigned char *e) {
  if (s >= e) {
    return OB_CS_TOOSMALL;
  }
  const int lead = s[0];
  if (lead < 0x80) {
    pwc[0] = lead;
    return 1;
  }
  if (s + 2 > e) {
    return OB_CS_TOOSMALL2;
  }
  pwc[0] = func_hkscs_uni_onechar((lead << 8) + s[1]);
  if (!pwc[0]) {
    return -2;
  }
  return 2;
}
/*
  Unicode -> hkscs31: encode one wide character into [s, e).

  Returns 1 after writing a single byte (value below 0x80 after the int
  cast), 2 after writing a head/tail pair, OB_CS_ILUNI when
  func_uni_hkscs31_onechar has no mapping, and OB_CS_TOOSMALL when the
  output range is empty or cannot hold the two bytes.
*/
static int ob_wc_mb_hkscs31(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t wc,
                            unsigned char *s, unsigned char *e) {
  if (s >= e) {
    return OB_CS_TOOSMALL;
  }
  if ((int)wc < 0x80) {
    *s = (uchar)wc;
    return 1;
  }
  const int mapped = func_uni_hkscs31_onechar(wc);
  if (mapped == 0) {
    return OB_CS_ILUNI;
  }
  if (s + 2 > e) {
    return OB_CS_TOOSMALL;
  }
  s[0] = mapped >> 8;   /* head byte */
  s[1] = mapped & 0xFF; /* tail byte */
  return 2;
}
/*
  hkscs31 -> Unicode: decode one character from [s, e) into pwc[0].

  Returns 1 for a byte below 0x80, 2 for a mapped double-byte code,
  OB_CS_TOOSMALL when the input is empty, OB_CS_TOOSMALL2 when a second byte
  is needed but missing, and -2 when the two-byte code has no mapping (pwc[0]
  is set to 0 in that case).
*/
static int ob_mb_wc_hkscs31(const ObCharsetInfo *cs [[maybe_unused]], ob_wc_t *pwc,
                            const unsigned char *s, const unsigned char *e) {
  if (s >= e) {
    return OB_CS_TOOSMALL;
  }
  const int lead = s[0];
  if (lead < 0x80) {
    pwc[0] = lead;
    return 1;
  }
  if (s + 2 > e) {
    return OB_CS_TOOSMALL2;
  }
  pwc[0] = func_hkscs31_uni_onechar((lead << 8) + s[1]);
  if (!pwc[0]) {
    return -2;
  }
  return 2;
}
}
/*
  Init hook of the hkscs charset handler: fills the two global lookup maps
  (hkscs_to_uni_map and uni_to_hkscs_map) from their static source arrays.

  A map that already holds entries is left as it is, so calling this again
  does not re-insert anything.  Population stops at the first rejected
  insertion (e.g. a duplicate key in the source array).

  cs and loader are not used.
  Returns true when every attempted insertion succeeded, false otherwise.
*/
bool hkscs_init(ObCharsetInfo *cs, ObCharsetLoader *loader) {
  /* Reserve the buckets before inserting so the tables are not re-bucketed
     repeatedly while they grow. */
  hkscs_to_uni_map.rehash(20019);
  uni_to_hkscs_map.rehash(20019);
  bool succ = true;
  if (hkscs_to_uni_map.size() == 0) {
    for (size_t i = 0; succ && i < array_elements(hkscs_to_uni_map_array); ++i) {
      succ = hkscs_to_uni_map.insert(hkscs_to_uni_map_array[i]).second;
    }
  }
  if (succ && uni_to_hkscs_map.size() == 0) {
    /* The insert result is consumed in place, so no variable has to match
       this map's iterator type. */
    for (size_t i = 0; succ && i < array_elements(uni_to_hkscs_map_array); ++i) {
      succ = uni_to_hkscs_map.insert(uni_to_hkscs_map_array[i]).second;
    }
  }
  return succ;
}
/*
  Init hook of the hkscs31 charset handler: fills the two global lookup maps
  (hkscs31_to_uni_map and uni_to_hkscs31_map) from their static source
  arrays.

  A map that already holds entries is left as it is, so calling this again
  does not re-insert anything.  Population stops at the first rejected
  insertion (e.g. a duplicate key in the source array).

  cs and loader are not used.
  Returns true when every attempted insertion succeeded, false otherwise.
*/
bool hkscs31_init(ObCharsetInfo *cs, ObCharsetLoader *loader) {
  /* Reserve the buckets before inserting so the tables are not re-bucketed
     repeatedly while they grow. */
  hkscs31_to_uni_map.rehash(20019);
  uni_to_hkscs31_map.rehash(20019);
  bool succ = true;
  if (hkscs31_to_uni_map.size() == 0) {
    for (size_t i = 0; succ && i < array_elements(hkscs31_to_uni_map_array); ++i) {
      succ = hkscs31_to_uni_map.insert(hkscs31_to_uni_map_array[i]).second;
    }
  }
  if (succ && uni_to_hkscs31_map.size() == 0) {
    for (size_t i = 0; succ && i < array_elements(uni_to_hkscs31_map_array); ++i) {
      succ = uni_to_hkscs31_map.insert(uni_to_hkscs31_map_array[i]).second;
    }
  }
  return succ;
}
/*
  Character-set function table for "hkscs".
  hkscs_init fills the lookup maps; classification, well-formedness and
  Unicode conversion use the hkscs routines in this file; counting,
  positioning and case mapping use the generic multi-byte helpers; numeric
  parsing, fill and scan use the 8-bit helpers.  Entries are positional and
  follow the field order of ObCharsetHandler, which is declared elsewhere.
*/
static ObCharsetHandler ob_charset_hkscs_handler = {
hkscs_init,
ismbchar_hkscs,
mbcharlen_hkscs,
ob_numchars_mb,
ob_charpos_mb,
ob_max_bytes_charpos_mb, /* max_bytes charpos */
ob_well_formed_len_hkscs,
ob_lengthsp_8bit,
ob_mb_wc_hkscs, /* mb_wc */
ob_wc_mb_hkscs, /* wc_mb */
ob_mb_ctype_mb,
ob_caseup_mb,
ob_casedn_mb,
ob_fill_8bit,
ob_strntol_8bit,
ob_strntoul_8bit,
ob_strntoll_8bit,
ob_strntoull_8bit,
ob_strntod_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit,
skip_trailing_space
};
/*
  Charset handler (function table) for the "hkscs31" charset.
  Differs from ob_charset_hkscs_handler only in the init hook (hkscs31_init)
  and in the mb_wc / wc_mb converters (the *_hkscs31 variants).
*/
static ObCharsetHandler ob_charset_hkscs31_handler = {
hkscs31_init,
ismbchar_hkscs,
mbcharlen_hkscs,
ob_numchars_mb,
ob_charpos_mb,
ob_max_bytes_charpos_mb, /* max_bytes charpos */
ob_well_formed_len_hkscs,
ob_lengthsp_8bit,
ob_mb_wc_hkscs31, /* mb_wc */
ob_wc_mb_hkscs31, /* wc_mb */
ob_mb_ctype_mb,
ob_caseup_mb,
ob_casedn_mb,
ob_fill_8bit,
ob_strntol_8bit,
ob_strntoul_8bit,
ob_strntoll_8bit,
ob_strntoull_8bit,
ob_strntod_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit,
skip_trailing_space
};
/*
  Descriptor for the "hkscs_bin" collation of the "hkscs" charset.
  It is flagged OB_CS_COMPILED | OB_CS_BINSORT, declares 1..2 bytes per
  character and PAD_SPACE, and wires in ob_charset_hkscs_handler together with
  ob_collation_mb_bin_handler.
  NOTE(review): the leading 152 is presumably the collation id; confirm against
  the ObCharsetInfo field order.
*/
ObCharsetInfo ob_charset_hkscs_bin = {
152,
0,
0, /* number */
OB_CS_COMPILED | OB_CS_BINSORT, /* state */
"hkscs", /* cs name */
"hkscs_bin", /* m_coll_name */
"HKSCS 2000 Traditional Chinese", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_hkscs,
to_lower_hkscs,
to_upper_hkscs,
NULL,
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&ob_caseinfo_hk, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
1, /* mbmaxlenlen */
0, /* min_sort_char */
0xFEFE, /* max_sort_char */
' ', /* pad char */
true, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_hkscs_handler,
&ob_collation_mb_bin_handler,
PAD_SPACE};
/*
  Descriptor for the "hkscs31_bin" collation of the "hkscs31" charset.
  Same layout as ob_charset_hkscs_bin; only the leading number (153), the
  charset/collation names, the comment string and the charset handler
  (ob_charset_hkscs31_handler) differ.
  NOTE(review): the leading 153 is presumably the collation id; confirm against
  the ObCharsetInfo field order.
*/
ObCharsetInfo ob_charset_hkscs31_bin = {
153,
0,
0, /* number */
OB_CS_COMPILED | OB_CS_BINSORT, /* state */
"hkscs31", /* cs name */
"hkscs31_bin", /* m_coll_name */
"HKSCS 2001 Traditional Chinese", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_hkscs,
to_lower_hkscs,
to_upper_hkscs,
NULL,
NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
&ob_caseinfo_hk, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
1, /* mbmaxlenlen */
0, /* min_sort_char */
0xFEFE, /* max_sort_char */
' ', /* pad char */
true, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_hkscs31_handler,
&ob_collation_mb_bin_handler,
PAD_SPACE};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -67,7 +67,8 @@ static ObCharsetHandler ob_charset_latin1_handler=
ob_strntod_8bit,
//ob_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
ob_scan_8bit,
skip_trailing_space
};
ObCharsetInfo ob_charset_latin1 = {

View File

@ -238,6 +238,28 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
return (str != str_end ? 1 : 0);
}
/*
ob_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
*/
int ob_strcasecmp_mb(const ObCharsetInfo *cs, const char *s, const char *t) {
uint32 l;
const uchar *map = cs->to_upper;
while (*s && *t) {
/* Pointing after the '\0' is safe here. */
if ((l = ob_ismbchar(cs, s, s + cs->mbmaxlen))) {
while (l--)
if (*s++ != *t++) return 1;
} else if (ob_mbcharlen(cs, *t) != 1 ||
map[(uchar)*s++] != map[(uchar)*t++])
return 1;
}
/* At least one of '*s' and '*t' is zero here. */
assert(!*t || !*s);
return (*t != *s);
}
unsigned int __attribute__ ((noinline)) ob_instr_mb_help(size_t s_length, ob_match_t *match, unsigned int nmatch)
{
if (!s_length) {
@ -439,7 +461,7 @@ size_t ob_lengthsp_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *ptr, size_t length)
{
const char *end;
end= (const char *) skip_trailing_space((const unsigned char *)ptr, length, 0);
end = (const char *) cs->cset->skip_trailing_space(cs, (const unsigned char *)ptr, length); // 8bit not utf16
return (size_t) (end-ptr);
}
@ -693,7 +715,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
str += mb_len;
break;
}
} else if (!ob_ismbchar(cs, str, str_end) && *str == cmp) {
} else if (!ob_ismbchar(cs, str, str_end) && static_cast<unsigned char>(*str) == cmp) {
str++;
break;
}
@ -723,14 +745,14 @@ int ob_wildcmp_mb_bin(const ObCharsetInfo *cs,
escape_char, w_one, w_many, 1);
}
void ob_hash_sort_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
void ob_hash_sort_mb_bin(const ObCharsetInfo *cs,
const unsigned char *key, size_t len,unsigned long int *nr1, unsigned long int *nr2,
const bool calc_end_space, hash_algo hash_algo)
{
const unsigned char *pos = key;
if (!calc_end_space) {
key= skip_trailing_space(key, len, 0);
key = cs->cset->skip_trailing_space(cs, key, len); // use in utf8 not utf16
} else {
key += len;
}

View File

@ -24,7 +24,6 @@
#define CUTOFF (UINT64_MAX / 10)
#define CUTLIM (UINT64_MAX % 10)
#define SPACE_INT 0x20202020
#define DIGITS_IN_ULONGLONG 20
#define PLANE_SIZE 0x100
#define PLANE_NUM 0x100
@ -765,7 +764,7 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,
const unsigned char *end;
unsigned char data[HASH_BUFFER_LENGTH];
int length = 0;
end= calc_end_space ? key + len : skip_trailing_space(key, len, 0);
end= calc_end_space ? key + len : cs->cset->skip_trailing_space(cs, key, len); // used in gbk sjis tis620
if (NULL == hash_algo) {
for (; key < (unsigned char*) end ; key++) {
@ -785,7 +784,32 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,
}
}
#define SPACE_INT 0x20202020
const unsigned char *skip_trailing_space(const struct ObCharsetInfo *cs __attribute__((unused)),const unsigned char *ptr,size_t len)
{
const static unsigned SPACE_INT = 0x20202020;
const unsigned char *end= ptr + len;
if (len > 20) {
const unsigned char *end_words= (const unsigned char *)(int_ptr)
(((ulonglong)(int_ptr)end) / SIZEOF_INT * SIZEOF_INT);
const unsigned char *start_words= (const unsigned char *)(int_ptr)
((((ulonglong)(int_ptr)ptr) + SIZEOF_INT - 1) / SIZEOF_INT * SIZEOF_INT);
ob_charset_assert(((ulonglong)(int_ptr)ptr) >= SIZEOF_INT);
if (end_words > ptr) {
while (end > end_words && end[-1] == 0x20) {
end--;
}
if (end[-1] == 0x20 && start_words < end_words) {
while (end > start_words && ((unsigned *)end)[-1] == SPACE_INT) {
end -= SIZEOF_INT;
}
}
}
}
while (end > ptr && end[-1] == 0x20)
end--;
return (end);
}
size_t ob_strxfrm_pad(const ObCharsetInfo *cs, unsigned char *str, unsigned char *frm_end,
unsigned char *str_end, unsigned int nweights, unsigned int flags) {

18089
deps/oblib/src/lib/charset/ob_ctype_sjis.cc vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -260,7 +260,9 @@ static ObCharsetHandler ob_charset_tis620_handler = {
ob_strntod_8bit,
//ob_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit};
ob_scan_8bit,
skip_trailing_space
};
ObCharsetInfo ob_charset_tis620_thai_ci = {

File diff suppressed because it is too large Load Diff

View File

@ -513,6 +513,182 @@ ObUCAInfo ob_uca_v520 = {
0,
0
};
/*
  Case-info page for code points U+0000..U+00FF (256 entries, three values
  each) used as page 0 of ob_unicase_pages_turkish.
  The entry for U+0049 carries 0x0131 as its second value and the entry for
  U+0069 carries 0x0130 as its first value.
  NOTE(review): the three values are presumably {upper, lower, sort}; confirm
  against the ObUnicaseInfoChar definition.
*/
static ObUnicaseInfoChar turk00[] = {
{0x0000, 0x0000, 0x0000}, {0x0001, 0x0001, 0x0001},
{0x0002, 0x0002, 0x0002}, {0x0003, 0x0003, 0x0003},
{0x0004, 0x0004, 0x0004}, {0x0005, 0x0005, 0x0005},
{0x0006, 0x0006, 0x0006}, {0x0007, 0x0007, 0x0007},
{0x0008, 0x0008, 0x0008}, {0x0009, 0x0009, 0x0009},
{0x000A, 0x000A, 0x000A}, {0x000B, 0x000B, 0x000B},
{0x000C, 0x000C, 0x000C}, {0x000D, 0x000D, 0x000D},
{0x000E, 0x000E, 0x000E}, {0x000F, 0x000F, 0x000F},
{0x0010, 0x0010, 0x0010}, {0x0011, 0x0011, 0x0011},
{0x0012, 0x0012, 0x0012}, {0x0013, 0x0013, 0x0013},
{0x0014, 0x0014, 0x0014}, {0x0015, 0x0015, 0x0015},
{0x0016, 0x0016, 0x0016}, {0x0017, 0x0017, 0x0017},
{0x0018, 0x0018, 0x0018}, {0x0019, 0x0019, 0x0019},
{0x001A, 0x001A, 0x001A}, {0x001B, 0x001B, 0x001B},
{0x001C, 0x001C, 0x001C}, {0x001D, 0x001D, 0x001D},
{0x001E, 0x001E, 0x001E}, {0x001F, 0x001F, 0x001F},
{0x0020, 0x0020, 0x0020}, {0x0021, 0x0021, 0x0021},
{0x0022, 0x0022, 0x0022}, {0x0023, 0x0023, 0x0023},
{0x0024, 0x0024, 0x0024}, {0x0025, 0x0025, 0x0025},
{0x0026, 0x0026, 0x0026}, {0x0027, 0x0027, 0x0027},
{0x0028, 0x0028, 0x0028}, {0x0029, 0x0029, 0x0029},
{0x002A, 0x002A, 0x002A}, {0x002B, 0x002B, 0x002B},
{0x002C, 0x002C, 0x002C}, {0x002D, 0x002D, 0x002D},
{0x002E, 0x002E, 0x002E}, {0x002F, 0x002F, 0x002F},
{0x0030, 0x0030, 0x0030}, {0x0031, 0x0031, 0x0031},
{0x0032, 0x0032, 0x0032}, {0x0033, 0x0033, 0x0033},
{0x0034, 0x0034, 0x0034}, {0x0035, 0x0035, 0x0035},
{0x0036, 0x0036, 0x0036}, {0x0037, 0x0037, 0x0037},
{0x0038, 0x0038, 0x0038}, {0x0039, 0x0039, 0x0039},
{0x003A, 0x003A, 0x003A}, {0x003B, 0x003B, 0x003B},
{0x003C, 0x003C, 0x003C}, {0x003D, 0x003D, 0x003D},
{0x003E, 0x003E, 0x003E}, {0x003F, 0x003F, 0x003F},
{0x0040, 0x0040, 0x0040}, {0x0041, 0x0061, 0x0041},
{0x0042, 0x0062, 0x0042}, {0x0043, 0x0063, 0x0043},
{0x0044, 0x0064, 0x0044}, {0x0045, 0x0065, 0x0045},
{0x0046, 0x0066, 0x0046}, {0x0047, 0x0067, 0x0047},
{0x0048, 0x0068, 0x0048}, {0x0049, 0x0131, 0x0049},
{0x004A, 0x006A, 0x004A}, {0x004B, 0x006B, 0x004B},
{0x004C, 0x006C, 0x004C}, {0x004D, 0x006D, 0x004D},
{0x004E, 0x006E, 0x004E}, {0x004F, 0x006F, 0x004F},
{0x0050, 0x0070, 0x0050}, {0x0051, 0x0071, 0x0051},
{0x0052, 0x0072, 0x0052}, {0x0053, 0x0073, 0x0053},
{0x0054, 0x0074, 0x0054}, {0x0055, 0x0075, 0x0055},
{0x0056, 0x0076, 0x0056}, {0x0057, 0x0077, 0x0057},
{0x0058, 0x0078, 0x0058}, {0x0059, 0x0079, 0x0059},
{0x005A, 0x007A, 0x005A}, {0x005B, 0x005B, 0x005B},
{0x005C, 0x005C, 0x005C}, {0x005D, 0x005D, 0x005D},
{0x005E, 0x005E, 0x005E}, {0x005F, 0x005F, 0x005F},
{0x0060, 0x0060, 0x0060}, {0x0041, 0x0061, 0x0041},
{0x0042, 0x0062, 0x0042}, {0x0043, 0x0063, 0x0043},
{0x0044, 0x0064, 0x0044}, {0x0045, 0x0065, 0x0045},
{0x0046, 0x0066, 0x0046}, {0x0047, 0x0067, 0x0047},
{0x0048, 0x0068, 0x0048}, {0x0130, 0x0069, 0x0049},
{0x004A, 0x006A, 0x004A}, {0x004B, 0x006B, 0x004B},
{0x004C, 0x006C, 0x004C}, {0x004D, 0x006D, 0x004D},
{0x004E, 0x006E, 0x004E}, {0x004F, 0x006F, 0x004F},
{0x0050, 0x0070, 0x0050}, {0x0051, 0x0071, 0x0051},
{0x0052, 0x0072, 0x0052}, {0x0053, 0x0073, 0x0053},
{0x0054, 0x0074, 0x0054}, {0x0055, 0x0075, 0x0055},
{0x0056, 0x0076, 0x0056}, {0x0057, 0x0077, 0x0057},
{0x0058, 0x0078, 0x0058}, {0x0059, 0x0079, 0x0059},
{0x005A, 0x007A, 0x005A}, {0x007B, 0x007B, 0x007B},
{0x007C, 0x007C, 0x007C}, {0x007D, 0x007D, 0x007D},
{0x007E, 0x007E, 0x007E}, {0x007F, 0x007F, 0x007F},
{0x0080, 0x0080, 0x0080}, {0x0081, 0x0081, 0x0081},
{0x0082, 0x0082, 0x0082}, {0x0083, 0x0083, 0x0083},
{0x0084, 0x0084, 0x0084}, {0x0085, 0x0085, 0x0085},
{0x0086, 0x0086, 0x0086}, {0x0087, 0x0087, 0x0087},
{0x0088, 0x0088, 0x0088}, {0x0089, 0x0089, 0x0089},
{0x008A, 0x008A, 0x008A}, {0x008B, 0x008B, 0x008B},
{0x008C, 0x008C, 0x008C}, {0x008D, 0x008D, 0x008D},
{0x008E, 0x008E, 0x008E}, {0x008F, 0x008F, 0x008F},
{0x0090, 0x0090, 0x0090}, {0x0091, 0x0091, 0x0091},
{0x0092, 0x0092, 0x0092}, {0x0093, 0x0093, 0x0093},
{0x0094, 0x0094, 0x0094}, {0x0095, 0x0095, 0x0095},
{0x0096, 0x0096, 0x0096}, {0x0097, 0x0097, 0x0097},
{0x0098, 0x0098, 0x0098}, {0x0099, 0x0099, 0x0099},
{0x009A, 0x009A, 0x009A}, {0x009B, 0x009B, 0x009B},
{0x009C, 0x009C, 0x009C}, {0x009D, 0x009D, 0x009D},
{0x009E, 0x009E, 0x009E}, {0x009F, 0x009F, 0x009F},
{0x00A0, 0x00A0, 0x00A0}, {0x00A1, 0x00A1, 0x00A1},
{0x00A2, 0x00A2, 0x00A2}, {0x00A3, 0x00A3, 0x00A3},
{0x00A4, 0x00A4, 0x00A4}, {0x00A5, 0x00A5, 0x00A5},
{0x00A6, 0x00A6, 0x00A6}, {0x00A7, 0x00A7, 0x00A7},
{0x00A8, 0x00A8, 0x00A8}, {0x00A9, 0x00A9, 0x00A9},
{0x00AA, 0x00AA, 0x00AA}, {0x00AB, 0x00AB, 0x00AB},
{0x00AC, 0x00AC, 0x00AC}, {0x00AD, 0x00AD, 0x00AD},
{0x00AE, 0x00AE, 0x00AE}, {0x00AF, 0x00AF, 0x00AF},
{0x00B0, 0x00B0, 0x00B0}, {0x00B1, 0x00B1, 0x00B1},
{0x00B2, 0x00B2, 0x00B2}, {0x00B3, 0x00B3, 0x00B3},
{0x00B4, 0x00B4, 0x00B4}, {0x039C, 0x00B5, 0x039C},
{0x00B6, 0x00B6, 0x00B6}, {0x00B7, 0x00B7, 0x00B7},
{0x00B8, 0x00B8, 0x00B8}, {0x00B9, 0x00B9, 0x00B9},
{0x00BA, 0x00BA, 0x00BA}, {0x00BB, 0x00BB, 0x00BB},
{0x00BC, 0x00BC, 0x00BC}, {0x00BD, 0x00BD, 0x00BD},
{0x00BE, 0x00BE, 0x00BE}, {0x00BF, 0x00BF, 0x00BF},
{0x00C0, 0x00E0, 0x0041}, {0x00C1, 0x00E1, 0x0041},
{0x00C2, 0x00E2, 0x0041}, {0x00C3, 0x00E3, 0x0041},
{0x00C4, 0x00E4, 0x0041}, {0x00C5, 0x00E5, 0x0041},
{0x00C6, 0x00E6, 0x00C6}, {0x00C7, 0x00E7, 0x0043},
{0x00C8, 0x00E8, 0x0045}, {0x00C9, 0x00E9, 0x0045},
{0x00CA, 0x00EA, 0x0045}, {0x00CB, 0x00EB, 0x0045},
{0x00CC, 0x00EC, 0x0049}, {0x00CD, 0x00ED, 0x0049},
{0x00CE, 0x00EE, 0x0049}, {0x00CF, 0x00EF, 0x0049},
{0x00D0, 0x00F0, 0x00D0}, {0x00D1, 0x00F1, 0x004E},
{0x00D2, 0x00F2, 0x004F}, {0x00D3, 0x00F3, 0x004F},
{0x00D4, 0x00F4, 0x004F}, {0x00D5, 0x00F5, 0x004F},
{0x00D6, 0x00F6, 0x004F}, {0x00D7, 0x00D7, 0x00D7},
{0x00D8, 0x00F8, 0x00D8}, {0x00D9, 0x00F9, 0x0055},
{0x00DA, 0x00FA, 0x0055}, {0x00DB, 0x00FB, 0x0055},
{0x00DC, 0x00FC, 0x0055}, {0x00DD, 0x00FD, 0x0059},
{0x00DE, 0x00FE, 0x00DE}, {0x00DF, 0x00DF, 0x00DF},
{0x00C0, 0x00E0, 0x0041}, {0x00C1, 0x00E1, 0x0041},
{0x00C2, 0x00E2, 0x0041}, {0x00C3, 0x00E3, 0x0041},
{0x00C4, 0x00E4, 0x0041}, {0x00C5, 0x00E5, 0x0041},
{0x00C6, 0x00E6, 0x00C6}, {0x00C7, 0x00E7, 0x0043},
{0x00C8, 0x00E8, 0x0045}, {0x00C9, 0x00E9, 0x0045},
{0x00CA, 0x00EA, 0x0045}, {0x00CB, 0x00EB, 0x0045},
{0x00CC, 0x00EC, 0x0049}, {0x00CD, 0x00ED, 0x0049},
{0x00CE, 0x00EE, 0x0049}, {0x00CF, 0x00EF, 0x0049},
{0x00D0, 0x00F0, 0x00D0}, {0x00D1, 0x00F1, 0x004E},
{0x00D2, 0x00F2, 0x004F}, {0x00D3, 0x00F3, 0x004F},
{0x00D4, 0x00F4, 0x004F}, {0x00D5, 0x00F5, 0x004F},
{0x00D6, 0x00F6, 0x004F}, {0x00F7, 0x00F7, 0x00F7},
{0x00D8, 0x00F8, 0x00D8}, {0x00D9, 0x00F9, 0x0055},
{0x00DA, 0x00FA, 0x0055}, {0x00DB, 0x00FB, 0x0055},
{0x00DC, 0x00FC, 0x0055}, {0x00DD, 0x00FD, 0x0059},
{0x00DE, 0x00FE, 0x00DE}, {0x0178, 0x00FF, 0x0059}};
extern ObUnicaseInfoChar utf8_plane01[];
extern ObUnicaseInfoChar utf8_plane02[];
extern ObUnicaseInfoChar utf8_plane03[];
extern ObUnicaseInfoChar utf8_plane04[];
extern ObUnicaseInfoChar utf8_plane05[];
extern ObUnicaseInfoChar utf8_plane1E[];
extern ObUnicaseInfoChar utf8_plane1F[];
extern ObUnicaseInfoChar utf8_plane21[];
extern ObUnicaseInfoChar utf8_plane24[];
extern ObUnicaseInfoChar utf8_planeFF[];
/*
  Page directory for the Turkish case-info set: 256 slots, one per page.
  Slot 0x00 points at turk00; slots 0x01-0x05, 0x1E, 0x1F, 0x21, 0x24 and 0xFF
  point at the utf8_planeXX tables declared extern; every other slot is nullptr.
*/
const static ObUnicaseInfoChar *ob_unicase_pages_turkish[256] = {
turk00, utf8_plane01, utf8_plane02, utf8_plane03, utf8_plane04, utf8_plane05, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, utf8_plane1E, utf8_plane1F,
nullptr, utf8_plane21, nullptr, nullptr, utf8_plane24, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, utf8_planeFF};
/* Turkish case-info set: pairs the value 0xFFFF with the page directory above.
   NOTE(review): 0xFFFF is presumably the highest covered code point; confirm
   against the ObUnicaseInfo definition. */
ObUnicaseInfo ob_unicase_turkish = {0xFFFF, ob_unicase_pages_turkish};
static uint16 uca_000data[]= { /* 0000 (4 weights per char) */
0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,

View File

@ -1126,6 +1126,25 @@ PAD_MIN_MAX:
return FALSE;
}
/*
  Return a pointer just past the buffer contents once trailing big-endian
  UTF-16 spaces (byte pair 0x00 0x20) have been dropped. Whole two-byte units
  are removed from the end; the charset argument is not used.
*/
const unsigned char *skip_trailing_space_utf16(const struct ObCharsetInfo * __attribute__((unused)), const unsigned char *ptr,size_t len)
{
  size_t kept = len;
  while (kept >= 2 && ptr[kept - 2] == 0x00 && ptr[kept - 1] == 0x20) {
    kept -= 2;
  }
  return ptr + kept;
}
/*
  Return a pointer just past the buffer contents once trailing little-endian
  UTF-16 spaces (byte pair 0x20 0x00) have been dropped. Whole two-byte units
  are removed from the end; the charset argument is not used.
*/
const unsigned char *skip_trailing_space_utf16le(const struct ObCharsetInfo * __attribute__((unused)), const unsigned char *ptr,size_t len)
{
  size_t kept = len;
  while (kept >= 2 && ptr[kept - 2] == 0x20 && ptr[kept - 1] == 0x00) {
    kept -= 2;
  }
  return ptr + kept;
}
ObCharsetHandler ob_charset_utf16_handler=
{
NULL,
@ -1148,7 +1167,8 @@ ObCharsetHandler ob_charset_utf16_handler=
ob_strntoull_mb2_or_mb4,
ob_strntod_mb2_or_mb4,
ob_strntoull10rnd_mb2_or_mb4,
ob_scan_mb2
ob_scan_mb2,
skip_trailing_space_utf16
};
static ObCollationHandler ob_collation_utf16_bin_handler =
@ -1256,3 +1276,171 @@ ObCharsetInfo ob_charset_utf16_general_ci=
&ob_collation_utf16_general_ci_handler,
PAD_SPACE
};
/*
D800..DB7F - Non-private surrogate high (896 pages)
DB80..DBFF - Private surrogate high (128 pages)
DC00..DFFF - Surrogate low (1024 codes in a page)
*/
#define OB_UTF16_SURROGATE_HIGH_FIRST 0xD800
#define OB_UTF16_SURROGATE_LOW_FIRST 0xDC00
#define OB_UTF16_SURROGATE_LOW_LAST 0xDFFF
/*
  Length in bytes of the buffer after trailing two-byte units whose
  uint2korr() value is 0x20 have been removed. The cs argument is not used.
*/
static size_t ob_lengthsp_utf16le(const ObCharsetInfo *cs __attribute__((unused)),
                                  const char *ptr, size_t length)
{
  size_t remaining = length;
  while (remaining > 1) {
    const char *last_unit = ptr + remaining - 2;
    if (uint2korr(last_unit) != 0x20) {
      break;
    }
    remaining -= 2;
  }
  return remaining;
}
static int
ob_utf16le_uni(const ObCharsetInfo *cs __attribute__((unused)),
ob_wc_t *pwc, const unsigned char *str, const unsigned char *end)
{
ob_wc_t lo;
if (str + 2 > end) return OB_CS_TOOSMALL2;
if ((*pwc = uint2korr(str)) < OB_UTF16_SURROGATE_HIGH_FIRST ||
(*pwc > OB_UTF16_SURROGATE_LOW_LAST))
return 2; /* [0000-D7FF,E000-FFFF] */
if (*pwc >= OB_UTF16_SURROGATE_LOW_FIRST)
return OB_CS_ILSEQ; /* [DC00-DFFF] Low surrogate part without high part */
if (str + 4 > end) return OB_CS_TOOSMALL4;
str += 2;
if ((lo = uint2korr(str)) < OB_UTF16_SURROGATE_LOW_FIRST ||
lo > OB_UTF16_SURROGATE_LOW_LAST)
return OB_CS_ILSEQ; /* Expected low surrogate part, got something else */
*pwc = 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF));
return 4;
}
static int ob_uni_utf16le(const ObCharsetInfo *cs __attribute__((unused)),
ob_wc_t wc, unsigned char *str, unsigned char *end)
{
if (wc < OB_UTF16_SURROGATE_HIGH_FIRST ||
(wc > OB_UTF16_SURROGATE_LOW_LAST && wc <= 0xFFFF)) {
if (str + 2 > end) return OB_CS_TOOSMALL2;
int2store(str, (uint16)wc);
return 2; /* [0000-D7FF, E000-FFFF] */
}
if (wc < 0xFFFF || wc > 0x10FFFF)
return OB_CS_ILUNI; /* [D800-DFFF,10FFFF+] */
if (str + 4 > end) return OB_CS_TOOSMALL4;
wc -= 0x10000;
int2store(str, (0xD800 | ((wc >> 10) & 0x3FF)));
str += 2;
int2store(str, (0xDC00 | (wc & 0x3FF)));
return 4; /* [010000-10FFFF] */
}
/*
  Charset handler (function table) for utf16le.
  The first slot is NULL. The utf16le-specific entries are ob_lengthsp_utf16le,
  ob_utf16le_uni (mb_wc), ob_uni_utf16le (wc_mb) and, in the last slot,
  skip_trailing_space_utf16le; the remaining slots use the *_utf16, *_mb and
  *_mb2 routines.
*/
ObCharsetHandler ob_charset_utf16le_handler=
{
NULL,
ob_ismbchar_utf16,
ob_mbcharlen_utf16,
ob_numchars_utf16,
ob_charpos_utf16,
ob_max_bytes_charpos_mb,
ob_well_formed_len_utf16,
ob_lengthsp_utf16le,
ob_utf16le_uni, /* mb_wc */
ob_uni_utf16le, /* wc_mb */
ob_mb_ctype_mb,
ob_caseup_utf16,
ob_casedn_utf16,
ob_fill_mb2,
ob_strntol_mb2_or_mb4,
ob_strntoul_mb2_or_mb4,
ob_strntoll_mb2_or_mb4,
ob_strntoull_mb2_or_mb4,
ob_strntod_mb2_or_mb4,
ob_strntoull10rnd_mb2_or_mb4,
ob_scan_mb2,
skip_trailing_space_utf16le
};
/*
  Descriptor for the utf16le general_ci collation (names come from OB_UTF16LE
  and OB_UTF16LE_GENERAL_CI). It carries the OB_CS_PRIMARY flag, uses
  ob_unicase_default, and wires in ob_charset_utf16le_handler together with
  ob_collation_utf16_general_ci_handler.
  NOTE(review): the leading 56 is presumably the collation id, and the 2 / 4
  pair presumably mbminlen / mbmaxlen; confirm against the ObCharsetInfo field
  order.
*/
ObCharsetInfo ob_charset_utf16le_general_ci=
{
56,
0,
0,
OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM|OB_CS_UNICODE|OB_CS_NONASCII,
OB_UTF16LE,
OB_UTF16LE_GENERAL_CI,
"UTF-16LE Unicode",
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
&ob_unicase_default,
NULL,
NULL,
1,
1,
1,
2,
4,
1, /* mbmaxlenlen */
0,
0xFFFF,
' ',
0,
1,
1,
&ob_charset_utf16le_handler,
&ob_collation_utf16_general_ci_handler,
PAD_SPACE
};
/*
  Descriptor for the utf16le binary collation (names come from OB_UTF16LE and
  OB_UTF16LE_BIN). Compared with ob_charset_utf16le_general_ci it carries
  OB_CS_BINSORT instead of OB_CS_PRIMARY and uses
  ob_collation_utf16_bin_handler as its collation handler.
  NOTE(review): the leading 62 is presumably the collation id; confirm against
  the ObCharsetInfo field order.
*/
ObCharsetInfo ob_charset_utf16le_bin=
{
62,
0,
0,
OB_CS_COMPILED|OB_CS_BINSORT|OB_CS_STRNXFRM|OB_CS_UNICODE|OB_CS_NONASCII,
OB_UTF16LE,
OB_UTF16LE_BIN,
"UTF-16LE Unicode",
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
&ob_unicase_default,
NULL,
NULL,
1,
1,
1,
2,
4,
1, /* mbmaxlenlen */
0,
0xFFFF,
' ',
0,
1,
1,
&ob_charset_utf16le_handler,
&ob_collation_utf16_bin_handler,
PAD_SPACE
};

View File

@ -18,6 +18,7 @@
*
*/
#include "lib/charset/ob_byteorder.h"
#include "lib/charset/ob_ctype.h"
#include "lib/charset/ob_dtoa.h"
#include "lib/charset/ob_uctype.h"
@ -468,9 +469,7 @@ size_t ob_strnxfrm_unicode(const ObCharsetInfo *cs,
src+= res;
if (uni_plane)
ob_tosort_unicode(uni_plane, &wc, cs->state);
if ((res= cs->cset->wc_mb(cs, wc, dst, de)) <= 0)
break;
dst+= res;
dst = store16be(dst, wc); //这是是不是bydesign的
}
ob_strnxfrm_unicode_help(&dst,&de, nweights, flags, &dst0);
return dst - dst0;
@ -879,9 +878,13 @@ size_t ob_strnxfrm_unicode_full_bin(const ObCharsetInfo *cs,
break;
}
src+= res;
if ((res= cs->cset->wc_mb(cs, wc, dst, de)) <= 0)
break;
dst+= res;
*dst++= (uchar) (wc >> 16);
if (dst < de)
{
*dst++= (uchar) ((wc >> 8) & 0xFF);
if (dst < de)
*dst++= (uchar) (wc & 0xFF);
}
}
if (flags & OB_STRXFRM_PAD_WITH_SPACE)
{
@ -953,7 +956,7 @@ ObCharsetHandler ob_charset_utf8mb4_handler=
ob_max_bytes_charpos_mb,
ob_well_formed_len_utf8mb4,
ob_lengthsp_8bit,
ob_mb_wc_utf8mb4,
ob_mb_wc_utf8mb4_thunk,
ob_wc_mb_utf8mb4,
ob_mb_ctype_mb,
ob_caseup_utf8mb4,
@ -966,7 +969,8 @@ ObCharsetHandler ob_charset_utf8mb4_handler=
ob_strntod_8bit,
//ob_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
ob_scan_8bit,
skip_trailing_space
};
static ObCollationHandler ob_collation_utf8mb4_general_ci_handler=

View File

@ -9,6 +9,9 @@
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OB_CTYPE_UTF8_TAB_H_
#define OB_CTYPE_UTF8_TAB_H_
static unsigned char ctype_utf8mb4[]=
{
0,
@ -73,7 +76,7 @@ static uchar to_upper_utf8mb4[]=
};
static ObUnicaseInfoChar utf8_plane00[]={
ObUnicaseInfoChar utf8_plane00[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
@ -204,7 +207,7 @@ static ObUnicaseInfoChar utf8_plane00[]={
{0x00DE,0x00FE,0x00DE}, {0x0178,0x00FF,0x0059}
};
static ObUnicaseInfoChar utf8_plane01[]={
ObUnicaseInfoChar utf8_plane01[]={
{0x0100,0x0101,0x0041}, {0x0100,0x0101,0x0041},
{0x0102,0x0103,0x0041}, {0x0102,0x0103,0x0041},
{0x0104,0x0105,0x0041}, {0x0104,0x0105,0x0041},
@ -335,7 +338,7 @@ static ObUnicaseInfoChar utf8_plane01[]={
{0x01FE,0x01FF,0x00D8}, {0x01FE,0x01FF,0x00D8}
};
static ObUnicaseInfoChar utf8_plane02[]={
ObUnicaseInfoChar utf8_plane02[]={
{0x0200,0x0201,0x0041}, {0x0200,0x0201,0x0041},
{0x0202,0x0203,0x0041}, {0x0202,0x0203,0x0041},
{0x0204,0x0205,0x0045}, {0x0204,0x0205,0x0045},
@ -466,7 +469,7 @@ static ObUnicaseInfoChar utf8_plane02[]={
{0x02FE,0x02FE,0x02FE}, {0x02FF,0x02FF,0x02FF}
};
static ObUnicaseInfoChar utf8_plane03[]={
ObUnicaseInfoChar utf8_plane03[]={
{0x0300,0x0300,0x0300}, {0x0301,0x0301,0x0301},
{0x0302,0x0302,0x0302}, {0x0303,0x0303,0x0303},
{0x0304,0x0304,0x0304}, {0x0305,0x0305,0x0305},
@ -597,7 +600,7 @@ static ObUnicaseInfoChar utf8_plane03[]={
{0x03FE,0x03FE,0x03FE}, {0x03FF,0x03FF,0x03FF}
};
static ObUnicaseInfoChar utf8_plane04[]={
ObUnicaseInfoChar utf8_plane04[]={
{0x0400,0x0450,0x0415}, {0x0401,0x0451,0x0415},
{0x0402,0x0452,0x0402}, {0x0403,0x0453,0x0413},
{0x0404,0x0454,0x0404}, {0x0405,0x0455,0x0405},
@ -728,7 +731,7 @@ static ObUnicaseInfoChar utf8_plane04[]={
{0x04FE,0x04FE,0x04FE}, {0x04FF,0x04FF,0x04FF}
};
static ObUnicaseInfoChar utf8_plane05[]={
ObUnicaseInfoChar utf8_plane05[]={
{0x0500,0x0500,0x0500}, {0x0501,0x0501,0x0501},
{0x0502,0x0502,0x0502}, {0x0503,0x0503,0x0503},
{0x0504,0x0504,0x0504}, {0x0505,0x0505,0x0505},
@ -859,7 +862,7 @@ static ObUnicaseInfoChar utf8_plane05[]={
{0x05FE,0x05FE,0x05FE}, {0x05FF,0x05FF,0x05FF}
};
static ObUnicaseInfoChar utf8_plane1E[]={
ObUnicaseInfoChar utf8_plane1E[]={
{0x1E00,0x1E01,0x0041}, {0x1E00,0x1E01,0x0041},
{0x1E02,0x1E03,0x0042}, {0x1E02,0x1E03,0x0042},
{0x1E04,0x1E05,0x0042}, {0x1E04,0x1E05,0x0042},
@ -990,7 +993,7 @@ static ObUnicaseInfoChar utf8_plane1E[]={
{0x1EFE,0x1EFE,0x1EFE}, {0x1EFF,0x1EFF,0x1EFF}
};
static ObUnicaseInfoChar utf8_plane1F[]={
ObUnicaseInfoChar utf8_plane1F[]={
{0x1F08,0x1F00,0x0391}, {0x1F09,0x1F01,0x0391},
{0x1F0A,0x1F02,0x0391}, {0x1F0B,0x1F03,0x0391},
{0x1F0C,0x1F04,0x0391}, {0x1F0D,0x1F05,0x0391},
@ -1121,7 +1124,7 @@ static ObUnicaseInfoChar utf8_plane1F[]={
{0x1FFE,0x1FFE,0x1FFE}, {0x1FFF,0x1FFF,0x1FFF}
};
static ObUnicaseInfoChar utf8_plane21[]={
ObUnicaseInfoChar utf8_plane21[]={
{0x2100,0x2100,0x2100}, {0x2101,0x2101,0x2101},
{0x2102,0x2102,0x2102}, {0x2103,0x2103,0x2103},
{0x2104,0x2104,0x2104}, {0x2105,0x2105,0x2105},
@ -1252,7 +1255,7 @@ static ObUnicaseInfoChar utf8_plane21[]={
{0x21FE,0x21FE,0x21FE}, {0x21FF,0x21FF,0x21FF}
};
static ObUnicaseInfoChar utf8_plane24[]={
ObUnicaseInfoChar utf8_plane24[]={
{0x2400,0x2400,0x2400}, {0x2401,0x2401,0x2401},
{0x2402,0x2402,0x2402}, {0x2403,0x2403,0x2403},
{0x2404,0x2404,0x2404}, {0x2405,0x2405,0x2405},
@ -1383,7 +1386,7 @@ static ObUnicaseInfoChar utf8_plane24[]={
{0x24FE,0x24FE,0x24FE}, {0x24FF,0x24FF,0x24FF}
};
static ObUnicaseInfoChar utf8_planeFF[]={
ObUnicaseInfoChar utf8_planeFF[]={
{0xFF00,0xFF00,0xFF00}, {0xFF01,0xFF01,0xFF01},
{0xFF02,0xFF02,0xFF02}, {0xFF03,0xFF03,0xFF03},
{0xFF04,0xFF04,0xFF04}, {0xFF05,0xFF05,0xFF05},
@ -4461,3 +4464,5 @@ const ObUnicaseInfoChar *ob_unicase_pages_unicode520[4352] = {
ObUnicaseInfo ob_unicase_unicode520 = {0x10FFFF, ob_unicase_pages_unicode520};
#endif

View File

@ -148,7 +148,7 @@ TEST_F(TestCharset, sortkey)
char space[10] = " ";
size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, space, strlen(space), aa1, 10, is_valid_unicode);
ASSERT_EQ(size1, 2);
ASSERT_EQ(size1, 4);
ASSERT_TRUE(is_valid_unicode);
char empty[10] = "";
@ -161,7 +161,7 @@ TEST_F(TestCharset, sortkey)
invalid[1] = char(0x80);
invalid[2] = '\0';
size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, invalid, strlen(invalid), aa1, 10, is_valid_unicode);
ASSERT_EQ(size1, 1);
ASSERT_EQ(size1, 2);
ASSERT_FALSE(is_valid_unicode);
//std::map<int, int> charset{
@ -202,11 +202,11 @@ TEST_F(TestCharset, sortkey)
std::vector<std::vector<int>>result{
{0,1,1,1,1},
{1,4,1,1,0},
{2,6,1,0,0},
{2,4,1,0,0},
{3,6,1,0,0},
{4,1,1,1,1},
{5,4,1,0,0},
{6,4,1,0,0},
{6,6,1,0,0},
{7,1,1,1,1},
{8,4,1,1,1},
{9,10,1,10,1},
@ -612,7 +612,8 @@ TEST_F(TestCharset, tolower)
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length() );
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
auto charset_type = static_cast<ObCharsetType>(cs_i);
if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type || CHARSET_BINARY == charset_type)
if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type
|| CHARSET_UTF16LE == charset_type || CHARSET_BINARY == charset_type)
continue;
ObCollationType cs_type = ObCharset::get_default_collation(charset_type);
ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
@ -647,7 +648,8 @@ TEST_F(TestCharset, toupper)
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length() );
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
auto charset_type = static_cast<ObCharsetType>(cs_i);
if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type || CHARSET_BINARY == charset_type)
if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type
|| CHARSET_UTF16LE == charset_type || CHARSET_BINARY == charset_type)
continue;
ObCollationType cs_type = ObCharset::get_default_collation(charset_type);
ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
@ -855,13 +857,17 @@ TEST_F(TestCharset, check_mbmaxlenlen)
}
}
std::vector<const char *> test_strings = {"1", "abcdef", "ab1dc4", "你好", "b今a天", "1abad "};
std::vector<const char *> test_strings = {"1", "abcdef", "ab1dc4", "", "b今a天", "1abad "};
TEST_F(TestCharset, basic_collation_handler_test)
{
ObArenaAllocator alloc;
for (int i = CS_TYPE_INVALID; i < CS_TYPE_EXTENDED_MARK; i++) {
for (int i = CS_TYPE_INVALID; i < CS_TYPE_MAX; i++) {
ObCollationType coll = static_cast<ObCollationType>(i);
if (!ObCharset::is_valid_charset(coll)) {
continue;
}
const ObCharsetInfo * cs = ObCharset::get_charset(coll);
const char *coll_name = ObCharset::collation_name(coll);
if (OB_NOT_NULL(cs)) {
@ -957,6 +963,10 @@ TEST_F(TestCharset, foreach_char) {
"抚凌云而自惜;钟期既遇,奏流水以何惭?呜呼!胜地不常,盛筵难再;兰亭已矣,梓泽丘墟。临别赠言,幸承恩于伟饯"
"登高作赋,是所望于群公。敢竭鄙怀,恭疏短引;一言均赋,四韵俱成。请洒潘江,各倾陆海云尔:滕王高阁临江渚"
"佩玉鸣鸾罢歌舞。画栋朝飞南浦云,珠帘暮卷西山雨。闲云潭影日悠悠,物换星移几度秋。阁中帝子今何在?槛外长江空自流。";
const char *data1 = "豫章故郡,洪都新府。星分翼軒,地接衡廬。襟三江而帶五湖,控蠻荊而引甌越。物華天寶,龍光射牛斗之墟。落霞與孤鷺齊飛,秋水共長天一色。"
"人傑地靈,徐孺下陳蕃之榻。雄州霧列,俊採星馳。台隍枕夷夏之交,賓主盡東南之美。都督閻之雅望,棨戟遙臨"
"時維九月,序屬三秋。潦水盡而寒潭清,煙光凝而暮山紫。物華天寶";
/*
const char *data = "I hear America singing, the varied carols I hear,Those of mechanics, "
"each one singing his as it should be blithe and strong,The carpenter "
@ -990,6 +1000,7 @@ TEST_F(TestCharset, foreach_char) {
};
ObString data_in(data);
ObString data_in1(data1);
ObArenaAllocator alloc;
for (int i = CHARSET_BINARY + 1; i <= CHARSET_GB18030; i++) {
@ -1036,8 +1047,11 @@ TEST_F(TestCharset, foreach_char) {
ObCollationType test_collation_type = ObCharset::get_default_collation(test_cs_type);
ObString data_out;
ASSERT_TRUE(ObCharset::is_valid_collation(test_collation_type));
if (ObCharset::get_charset(test_collation_type)->mbmaxlen == 1) {
if (ObCharset::get_charset(test_collation_type)->mbmaxlen == 1 || test_cs_type == CHARSET_SJIS) {
data_out = data_in;
continue;
} else if (test_cs_type == CHARSET_BIG5 || test_cs_type == CHARSET_HKSCS || test_cs_type == CHARSET_HKSCS31) {
ASSERT_TRUE(OB_SUCCESS == ObCharset::charset_convert(alloc, data_in1, CS_TYPE_UTF8MB4_BIN, test_collation_type, data_out));
} else {
ASSERT_TRUE(OB_SUCCESS == ObCharset::charset_convert(alloc, data_in, CS_TYPE_UTF8MB4_BIN, test_collation_type, data_out));
}

View File

@ -30,8 +30,23 @@ ob_set_subtarget(ob_share ALONE
vector/expr_cmp_func_parts/expr_cmp_func_part_13.cpp
vector/expr_cmp_func_parts/expr_cmp_func_part_14.cpp
vector/expr_cmp_func_parts/expr_cmp_func_part_15.cpp
aggregate/approx_count_distinct_synopsis.cpp
datum/ob_datum_funcs_compilation_0.cpp
datum/ob_datum_funcs_compilation_1.cpp
datum/ob_datum_funcs_compilation_2.cpp
datum/ob_datum_funcs_compilation_3.cpp
datum/ob_datum_funcs_compilation_4.cpp
datum/ob_datum_funcs_compilation_5.cpp
datum/ob_datum_funcs_compilation_6.cpp
datum/ob_datum_funcs_compilation_7.cpp
datum/ob_datum_funcs_compilation_8.cpp
datum/ob_datum_funcs_compilation_9.cpp
datum/ob_datum_funcs_compilation_10.cpp
datum/ob_datum_funcs_compilation_11.cpp
datum/ob_datum_funcs_compilation_12.cpp
datum/ob_datum_funcs_compilation_13.cpp
datum/ob_datum_funcs_compilation_14.cpp
datum/ob_datum_funcs_compilation_15.cpp
)
file(GLOB SCHEMA_CPPS "inner_table/ob_inner_table_schema.*.cpp")

View File

@ -488,14 +488,129 @@ typedef ObConstIntMapping<0,
CS_TYPE_GB18030_2022_RADICAL_CS, 1,
CS_TYPE_GB18030_2022_STROKE_CI, 1,
CS_TYPE_GB18030_2022_STROKE_CS, 1,
CS_TYPE_UTF8MB4_CROATIAN_CI, 1,
CS_TYPE_UTF8MB4_UNICODE_520_CI, 1,
CS_TYPE_UTF8MB4_CZECH_CI, 1,
CS_TYPE_ASCII_GENERAL_CI,1,
CS_TYPE_ASCII_BIN,1,
CS_TYPE_TIS620_THAI_CI,1,
CS_TYPE_TIS620_BIN,1,
CS_TYPE_UTF8MB4_0900_AI_CI, 1> SupportedCollections;
CS_TYPE_UTF16LE_GENERAL_CI, 1,
CS_TYPE_UTF16LE_BIN, 1,
CS_TYPE_SJIS_JAPANESE_CI, 1,
CS_TYPE_SJIS_BIN, 1,
CS_TYPE_BIG5_CHINESE_CI, 1,
CS_TYPE_BIG5_BIN, 1,
CS_TYPE_HKSCS_BIN, 1,
CS_TYPE_HKSCS31_BIN, 1,
CS_TYPE_UTF8MB4_ICELANDIC_UCA_CI, 1,
CS_TYPE_UTF8MB4_LATVIAN_UCA_CI , 1,
CS_TYPE_UTF8MB4_ROMANIAN_UCA_CI , 1,
CS_TYPE_UTF8MB4_SLOVENIAN_UCA_CI, 1,
CS_TYPE_UTF8MB4_POLISH_UCA_CI , 1,
CS_TYPE_UTF8MB4_ESTONIAN_UCA_CI , 1,
CS_TYPE_UTF8MB4_SPANISH_UCA_CI , 1,
CS_TYPE_UTF8MB4_SWEDISH_UCA_CI , 1,
CS_TYPE_UTF8MB4_TURKISH_UCA_CI , 1,
CS_TYPE_UTF8MB4_CZECH_UCA_CI , 1,
CS_TYPE_UTF8MB4_DANISH_UCA_CI , 1,
CS_TYPE_UTF8MB4_LITHUANIAN_UCA_CI, 1,
CS_TYPE_UTF8MB4_SLOVAK_UCA_CI , 1,
CS_TYPE_UTF8MB4_SPANISH2_UCA_CI , 1,
CS_TYPE_UTF8MB4_ROMAN_UCA_CI , 1,
CS_TYPE_UTF8MB4_PERSIAN_UCA_CI , 1,
CS_TYPE_UTF8MB4_ESPERANTO_UCA_CI, 1,
CS_TYPE_UTF8MB4_HUNGARIAN_UCA_CI, 1,
CS_TYPE_UTF8MB4_SINHALA_UCA_CI , 1,
CS_TYPE_UTF8MB4_GERMAN2_UCA_CI , 1,
CS_TYPE_UTF8MB4_CROATIAN_UCA_CI , 1,
CS_TYPE_UTF8MB4_UNICODE_520_CI , 1,
CS_TYPE_UTF8MB4_VIETNAMESE_CI , 1,
CS_TYPE_UTF16_ICELANDIC_UCA_CI , 1,
CS_TYPE_UTF16_LATVIAN_UCA_CI , 1,
CS_TYPE_UTF16_ROMANIAN_UCA_CI , 1,
CS_TYPE_UTF16_SLOVENIAN_UCA_CI , 1,
CS_TYPE_UTF16_POLISH_UCA_CI , 1,
CS_TYPE_UTF16_ESTONIAN_UCA_CI , 1,
CS_TYPE_UTF16_SPANISH_UCA_CI , 1,
CS_TYPE_UTF16_SWEDISH_UCA_CI , 1,
CS_TYPE_UTF16_TURKISH_UCA_CI , 1,
CS_TYPE_UTF16_CZECH_UCA_CI , 1,
CS_TYPE_UTF16_DANISH_UCA_CI , 1,
CS_TYPE_UTF16_LITHUANIAN_UCA_CI , 1,
CS_TYPE_UTF16_SLOVAK_UCA_CI , 1,
CS_TYPE_UTF16_SPANISH2_UCA_CI , 1,
CS_TYPE_UTF16_ROMAN_UCA_CI , 1,
CS_TYPE_UTF16_PERSIAN_UCA_CI , 1,
CS_TYPE_UTF16_ESPERANTO_UCA_CI , 1,
CS_TYPE_UTF16_HUNGARIAN_UCA_CI , 1,
CS_TYPE_UTF16_SINHALA_UCA_CI , 1,
CS_TYPE_UTF16_GERMAN2_UCA_CI , 1,
CS_TYPE_UTF16_CROATIAN_UCA_CI , 1,
CS_TYPE_UTF16_UNICODE_520_CI , 1,
CS_TYPE_UTF16_VIETNAMESE_CI , 1,
CS_TYPE_UTF8MB4_0900_AI_CI , 1,
CS_TYPE_UTF8MB4_DE_PB_0900_AI_CI , 1,
CS_TYPE_UTF8MB4_IS_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_LV_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_RO_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_SL_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_PL_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_ET_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_ES_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_SV_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_TR_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_CS_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_DA_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_LT_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_SK_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_ES_TRAD_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_LA_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_EO_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_HU_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_HR_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_VI_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_DE_PB_0900_AS_CS , 1,
CS_TYPE_UTF8MB4_IS_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_LV_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_RO_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_SL_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_PL_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_ET_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_ES_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_SV_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_TR_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_CS_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_DA_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_LT_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_SK_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_ES_TRAD_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_LA_0900_AS_CS , 1,
CS_TYPE_UTF8MB4_EO_0900_AS_CS , 1,
CS_TYPE_UTF8MB4_HU_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_HR_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_VI_0900_AS_CS , 1,
CS_TYPE_UTF8MB4_JA_0900_AS_CS , 1,
CS_TYPE_UTF8MB4_JA_0900_AS_CS_KS , 1,
CS_TYPE_UTF8MB4_0900_AS_CI, 1,
CS_TYPE_UTF8MB4_RU_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_RU_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_ZH_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_0900_BIN, 1,
CS_TYPE_UTF8MB4_NB_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_NB_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_NN_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_NN_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_SR_LATN_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_SR_LATN_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_BS_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_BS_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_BG_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_BG_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_GL_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_GL_0900_AS_CS, 1,
CS_TYPE_UTF8MB4_MN_CYRL_0900_AI_CI, 1,
CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS, 1,
CS_TYPE_DEC8_SWEDISH_CI, 1,
CS_TYPE_DEC8_BIN, 1> SupportedCollections;
// bool is_calc_with_end_space(ObObjType type1, ObObjType type2,
// bool is_oracle_mode,

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,55 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_STR_DATUM_FUNCS_IPP
#define OCEANBASE_STR_DATUM_FUNCS_IPP
#include "ob_datum_funcs.h"
#include "ob_datum_cmp_func_def.h"
#include "common/object/ob_obj_funcs.h"
#include "sql/engine/ob_serializable_function.h"
#include "sql/engine/ob_bit_vector.h"
#include "share/ob_cluster_version.h"
#include "share/datum/ob_datum_funcs_impl.h"
namespace oceanbase
{
using namespace sql;
namespace common
{
// Number of slices every registration range is split into.
static const int COMPILATION_UNIT = 16;

// Declares three constexpr ints describing the `unit_idx`-th slice of [0, max_val):
//   <name>_unit_size : slice width, i.e. max_val / COMPILATION_UNIT rounded up
//   <name>_start     : first index of the slice, clamped to max_val
//   <name>_end       : one past the last index of the slice, clamped to max_val
// A slice whose start is clamped to max_val is empty (start == end).
//
// `max_val` and `unit_idx` are parenthesized at every use so that expression
// arguments (e.g. `A + B`) are not torn apart by the surrounding `/`, `%` and `*`.
#define DEF_COMPILATION_VARS(name, max_val, unit_idx)                                            \
  constexpr int name##_unit_size =                                                               \
      (max_val) / COMPILATION_UNIT + ((max_val) % COMPILATION_UNIT == 0 ? 0 : 1);                \
  constexpr int name##_start =                                                                   \
      (name##_unit_size * (unit_idx) < (max_val) ? name##_unit_size * (unit_idx) : (max_val));   \
  constexpr int name##_end =                                                                     \
      (name##_start + name##_unit_size >= (max_val) ? (max_val) : name##_start + name##_unit_size);
// Defines `void __init_str_func<unit_idx>()`, the initializer for slice `unit_idx`.
// For each bound (CS_TYPE_MAX, ObMaxType, ObMaxTC) DEF_COMPILATION_VARS computes this
// slice's [start, end) sub-range, and the matching *ConstIniter template is then
// instantiated with that sub-range and its init() is called.
// Note: `ty` and `ty_basic` are both derived from ObMaxType, so they describe the same range.
// NOTE(review): presumably each initer only fills entries in [start, end) so the sixteen
// units together cover the whole table exactly once — confirm against the initer templates.
#define DEF_STR_FUNC_INIT(unit_idx) \
void __init_str_func##unit_idx() \
{ \
DEF_COMPILATION_VARS(cs, CS_TYPE_MAX, unit_idx); \
DEF_COMPILATION_VARS(ty, ObMaxType, unit_idx); \
DEF_COMPILATION_VARS(tc, ObMaxTC, unit_idx); \
DEF_COMPILATION_VARS(ty_basic, ObMaxType, unit_idx); \
ObArrayConstIniter<cs_end, str_cmp_initer, cs_start>::init(); \
Ob2DArrayConstIniter<cs_end, 2, str_basic_initer, cs_start, 0>::init(); \
Ob2DArrayConstIniter<ty_end, ObMaxType, InitTypeCmpArray, ty_start, 0>::init(); \
Ob2DArrayConstIniter<tc_end, ObMaxTC, InitTCCmpArray, tc_start, 0>::init(); \
ObArrayConstIniter<ty_basic_end, InitBasicFuncArray, ty_basic_start>::init(); \
}
} // end common
} // end oceanbase
#endif // OCEANBASE_STR_DATUM_FUNCS_IPP

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 0: expands to the definition of __init_str_func0() (macro from the included .ipp).
DEF_STR_FUNC_INIT(0);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 1: expands to the definition of __init_str_func1() (macro from the included .ipp).
DEF_STR_FUNC_INIT(1);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 10: expands to the definition of __init_str_func10() (macro from the included .ipp).
DEF_STR_FUNC_INIT(10);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 11: expands to the definition of __init_str_func11() (macro from the included .ipp).
DEF_STR_FUNC_INIT(11);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 12: expands to the definition of __init_str_func12() (macro from the included .ipp).
DEF_STR_FUNC_INIT(12);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 13: expands to the definition of __init_str_func13() (macro from the included .ipp).
DEF_STR_FUNC_INIT(13);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 14: expands to the definition of __init_str_func14() (macro from the included .ipp).
DEF_STR_FUNC_INIT(14);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 15: expands to the definition of __init_str_func15() (macro from the included .ipp).
DEF_STR_FUNC_INIT(15);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 2: expands to the definition of __init_str_func2() (macro from the included .ipp).
DEF_STR_FUNC_INIT(2);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 3: expands to the definition of __init_str_func3() (macro from the included .ipp).
DEF_STR_FUNC_INIT(3);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 4: expands to the definition of __init_str_func4() (macro from the included .ipp).
DEF_STR_FUNC_INIT(4);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 5: expands to the definition of __init_str_func5() (macro from the included .ipp).
DEF_STR_FUNC_INIT(5);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 6: expands to the definition of __init_str_func6() (macro from the included .ipp).
DEF_STR_FUNC_INIT(6);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 7: expands to the definition of __init_str_func7() (macro from the included .ipp).
DEF_STR_FUNC_INIT(7);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 8: expands to the definition of __init_str_func8() (macro from the included .ipp).
DEF_STR_FUNC_INIT(8);
} // end common
} // end oceanbase

View File

@ -0,0 +1,21 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_datum_funcs_compilation.ipp"
namespace oceanbase
{
namespace common
{
// Slice 9: expands to the definition of __init_str_func9() (macro from the included .ipp).
DEF_STR_FUNC_INIT(9);
} // end common
} // end oceanbase

File diff suppressed because it is too large Load Diff

View File

@ -525,8 +525,9 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
}
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|| cs == CS_TYPE_GB18030_CHINESE_CI || cs == CS_TYPE_UTF16LE_GENERAL_CI
|| cs == CS_TYPE_UTF16LE_BIN
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
max_buf_len - to_len - safety_buf_size,
is_mem, is_valid_uni);
@ -577,8 +578,9 @@ int ObOrderPerservingEncoder::encode_from_string_varlen(
}
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|| cs == CS_TYPE_UTF16LE_GENERAL_CI || cs == CS_TYPE_UTF16LE_BIN
|| cs == CS_TYPE_GB18030_CHINESE_CI
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
max_buf_len - to_len - safty_buf_size,
param.is_memcmp_, param.is_valid_uni_);
@ -877,6 +879,8 @@ int ObOrderPerservingEncoder::encode_tails(unsigned char *to, int64_t max_buf_le
|| cs == CS_TYPE_UTF16_GENERAL_CI
|| cs == CS_TYPE_UTF16_BIN
|| cs == CS_TYPE_GB18030_CHINESE_CI
|| cs == CS_TYPE_UTF16LE_GENERAL_CI
|| cs == CS_TYPE_UTF16LE_BIN
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
if (with_empty_str) {
*to = 0x00;

View File

@ -358,8 +358,7 @@ int ObSchemaPrinter::print_table_definition_columns(const ObTableSchema &table_s
oceanbase::common::ObCharsetType::CHARSET_BINARY == charset_type) {
// observer perform no conversion of result sets or error messages, you can see more detail the official website of MySQL
} else {
ObCollationType collation_type = ObCharset::get_default_collation(charset_type);
if (OB_FAIL(ObCharset::charset_convert(allocator, default_value.get_string(), default_value.get_collation_type(), collation_type, out_str))) {
if (OB_FAIL(ObCharset::charset_convert(allocator, default_value.get_string(), default_value.get_collation_type(), ObCharset::get_system_collation(), out_str))) {
SHARE_SCHEMA_LOG(WARN, "fail to convert charset", K(ret));
}
}

View File

@ -1924,6 +1924,24 @@ int ObSysVarOnCheckFuncs::check_and_convert_charset(ObExecContext &ctx,
ret = OB_INVALID_ARGUMENT;
LOG_ERROR("invalid type", K(ret), K(in_val));
}
if (OB_SUCC(ret)) {
if (0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_CLIENT)
|| 0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_CONNECTION)
|| 0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_RESULTS)) {
ObCollationType cstype = static_cast<ObCollationType>(out_val.get_int());
if (!ObCharset::is_valid_collation(cstype)) {
ret = OB_ERR_UNKNOWN_CHARSET;
LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, in_val.get_string().length(), in_val.get_string().ptr());
} else if(ObCharset::get_charset(cstype)->mbminlen > 1) {
ret = OB_ERR_WRONG_VALUE_FOR_VAR;
LOG_USER_ERROR(OB_ERR_WRONG_VALUE_FOR_VAR,
set_var.var_name_.length(),
set_var.var_name_.ptr(),
in_val.get_string().length(),
in_val.get_string().ptr());
}
}
}
}
return ret;
}
@ -1997,6 +2015,23 @@ int ObSysVarOnCheckFuncs::check_and_convert_collation_not_null(ObExecContext &ct
ret = OB_INVALID_ARGUMENT;
LOG_ERROR("invalid type", K(ret), K(in_val));
}
if (OB_SUCC(ret)) {
if (0 == set_var.var_name_.case_compare(OB_SV_COLLATION_CONNECTION)) {
ObCollationType cstype = static_cast<ObCollationType>(out_val.get_int());
if (!ObCharset::is_valid_collation(cstype)) {
ret = OB_ERR_UNKNOWN_CHARSET;
LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, in_val.get_string().length(), in_val.get_string().ptr());
} else if(ObCharset::get_charset(cstype)->mbminlen > 1) {
ret = OB_ERR_WRONG_VALUE_FOR_VAR;
LOG_USER_ERROR(OB_ERR_WRONG_VALUE_FOR_VAR,
set_var.var_name_.length(),
set_var.var_name_.ptr(),
in_val.get_string().length(),
in_val.get_string().ptr());
}
}
}
}
return ret;
}

View File

@ -220,7 +220,7 @@ struct VecTCHashCalc<VEC_TC_LOB, HashMethod, hash_v2>
const uchar *key = reinterpret_cast<const uchar *>(char_data);
const uchar *pos = key;
int length = char_len;
key = skip_trailing_space(key, char_len, 0);
key = skip_trailing_space(&ob_charset_utf8mb4_bin, key, char_len);
length = (int)(key - pos);
res = HashMethod::hash((void*)pos, length, seed);
}
@ -419,7 +419,7 @@ struct VecTCHashCalc<VEC_TC_STRING, HashMethod, hash_v2> {
const uchar *key = reinterpret_cast<const uchar *>(data);
const uchar *pos = key;
int length = len;
key = skip_trailing_space(key, len, 0);
key = skip_trailing_space(&ob_charset_utf8mb4_bin, key, len);
length = (int)(key - pos);
res = HashMethod::hash((void*)pos, length, seed);
}

View File

@ -307,7 +307,7 @@ int ObDASDomainUtils::generate_spatial_index_rows(
int ret = OB_SUCCESS;
if (OB_ISNULL(helper)
|| OB_UNLIKELY(ObCollationType::CS_TYPE_INVALID == type
|| ObCollationType::CS_TYPE_EXTENDED_MARK < type)
|| ObCollationType::CS_TYPE_PINYIN_BEGIN_MARK <= type)
|| OB_UNLIKELY(!words_count.created())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid arguments", K(ret), KPC(helper), K(type), K(words_count.created()));

View File

@ -213,6 +213,19 @@ public:
ret = scan_proto<common::CHARSET_GB18030, handle_func, NEED_ESCAPED_RESULT>(
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
break;
case common::CHARSET_SJIS:
ret = scan_proto<common::CHARSET_SJIS, handle_func, NEED_ESCAPED_RESULT>(
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
break;
case common::CHARSET_BIG5:
ret = scan_proto<common::CHARSET_BIG5, handle_func, NEED_ESCAPED_RESULT>(
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
break;
case common::CHARSET_HKSCS:
case common::CHARSET_HKSCS31:
ret = scan_proto<common::CHARSET_HKSCS, handle_func, NEED_ESCAPED_RESULT>(
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
break;
default:
ret = scan_proto<common::CHARSET_BINARY, handle_func, NEED_ESCAPED_RESULT>(
str, end, nrows, escape_buf, escaped_buf_end, handle_one_line, errors, is_end_file);
@ -332,6 +345,27 @@ inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_GBK>(const char *ptr, c
return (0x81 <= c && c <= 0xFE) ? 2 : 1;
}
// SJIS: returns 2 when the byte at `ptr` lies in 0x81..0x9f or 0xe0..0xfc
// (treated as the first byte of a two-byte character), otherwise 1.
// `end` is not consulted.
template<>
inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_SJIS>(const char *ptr, const char *end) {
  UNUSED(end);
  const unsigned char lead = *ptr;
  const bool in_low_range = (lead >= 0x81) && (lead <= 0x9f);
  const bool in_high_range = (lead >= 0xe0) && (lead <= 0xfc);
  if (in_low_range || in_high_range) {
    return 2;
  }
  return 1;
}
// BIG5: returns 2 when the byte at `ptr` lies in 0xa1..0xf9 (treated as the first
// byte of a two-byte character), otherwise 1. `end` is not consulted.
template<>
inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_BIG5>(const char *ptr, const char *end) {
  UNUSED(end);
  const unsigned char lead = *ptr;
  if (lead < 0xa1 || lead > 0xf9) {
    return 1;
  }
  return 2;
}
// HKSCS: returns 2 when the byte at `ptr` lies in 0x81..0xfe (treated as the first
// byte of a two-byte character), otherwise 1. `end` is not consulted.
template<>
inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_HKSCS>(const char *ptr, const char *end) {
  UNUSED(end);
  const unsigned char lead = *ptr;
  if (lead < 0x81 || lead > 0xfe) {
    return 1;
  }
  return 2;
}
template<>
inline int ObCSVGeneralParser::mbcharlen<common::CHARSET_GB18030>(const char *ptr, const char *end) {
int mb_len = 1;

View File

@ -844,22 +844,6 @@ int ObVariableSetExecutor::check_and_convert_sys_var(ObExecContext &ctx,
int ret = OB_SUCCESS;
//OB_ASSERT(true == var_node.is_system_variable_);
// collation_connection的取值有限制,不能设置成utf16
if (OB_SUCC(ret)) {
if ((0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_CLIENT)
|| 0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_CONNECTION)
|| 0 == set_var.var_name_.case_compare(OB_SV_CHARACTER_SET_RESULTS)
|| 0 == set_var.var_name_.case_compare(OB_SV_COLLATION_CONNECTION))
&& (in_val.get_string().prefix_match_ci("utf16"))) {
ret = OB_ERR_WRONG_VALUE_FOR_VAR;
LOG_USER_ERROR(OB_ERR_WRONG_VALUE_FOR_VAR,
set_var.var_name_.length(),
set_var.var_name_.ptr(),
in_val.get_string().length(),
in_val.get_string().ptr());
}
}
//check readonly
if (is_set_stmt && sys_var.is_readonly()) {
if (sys_var.is_with_upgrade() && GCONF.in_upgrade_mode()) {

View File

@ -656,8 +656,8 @@ int ObExprCast::get_cast_type(const bool enable_decimal_int,
dst_type.set_udt_id(param_type2.get_udt_id());
if (ob_is_collection_sql_type(obj_type)) {
// recover subschema id
dst_type.set_collation_type(static_cast<ObCollationType>(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX]));
dst_type.set_collation_level(static_cast<ObCollationLevel>(parse_node.int16_values_[OB_NODE_CAST_CS_LEVEL_IDX]));
dst_type.set_cs_type(static_cast<ObCollationType>(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX]));
dst_type.set_cs_level(static_cast<ObCollationLevel>(parse_node.int16_values_[OB_NODE_CAST_CS_LEVEL_IDX]));
}
} else if (lib::is_mysql_mode() && ob_is_json(obj_type)) {
dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN);

View File

@ -131,23 +131,27 @@ int ObExprFuncPartHash::calc_hash_value_with_seed(const ObObj &obj, int64_t seed
ObObj obj_trimmed;
int32_t val_len = obj.get_val_len();
const char* obj1_str = obj.get_string_ptr();
bool is_utf16 = ObCharset::charset_type_by_coll(obj.get_collation_type()) == CHARSET_UTF16;
while (val_len >= (is_utf16 ? 2 : 1)) {
if (is_utf16
&& OB_PADDING_CHAR == *(obj1_str + val_len - 1)
&& OB_PADDING_BINARY == *(obj1_str + val_len - 2)) {
val_len -= 2;
} else if (OB_PADDING_CHAR == *(obj1_str + val_len - 1)) {
--val_len;
char* real_end = NULL;
// oracle hash test
if (OB_FAIL(common::ObCharset::trim_end_of_str(obj1_str, val_len, real_end,
ObCharset::charset_type_by_coll(obj.get_collation_type())))){
LOG_WARN("fail to trim end of str", K(ret));
} else if (OB_ISNULL(real_end)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null ptr", K(ret));
} else {
val_len = real_end - obj1_str;
if (val_len < 0) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected length", K(val_len));
} else {
break;
obj_trimmed.set_collation_type(obj.get_collation_type());
obj_trimmed.set_string(ObCharType, obj.get_string_ptr(), val_len);
if (OB_FAIL(obj_trimmed.hash_murmur(res, seed))) {
LOG_WARN("fail to do hash", K(ret));
}
}
}
obj_trimmed.set_collation_type(obj.get_collation_type());
obj_trimmed.set_string(ObCharType, obj.get_string_ptr(), val_len);
if (OB_FAIL(obj_trimmed.hash_murmur(res, seed))) {
LOG_WARN("fail to do hash", K(ret));
}
} else if (obj.is_decimal_int()) {
ret = wide::PartitionHash<ObMurmurHash, ObObj>::calculate(obj, seed, res);
} else {
@ -338,22 +342,21 @@ int ObExprFuncPartHash::eval_oracle_part_hash(
if (ObCharType == arg.datum_meta_.type_
|| ObNCharType == arg.datum_meta_.type_) {
ObDatum str = *d;
const bool is_utf16 = CHARSET_UTF16 == ObCharset::charset_type_by_coll(
arg.datum_meta_.cs_type_);
const char *end = str.ptr_ + str.len_;
while (end - str.ptr_ >= (is_utf16 ? 2 : 1)) {
if (is_utf16 && OB_PADDING_CHAR == *(end - 1) && OB_PADDING_BINARY == *(end - 2)) {
end -= 2;
} else if (OB_PADDING_CHAR == *(end - 1)) {
end -= 1;
} else {
break;
char *end = NULL;
if (OB_FAIL(common::ObCharset::trim_end_of_str(str.ptr_, str.len_, end, ObCharset::charset_type_by_coll(arg.datum_meta_.cs_type_)))) {
LOG_WARN("failed to trim str end");
} else if (OB_ISNULL(end)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to get end of string", K(ret));
} else {
str.len_ = end - str.ptr_;
if (str.len_ < 0) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to get end of string", K(str.len_));
} else if (OB_FAIL(arg.basic_funcs_->murmur_hash_(str, hash_val, hash_val))) {
LOG_WARN("hash failed", K(ret));
}
}
str.len_ = end - str.ptr_;
if (OB_FAIL(arg.basic_funcs_->murmur_hash_(str, hash_val, hash_val))) {
LOG_WARN("hash failed", K(ret));
}
} else if (arg.datum_meta_.type_ == ObDecimalIntType) {
ret = wide::PartitionHash<ObMurmurHash, ObDatum>::calculate(*d, hash_val, hash_val);
} else {

View File

@ -132,14 +132,14 @@ int ObExprNLSSort::eval_nlssort_inner(const ObExpr &expr,
LOG_WARN("invalid cs", K(ret), K(coll_type));
} else if (((ob_is_nchar(arg0_obj_type)) || (ob_is_char(arg0_obj_type, arg0_coll_type)))
&& (OB_FAIL(ObCharsetUtils::remove_char_endspace(input_str,
ObCharset::charset_type_by_coll(arg0_coll_type))))) {
ObCharset::get_charset(arg0_coll_type))))) {
LOG_WARN("remove char endspace failed", K(ret));
} else if (OB_FAIL(convert_to_coll_code(ctx, arg0_coll_type, input_str, coll_type, out))) {
LOG_WARN("convert to coll code failed", K(ret));
} else {
LOG_DEBUG("check coll type", K(coll_type), K(arg0_coll_type), K(expr),
K(arg0_obj_type), K(out.length()));
size_t buf_len = cs->coll->strnxfrmlen(cs, out.length());
size_t buf_len = cs->coll->strnxfrmlen(cs, cs->mbmaxlen*out.length());
char *buf = NULL;
size_t result_len = 0;
if (OB_ISNULL(buf = expr.get_str_res_mem(ctx, buf_len))) {

View File

@ -969,7 +969,7 @@ int ObSPIService::spi_calc_expr(ObPLExecCtx *ctx,
} else {
ObString res = result->get_string();
OZ (ObCharsetUtils::remove_char_endspace( // this function only adjust res.data_length_
res, ObCharset::charset_type_by_coll(result->get_collation_type())));
res, ObCharset::get_charset(result->get_collation_type())));
OX (result->val_len_ = res.length());
}
} else {

View File

@ -643,6 +643,12 @@ int ObSQLUtils::is_charset_data_version_valid(ObCharsetType charset_type, const
ret = OB_NOT_SUPPORTED;
SQL_LOG(WARN, "charset not supported when data_version < 4_2_4_0 or between [430,433)",K(charset_type), K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.4 or between [430,433), charset is");
} else if ((CHARSET_SJIS == charset_type || CHARSET_HKSCS == charset_type || CHARSET_HKSCS31 == charset_type
|| CHARSET_DEC8 == charset_type || CHARSET_BIG5 == charset_type || CHARSET_UTF16LE == charset_type)
&& ((data_version < MOCK_DATA_VERSION_4_2_5_0) || (DATA_VERSION_4_3_0_0 <= data_version && data_version < DATA_VERSION_4_3_4_0))) {
ret = OB_NOT_SUPPORTED;
SQL_LOG(WARN, "charset not supported when data_version < 4_2_5_0 or between [430,434)",K(charset_type), K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.5 or between [430,434), charset is");
}
return ret;
}
@ -655,12 +661,26 @@ int ObSQLUtils::is_collation_data_version_valid(ObCollationType collation_type,
SQL_LOG(WARN, "failed to GET_MIN_DATA_VERSION", K(ret));
} else if ((data_version < MOCK_DATA_VERSION_4_2_4_0
|| (data_version >= DATA_VERSION_4_3_0_0 && data_version < DATA_VERSION_4_3_3_0))
&& (CS_TYPE_UTF8MB4_CROATIAN_CI == collation_type
&& (CS_TYPE_UTF8MB4_CROATIAN_UCA_CI == collation_type
|| CS_TYPE_UTF8MB4_UNICODE_520_CI == collation_type
|| CS_TYPE_UTF8MB4_CZECH_CI == collation_type)) {
|| CS_TYPE_UTF8MB4_CZECH_UCA_CI == collation_type
|| CS_TYPE_UTF8MB4_0900_AI_CI == collation_type)) {
ret = OB_NOT_SUPPORTED;
SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_2_0", K(collation_type), K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.2, unicode collation is");
SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_4_0 or between [430,433)", K(collation_type), K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Unicode collation not supported when data_version < 4_2_4_0 or between [430,433), unicode collation is");
} else if ((data_version < MOCK_DATA_VERSION_4_2_5_0
|| (data_version >= DATA_VERSION_4_3_0_0 && data_version < DATA_VERSION_4_3_4_0))
&& (CS_TYPE_UTF8MB4_ZH_0900_AS_CS != collation_type &&
CS_TYPE_UTF8MB4_CROATIAN_UCA_CI != collation_type &&
CS_TYPE_UTF8MB4_UNICODE_520_CI != collation_type &&
CS_TYPE_UTF8MB4_CZECH_UCA_CI != collation_type &&
CS_TYPE_UTF8MB4_0900_AI_CI != collation_type &&
((CS_TYPE_UTF8MB4_0900_AI_CI <= collation_type && collation_type <= CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS)
|| (CS_TYPE_UTF16_ICELANDIC_UCA_CI <= collation_type && collation_type <= CS_TYPE_UTF16_VIETNAMESE_CI)
|| (CS_TYPE_UTF8MB4_ICELANDIC_UCA_CI <= collation_type && collation_type <= CS_TYPE_UTF8MB4_VIETNAMESE_CI)))) {
ret = OB_NOT_SUPPORTED;
SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_5_0 or between [430,434)", K(collation_type), K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Unicode collation not supported when data_version < 4_2_5_0 or between [430,434), unicode collation is");
}
#ifndef OB_BUILD_CLOSE_MODULES
if (OB_SUCC(ret)) {
@ -673,15 +693,6 @@ int ObSQLUtils::is_collation_data_version_valid(ObCollationType collation_type,
}
}
#endif
if (OB_SUCC(ret)) {
if ((CS_TYPE_UTF8MB4_0900_AI_CI == collation_type) &&
((data_version < MOCK_DATA_VERSION_4_2_4_0) ||
(DATA_VERSION_4_3_0_0 <= data_version && data_version < DATA_VERSION_4_3_3_0))) {
ret = OB_NOT_SUPPORTED;
SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_4_0 or between [430,433)", K(collation_type), K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.4 or between [430,433), collation is");
}
}
return ret;
}
@ -1254,31 +1265,37 @@ int ObSQLUtils::check_and_convert_table_name(const ObCollationType cs_type,
char origin_name[OB_MAX_USER_TABLE_NAME_LENGTH_ORACLE * OB_MAX_CHAR_LEN + 1] = {'\0'};
MEMCPY(origin_name, name_str, name_len);
if (!preserve_lettercase) {
ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, name);
}
bool check_for_path_chars = false;
int64_t max_ident_len = max_user_table_name_length;
if ((stmt::T_SELECT == stmt_type || stmt::T_INSERT == stmt_type) && is_index_table) {
// Index tables carry an extra name prefix, so queries use OB_MAX_TABLE_NAME_LENGTH as the length limit
max_ident_len = OB_MAX_TABLE_NAME_LENGTH;
}
if (OB_ERR_WRONG_IDENT_NAME == (ret = check_ident_name(CS_TYPE_UTF8MB4_GENERAL_CI,
name,
check_for_path_chars,
max_ident_len))) {
if (lib::is_oracle_mode()) {
// It allows the last char of table name and index name is space in oracle mode
ret = OB_SUCCESS;
} else {
size_t sz = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, name);
if (sz == 0) {
ret = OB_WRONG_TABLE_NAME;
LOG_USER_ERROR(OB_WRONG_TABLE_NAME, (int)strlen(origin_name), origin_name);
LOG_WARN("Incorrect table name", K(origin_name), K(ret));
LOG_WARN("fail to convert table name to lower case", K(name), K(ret));
}
}
if (OB_SUCC(ret)) {
bool check_for_path_chars = false;
int64_t max_ident_len = max_user_table_name_length;
if ((stmt::T_SELECT == stmt_type || stmt::T_INSERT == stmt_type) && is_index_table) {
// Index tables carry an extra name prefix, so queries use OB_MAX_TABLE_NAME_LENGTH as the length limit
max_ident_len = OB_MAX_TABLE_NAME_LENGTH;
}
if (OB_ERR_WRONG_IDENT_NAME == (ret = check_ident_name(CS_TYPE_UTF8MB4_GENERAL_CI,
name,
check_for_path_chars,
max_ident_len))) {
if (lib::is_oracle_mode()) {
// It allows the last char of table name and index name is space in oracle mode
ret = OB_SUCCESS;
} else {
ret = OB_WRONG_TABLE_NAME;
LOG_USER_ERROR(OB_WRONG_TABLE_NAME, (int)strlen(origin_name), origin_name);
LOG_WARN("Incorrect table name", K(origin_name), K(ret));
}
} else if (OB_ERR_TOO_LONG_IDENT == ret) {
LOG_USER_ERROR(OB_ERR_TOO_LONG_IDENT, (int)strlen(origin_name), origin_name);
LOG_WARN("table name is too long", K(origin_name), K(max_ident_len), K(ret), K(stmt_type), K(is_index_table));
} else if (OB_FAIL(ret)) {
LOG_WARN("fail to check ident name", K(origin_name), K(ret));
}
} else if (OB_ERR_TOO_LONG_IDENT == ret) {
LOG_USER_ERROR(OB_ERR_TOO_LONG_IDENT, (int)strlen(origin_name), origin_name);
LOG_WARN("table name is too long", K(origin_name), K(max_ident_len), K(ret), K(stmt_type), K(is_index_table));
} else if (OB_FAIL(ret)) {
LOG_WARN("fail to check ident name", K(origin_name), K(ret));
}
}
return ret;

View File

@ -56,6 +56,8 @@ if (OB_BUILD_ORACLE_PARSER)
sql_parser_oracle_single_byte_mode_lex.h
sql_parser_oracle_single_byte_mode_tab.c
sql_parser_oracle_single_byte_mode_tab.h
sql_parser_oracle_hkscs_mode_lex.c
sql_parser_oracle_hkscs_mode_tab.c
)
set(ob_inner_sql_parser_object_list
@ -63,6 +65,7 @@ if (OB_BUILD_ORACLE_PARSER)
non_reserved_keywords_oracle_utf8_mode.c
non_reserved_keywords_oracle_gbk_mode.c
non_reserved_keywords_oracle_single_byte_mode.c
non_reserved_keywords_oracle_hkscs_mode.c
)
endif()
@ -89,6 +92,7 @@ set(ob_inner_sql_parser_object_list
ob_char_type.h
ob_fast_parser.h
ob_fast_parser.cpp
ob_parser_charset_utils.cpp
sql_parser_base.c
sql_parser_base.h
sql_parser_base.h

View File

@ -206,6 +206,57 @@ rm -f ../../../src/sql/parser/gbk.txt
rm -f ../../../src/sql/parser/sql_parser_oracle_gbk_mode.l
rm -f ../../../src/sql/parser/sql_parser_oracle_gbk_mode.y
# generate oracle hkscs sql_parser (supports multi_byte_space, multi_byte_comma, multi_byte_left_parenthesis, multi_byte_right_parenthesis)
##1.copy lex and yacc files
cat ../../../src/sql/parser/sql_parser_oracle_mode.y > ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
cat ../../../src/sql/parser/sql_parser_oracle_mode.l > ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
##2.replace name
sed "s/obsql_oracle_yy/obsql_oracle_hkscs_yy/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
sed "s/obsql_oracle_yy/obsql_oracle_hkscs_yy/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed "s/sql_parser_oracle_mode/sql_parser_oracle_hkscs_mode/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
sed "s/sql_parser_oracle_mode/sql_parser_oracle_hkscs_mode/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed "s/obsql_oracle_parser_fatal_error/obsql_oracle_hkscs_parser_fatal_error/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
sed "s/obsql_oracle_parser_fatal_error/obsql_oracle_hkscs_parser_fatal_error/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed "s/obsql_oracle_fast_parse/obsql_oracle_hkscs_fast_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
sed "s/obsql_oracle_multi_fast_parse/obsql_oracle_hkscs_multi_fast_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
sed "s/obsql_oracle_multi_values_parse/obsql_oracle_hkscs_multi_values_parse/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
##3.add multi_byte_space, multi_byte_comma, multi_byte_left_parenthesis, multi_byte_right_parenthesis code.
sed "s/multi_byte_space \[\\\u3000\]/multi_byte_space ([\\\xa1][\\\x40])/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed "s/multi_byte_comma \[\\\uff0c\]/multi_byte_comma ([\\\xa1][\\\x41])/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed "s/multi_byte_left_parenthesis \[\\\uff08\]/multi_byte_left_parenthesis ([\\\xa1][\\\x5d])/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed "s/multi_byte_right_parenthesis \[\\\uff09\]/multi_byte_right_parenthesis ([\\\xa1][\\\x5e])/g" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
echo "HK_1 [\x81-\xfe]
HK_1_1 [\x81-\xa0]
HK_1_2 [\xa1]
HK_1_3 [\xa2-\xfe]
HK_2fb [\x40-\x7e]
HK_2fb_1 [\x42-\x5c]
HK_2fb_2 [\x5f-\xa1]
HK_2sb [\xa1-\xfe]
g_except_space_comma_parenthesis ({HK_1_2}{HK_2fb_1}|{HK_1_2}{HK_2fb_2})
HK_CHAR ({HK_1_1}{HK_2fb}|{HK_1_1}{HK_2sb}|{g_except_space_comma_parenthesis}|{HK_1_2}{HK_2sb}|{HK_1_3}{HK_2fb}|{HK_1_3}{HK_2sb})" > ../../../src/sql/parser/hkscs.txt
sed '/following character status will be rewrite by gen_parse.sh according to connection character/d' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed '/multi_byte_connect_char \/\*According to connection character to set by gen_parse.sh\*\//r ../../../src/sql/parser/hkscs.txt' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed '/multi_byte_connect_char \/\*According to connection character to set by gen_parse.sh\*\//d' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed 's/space \[ \\t\\n\\r\\f\]/space (\[ \\t\\n\\r\\f\]|{multi_byte_space})/g' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
sed 's/multi_byte_connect_char/HK_CHAR/g' -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
##4.generate oracle hkscs parser files
bison_parser ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_tab.c
flex -o ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_tab.h
##5.replace other info
sed "/Setup the input buffer state to scan the given bytes/,/}/{/int i/d}" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
sed "/Setup the input buffer state to scan the given bytes/,/}/{/for ( i = 0; i < _yybytes_len; ++i )/d}" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
sed "/Setup the input buffer state to scan the given bytes/,/}/{s/\tbuf\[i\] = yybytes\[i\]/memcpy(buf, yybytes, _yybytes_len)/g}" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
sed "/obsql_oracle_hkscs_yylex_init is special because it creates the scanner itself/,/Initialization is the same as for the non-reentrant scanner/{s/return 1/return errno/g}" -i ../../../src/sql/parser/sql_parser_oracle_hkscs_mode_lex.c
cat ../../../src/sql/parser/non_reserved_keywords_oracle_mode.c > ../../../src/sql/parser/non_reserved_keywords_oracle_hkscs_mode.c
sed '/#include "ob_non_reserved_keywords.h"/a\#include "sql/parser/sql_parser_oracle_hkscs_mode_tab.h\"' -i ../../../src/sql/parser/non_reserved_keywords_oracle_hkscs_mode.c
sed "s/non_reserved_keywords_oracle_mode.c is for …/non_reserved_keywords_oracle_hkscs_mode.c is auto generated by gen_parser.sh/g" -i ../../../src/sql/parser/non_reserved_keywords_oracle_hkscs_mode.c
##6.clean useless files
rm -f ../../../src/sql/parser/hkscs.txt
rm -f ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.l
rm -f ../../../src/sql/parser/sql_parser_oracle_hkscs_mode.y
rm -rf ../../../src/sql/parser/sql_parser_oracle_mode.y
rm -rf ../../../src/sql/parser/sql_parser_oracle_mode.l

View File

@ -471,6 +471,8 @@ inline int64_t ObFastParserBase::is_identifier_flags(const int64_t pos)
idf_pos = is_gbk_char(pos);
} else if (charset_info_->mbmaxlen == 1) {
idf_pos = is_single_byte_char(pos);
} else if (CHARSET_HKSCS == charset_type_ || CHARSET_HKSCS31 == charset_type_) {
idf_pos = is_hk_char(pos);
}
return idf_pos;
}
@ -1075,6 +1077,47 @@ inline int64_t ObFastParserBase::is_utf8_multi_byte_right_parenthesis(
return idf_pos;
}
// Tests whether the HKSCS two-byte space (0xa1 0x40) starts at str[pos].
// Returns pos + 2 (the offset just past the sequence) on a match, -1 otherwise.
// No bounds check is done here: the caller must make sure str[pos + 1] is
// readable. The second byte is only inspected when the first byte matches.
inline int64_t ObFastParserBase::is_hk_multi_byte_space(const char *str, const int64_t pos)
{
  const bool matched = (0xa1 == static_cast<uint8_t>(str[pos]))
                       && (0x40 == static_cast<uint8_t>(str[pos + 1]));
  return matched ? pos + 2 : -1;
}
// Tests whether the HKSCS two-byte comma (0xa1 0x41) starts at str[pos].
// Returns pos + 2 (the offset just past the sequence) on a match, -1 otherwise.
// No bounds check is done here: the caller must make sure str[pos + 1] is
// readable. The second byte is only inspected when the first byte matches.
inline int64_t ObFastParserBase::is_hk_multi_byte_comma(const char *str, const int64_t pos)
{
  const bool matched = (0xa1 == static_cast<uint8_t>(str[pos]))
                       && (0x41 == static_cast<uint8_t>(str[pos + 1]));
  return matched ? pos + 2 : -1;
}
// Tests whether the HKSCS two-byte left parenthesis (0xa1 0x5d) starts at str[pos].
// Returns pos + 2 (the offset just past the sequence) on a match, -1 otherwise.
// No bounds check is done here: the caller must make sure str[pos + 1] is
// readable. The second byte is only inspected when the first byte matches.
inline int64_t ObFastParserBase::is_hk_multi_byte_left_parenthesis(
    const char *str, const int64_t pos)
{
  const bool matched = (0xa1 == static_cast<uint8_t>(str[pos]))
                       && (0x5d == static_cast<uint8_t>(str[pos + 1]));
  return matched ? pos + 2 : -1;
}
// Tests whether the HKSCS two-byte right parenthesis (0xa1 0x5e) starts at str[pos].
// Returns pos + 2 (the offset just past the sequence) on a match, -1 otherwise.
// No bounds check is done here: the caller must make sure str[pos + 1] is
// readable. The second byte is only inspected when the first byte matches.
inline int64_t ObFastParserBase::is_hk_multi_byte_right_parenthesis(
    const char *str, const int64_t pos)
{
  const bool matched = (0xa1 == static_cast<uint8_t>(str[pos]))
                       && (0x5e == static_cast<uint8_t>(str[pos + 1]));
  return matched ? pos + 2 : -1;
}
// ([\\\xa1][\\\xa1])
inline int64_t ObFastParserBase::is_gbk_multi_byte_space(const char *str, const int64_t pos)
{
@ -1138,6 +1181,22 @@ inline int64_t ObFastParserBase::is_gbk_char(const int64_t pos)
return idf_pos;
}
inline int64_t ObFastParserBase::is_hk_char(const int64_t pos)
{
int64_t idf_pos = -1;
if (is_oracle_mode_ &&
pos + 2 < raw_sql_.raw_sql_len_ &&
(-1 != is_hk_multi_byte_space(raw_sql_.raw_sql_, pos) ||
-1 != is_hk_multi_byte_comma(raw_sql_.raw_sql_, pos) ||
-1 != is_hk_multi_byte_left_parenthesis(raw_sql_.raw_sql_, pos) ||
-1 != is_hk_multi_byte_right_parenthesis(raw_sql_.raw_sql_, pos))) {
raw_sql_.scan(2);
} else if (is_hk1(raw_sql_.char_at(pos)) && is_hk2(raw_sql_.char_at(pos + 1))) {
idf_pos = pos + 2;
}
return idf_pos;
}
int64_t ObFastParserBase::is_whitespace(int64_t pos)
{
int64_t ws_end_pos = -1;
@ -1410,6 +1469,27 @@ char *ObFastParserBase::parse_strdup_with_replace_multi_byte_char(
} else {
out_str[len++] = str[i];
}
} else if (CHARSET_HKSCS == charset_type_ || CHARSET_HKSCS31 == charset_type_) {
  // HKSCS branch. charset_type_ holds a charset id, so it must be compared
  // with the CHARSET_* constants (as is_identifier_flags does); the previous
  // literals 152/153 are collation ids and did not match that usage.
  // Two-byte HKSCS space / parentheses are folded to their ASCII equivalents;
  // every other byte is copied through unchanged.
  if (i + 1 < dup_len) {
    if (str[i] == (char)0xa1 && str[i+1] == (char)0x40) {//hkscs multi byte space
      out_str[len++] = ' ';
      ++i;
    } else if (str[i] == (char)0xa1 && str[i+1] == (char)0x5d) {
      //hkscs multi byte left parenthesis
      out_str[len++] = '(';
      ++i;
    } else if (str[i] == (char)0xa1 && str[i+1] == (char)0x5e) {
      //hkscs multi byte right parenthesis
      out_str[len++] = ')';
      ++i;
    } else {
      out_str[len++] = str[i];
    }
  } else {
    // Last byte of the input: no room for a two-byte sequence.
    out_str[len++] = str[i];
  }
} else {
out_str[len++] = str[i];
}
@ -1636,6 +1716,8 @@ inline int64_t ObFastParserBase::is_first_identifier_flags(const int64_t pos)
idf_pos = is_gbk_char(pos);
} else if (charset_info_->mbmaxlen == 1) {
idf_pos = is_single_byte_char(pos);
} else if (CHARSET_HKSCS == charset_type_ || CHARSET_HKSCS31 == charset_type_) {
idf_pos = is_hk_char(pos);
}
return idf_pos;
}

View File

@ -399,6 +399,11 @@ protected:
// ([\\\xef\][\\\xbc\][\\\x89])
int64_t is_utf8_multi_byte_right_parenthesis(const char *str, const int64_t start_pos);
// HKSCS two-byte character: first byte accepted by is_hk1, second byte by is_hk2
int64_t is_hk_char(const int64_t pos);
// ([\\\xa1][\\\x40])
int64_t is_hk_multi_byte_space(const char *str, const int64_t start_pos);
// ([\\\xa1][\\\x41])
int64_t is_hk_multi_byte_comma(const char *str, const int64_t start_pos);
// ([\\\xa1][\\\x5d])
int64_t is_hk_multi_byte_left_parenthesis(const char *str, const int64_t start_pos);
// ([\\\xa1][\\\x5e])
int64_t is_hk_multi_byte_right_parenthesis(const char *str, const int64_t start_pos);
// {GB_1}{GB_2}
int64_t is_gbk_char(const int64_t pos);
// ([\\\xa1][\\\xa1])
int64_t is_gbk_multi_byte_space(const char *str, const int64_t start_pos);
@ -438,12 +443,28 @@ protected:
return is_valid_char(ch) &&
static_cast<uint8_t>(ch) >= 0x81 && static_cast<uint8_t>(ch) <= 0xfe;
}
// [\x81-\xfe]
// First-byte test for a two-byte HKSCS character: the byte must pass
// is_valid_char and lie in 0x81..0xfe (inclusive).
inline bool is_hk1(char ch)
{
  const uint8_t byte = static_cast<uint8_t>(ch);
  return is_valid_char(ch) && byte >= 0x81 && byte <= 0xfe;
}
// [\x40-\xfe]
// Second-byte test for a GB character: the byte must pass is_valid_char and
// lie in 0x40..0xfe (inclusive).
inline bool is_gb2(char ch)
{
  const uint8_t byte = static_cast<uint8_t>(ch);
  return is_valid_char(ch) && byte >= 0x40 && byte <= 0xfe;
}
// [\x40-\x7e] | [\xa1-\xfe]
// Second-byte test for a two-byte HKSCS character: the byte must pass
// is_valid_char and fall in one of the two ranges above (inclusive).
inline bool is_hk2(char ch)
{
  const uint8_t byte = static_cast<uint8_t>(ch);
  const bool in_low_range = (byte >= 0x40 && byte <= 0x7e);
  const bool in_high_range = (byte >= 0xa1 && byte <= 0xfe);
  return is_valid_char(ch) && (in_low_range || in_high_range);
}
inline bool notascii(char ch)
{
return is_valid_char(ch) &&

View File

@ -0,0 +1,100 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "ob_parser_charset_utils.h"
#include "lib/charset/ob_charset.h"
#ifdef __cplusplus
extern "C"
{
#endif
namespace oceanbase{
// Reports through *is_gb whether collation_type belongs to the GBK / GB18030 /
// GB18030-2022 families. Always returns OB_SUCCESS.
// is_gb is written unconditionally and must not be null.
int obcharset_is_gb_charset_of_collation(ObCollationType collation_type, bool *is_gb) {
  switch (collation_type) {
    case CS_TYPE_GBK_CHINESE_CI:
    case CS_TYPE_GBK_BIN:
    case CS_TYPE_GB18030_CHINESE_CI:
    case CS_TYPE_GB18030_BIN:
    case CS_TYPE_GB18030_CHINESE_CS:
      *is_gb = true;
      break;
    default:
      // The GB18030-2022 collations are matched as one contiguous id range.
      *is_gb = (CS_TYPE_GB18030_2022_BIN <= collation_type
                && collation_type <= CS_TYPE_GB18030_2022_STROKE_CS);
      break;
  }
  return OB_SUCCESS;
}
// Reports through *is_single_byte whether collation_type is one of the listed
// latin1 / ascii / tis620 / dec8 collations. Always returns OB_SUCCESS.
// is_single_byte is written unconditionally and must not be null.
int obcharset_is_single_byte_charset_of_collation(ObCollationType collation_type, bool *is_single_byte) {
  switch (collation_type) {
    case CS_TYPE_LATIN1_SWEDISH_CI:
    case CS_TYPE_LATIN1_BIN:
    case CS_TYPE_ASCII_GENERAL_CI:
    case CS_TYPE_ASCII_BIN:
    case CS_TYPE_TIS620_BIN:
    case CS_TYPE_TIS620_THAI_CI:
    case CS_TYPE_DEC8_BIN:
    case CS_TYPE_DEC8_SWEDISH_CI:
      *is_single_byte = true;
      break;
    default:
      *is_single_byte = false;
      break;
  }
  return OB_SUCCESS;
}
// Reports through *is_utf8 whether collation_type is handled as utf8mb4:
// the general_ci / bin / unicode_ci collations, CS_TYPE_BINARY, and the id
// range CS_TYPE_UTF8MB4_0900_AI_CI .. CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS.
// Always returns OB_SUCCESS. is_utf8 is written unconditionally and must not
// be null.
// NOTE(review): parse_strdup_with_replace_multi_byte_char also treats the id
// range 224..247 (UNICODE_CI .. VIETNAMESE_CI) as utf8; that range is not
// accepted here — confirm whether the difference is intentional.
int obcharset_is_utf8_charset_of_collation(ObCollationType collation_type, bool *is_utf8) {
  const bool is_listed_collation =
      (CS_TYPE_UTF8MB4_GENERAL_CI == collation_type)
      || (CS_TYPE_UTF8MB4_BIN == collation_type)
      || (CS_TYPE_UTF8MB4_UNICODE_CI == collation_type)
      || (CS_TYPE_BINARY == collation_type);
  const bool is_in_0900_range =
      (CS_TYPE_UTF8MB4_0900_AI_CI <= collation_type)
      && (collation_type <= CS_TYPE_UTF8MB4_MN_CYRL_0900_AS_CS);
  *is_utf8 = is_listed_collation || is_in_0900_range;
  return OB_SUCCESS;
}
int obcharset_get_parser_type_by_coll(const int collation_type, ObCharsetParserType *parser_type) {
int ret = OB_SUCCESS;
bool is_gb = false;
bool is_single_byte = false;
bool is_utf8 = false;
ObCollationType coll_type = static_cast<ObCollationType>(collation_type);
if (OB_ISNULL(parser_type)) {
ret = OB_INVALID_ARGUMENT;
} else if (OB_FAIL(obcharset_is_gb_charset_of_collation(coll_type, &is_gb))) {
/* do nothing */
} else if (is_gb) {
*parser_type = CHARSET_PARSER_TYPE_GB;
} else if (coll_type == CS_TYPE_HKSCS_BIN || coll_type == CS_TYPE_HKSCS31_BIN) {
*parser_type = CHARSET_PARSER_TYPE_HKSCS;
} else if (OB_FAIL(obcharset_is_single_byte_charset_of_collation(coll_type, &is_single_byte))) {
/* do nothing */
} else if (is_single_byte) {
*parser_type = CHARSET_PARSER_TYPE_SINGLE_BYTE;
} else if (OB_FAIL(obcharset_is_utf8_charset_of_collation(coll_type, &is_utf8))){
/* do nothing */
} else if (is_utf8) {
*parser_type = CHARSET_PARSER_TYPE_UTF8MB4;
} else {
ret = -1;
}
return ret;
}
}
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,36 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_COMMON_OB_PARSER_CHARSET_UTILS_H
#define OCEANBASE_COMMON_OB_PARSER_CHARSET_UTILS_H
#ifdef __cplusplus
extern "C"
{
#endif
// Parser family chosen for a connection collation; the Oracle-mode entry points
// (parse_init / parse_terminate / parse_sql) switch on this value to pick the
// matching generated lexer/parser.
typedef enum ObCharsetParserType_ {
// No family selected; callers use this as the initial value.
CHARSET_PARSER_TYPE_NONE = 0,
// GBK / GB18030 / GB18030-2022 collations.
CHARSET_PARSER_TYPE_GB,
// Single-byte collations (latin1, ascii, tis620, dec8).
CHARSET_PARSER_TYPE_SINGLE_BYTE,
// utf8mb4 collations and CS_TYPE_BINARY.
CHARSET_PARSER_TYPE_UTF8MB4,
// CS_TYPE_HKSCS_BIN and CS_TYPE_HKSCS31_BIN.
CHARSET_PARSER_TYPE_HKSCS,
// NOTE(review): presumably an end-of-enum marker; not referenced in the visible code.
CHARSET_PARSER_TYPE_MAX,
} ObCharsetParserType;
// Maps a collation id to its parser family. Writes *parser_type and returns
// OB_SUCCESS on a match; returns OB_INVALID_ARGUMENT when parser_type is null
// and -1 when the collation has no parser family.
int obcharset_get_parser_type_by_coll(const int collation_type, ObCharsetParserType *parser_type);
#ifdef __cplusplus
}
#endif
#endif //OCEANBASE_COMMON_OB_PARSER_CHARSET_UTILS_H

View File

@ -235,69 +235,78 @@ char *parse_strdup_with_replace_multi_byte_char(const char *str, int *connection
int64_t len = 0;
int64_t dup_len = strlen(str);
for (int64_t i = 0; i < dup_len; ++i) {
switch (*connection_collation_) {
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
case 87/*CS_TYPE_GBK_BIN*/:
case 216/*CS_TYPE_GB18030_2022_BIN*/:
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
case 249/*CS_TYPE_GB18030_BIN*/: {
if (i + 1 < dup_len) {
if (str[i] == (char)0xa1 && str[i+1] == (char)0xa1) {//gbk multi byte space
out_str[len++] = ' ';
++i;
} else if (str[i] == (char)0xa3 && str[i+1] == (char)0xa8) {
//gbk multi byte left parenthesis
out_str[len++] = '(';
++i;
} else if (str[i] == (char)0xa3 && str[i+1] == (char)0xa9) {
//gbk multi byte right parenthesis
out_str[len++] = ')';
++i;
} else {
out_str[len++] = str[i];
}
if (*connection_collation_ == 28/*CS_TYPE_GBK_CHINESE_CI*/
|| *connection_collation_ == 87/*CS_TYPE_GBK_BIN*/
|| *connection_collation_ == 248/*CS_TYPE_GB18030_CHINESE_CI*/
|| *connection_collation_ == 249/*CS_TYPE_GB18030_BIN*/
|| (*connection_collation_ >= 216/*CS_TYPE_GB18030_2022_BIN*/
&& *connection_collation_ <= 222/*CS_TYPE_GB18030_2022_STROKE_CS*/)) {
if (i + 1 < dup_len) {
if (str[i] == (char)0xa1 && str[i+1] == (char)0xa1) {//gbk multi byte space
out_str[len++] = ' ';
++i;
} else if (str[i] == (char)0xa3 && str[i+1] == (char)0xa8) {
//gbk multi byte left parenthesis
out_str[len++] = '(';
++i;
} else if (str[i] == (char)0xa3 && str[i+1] == (char)0xa9) {
//gbk multi byte right parenthesis
out_str[len++] = ')';
++i;
} else {
out_str[len++] = str[i];
}
break;
} else {
out_str[len++] = str[i];
}
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
case 245/*CS_TYPE_UTF8MB4_CROATIAN_CI*/:
case 246/*CS_TYPE_UTF8MB4_UNICODE_520_CI*/:
case 234/*CS_TYPE_UTF8MB4_CZECH_CI*/:
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
{
if (i + 2 < dup_len) {
if (str[i] == (char)0xe3 && str[i+1] == (char)0x80 && str[i+2] == (char)0x80) {
//utf8 multi byte space
out_str[len++] = ' ';
i = i + 2;
} else if (str[i] == (char)0xef && str[i+1] == (char)0xbc && str[i+2] == (char)0x88) {
//utf8 multi byte left parenthesis
out_str[len++] = '(';
i = i + 2;
} else if (str[i] == (char)0xef && str[i+1] == (char)0xbc && str[i+2] == (char)0x89) {
//utf8 multi byte right parenthesis
out_str[len++] = ')';
i = i + 2;
} else {
out_str[len++] = str[i];
}
} else if (
*connection_collation_ == 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/
|| *connection_collation_ == 46/*CS_TYPE_UTF8MB4_BIN*/
|| *connection_collation_ == 63/*CS_TYPE_BINARY*/
|| *connection_collation_ == 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/
|| (*connection_collation_ >= 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/
&& *connection_collation_ <= 247/*CS_TYPE_UTF8MB4_VIETNAMESE_CI*/)) {
if (i + 2 < dup_len) {
if (str[i] == (char)0xe3 && str[i+1] == (char)0x80 && str[i+2] == (char)0x80) {
//utf8 multi byte space
out_str[len++] = ' ';
i = i + 2;
} else if (str[i] == (char)0xef && str[i+1] == (char)0xbc && str[i+2] == (char)0x88) {
//utf8 multi byte left parenthesis
out_str[len++] = '(';
i = i + 2;
} else if (str[i] == (char)0xef && str[i+1] == (char)0xbc && str[i+2] == (char)0x89) {
//utf8 multi byte right parenthesis
out_str[len++] = ')';
i = i + 2;
} else {
out_str[len++] = str[i];
}
break;
} else {
out_str[len++] = str[i];
}
default:
} else if (
*connection_collation_ == 152
|| *connection_collation_ == 153) {
if (i + 1 < dup_len) {
if (str[i] == (char)0xa1 && str[i+1] == (char)0x40) {//hkscs multi byte space
out_str[len++] = ' ';
++i;
} else if (str[i] == (char)0xa1 && str[i+1] == (char)0x5d) {
//hkscs multi byte left parenthesis
out_str[len++] = '(';
++i;
} else if (str[i] == (char)0xa1 && str[i+1] == (char)0x5e) {
//hkscs multi byte right parenthesis
out_str[len++] = ')';
++i;
} else {
out_str[len++] = str[i];
}
} else {
out_str[len++] = str[i];
}
} else {
out_str[len++] = str[i];
}
}

View File

@ -11,6 +11,7 @@
*/
#include "sql_parser_base.h"
#include "ob_parser_charset_utils.h"
#define YY_EXTRA_TYPE void *
#define yyconst const
@ -28,6 +29,7 @@ extern YY_BUFFER_STATE obsql_mysql_yy_scan_bytes (yyconst char *bytes,int len ,y
extern void obsql_mysql_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
extern void obsql_mysql_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
#ifdef OB_BUILD_ORACLE_PARSER
extern int obsql_oracle_single_byte_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
extern int obsql_oracle_single_byte_yyparse(ParseResult *result);
extern int obsql_oracle_single_byte_multi_fast_parse(ParseResult *p);
@ -37,6 +39,7 @@ extern int obsql_oracle_single_byte_yylex_destroy (yyscan_t yyscanner );
extern YY_BUFFER_STATE obsql_oracle_single_byte_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
extern void obsql_oracle_single_byte_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
extern void obsql_oracle_single_byte_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
extern int obsql_oracle_utf8_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
extern int obsql_oracle_utf8_yyparse(ParseResult *result);
extern int obsql_oracle_utf8_multi_fast_parse(ParseResult *p);
@ -46,6 +49,7 @@ extern int obsql_oracle_utf8_yylex_destroy (yyscan_t yyscanner );
extern YY_BUFFER_STATE obsql_oracle_utf8_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
extern void obsql_oracle_utf8_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
extern void obsql_oracle_utf8_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
extern int obsql_oracle_gbk_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
extern int obsql_oracle_gbk_yyparse(ParseResult *result);
extern int obsql_oracle_gbk_multi_fast_parse(ParseResult *p);
@ -55,7 +59,21 @@ extern int obsql_oracle_gbk_yylex_destroy (yyscan_t yyscanner );
extern YY_BUFFER_STATE obsql_oracle_gbk_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
extern void obsql_oracle_gbk_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
extern void obsql_oracle_gbk_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
extern int obsql_oracle_hkscs_yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals );
extern int obsql_oracle_hkscs_yyparse(ParseResult *result);
extern int obsql_oracle_hkscs_multi_fast_parse(ParseResult *p);
extern int obsql_oracle_hkscs_multi_values_parse(ParseResult *p);
extern int obsql_oracle_hkscs_fast_parse(ParseResult *p);
extern int obsql_oracle_hkscs_yylex_destroy (yyscan_t yyscanner );
extern YY_BUFFER_STATE obsql_oracle_hkscs_yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
extern void obsql_oracle_hkscs_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
extern void obsql_oracle_hkscs_yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
extern int obcharset_get_parser_type_by_coll(const int collation_type, ObCharsetParserType *parser_type);
#endif
int parse_init(ParseResult *p)
{
int ret = 0; // can not include C++ file "ob_define.h"
@ -71,40 +89,29 @@ int parse_init(ParseResult *p)
if (OB_LIKELY( 0 == ret)) {
#ifdef OB_BUILD_ORACLE_PARSER
if (IS_ORACLE_COMPATIBLE) {
switch (p->connection_collation_) {
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
case 87/*CS_TYPE_GBK_BIN*/:
case 216/*CS_TYPE_GB18030_2022_BIN*/:
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
case 249/*CS_TYPE_GB18030_BIN*/:
ret = obsql_oracle_gbk_yylex_init_extra(p, &(p->yyscan_info_));
break;
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
ret = obsql_oracle_utf8_yylex_init_extra(p, &(p->yyscan_info_));
break;
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
case 47/*CS_TYPE_LATIN1_BIN*/:
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
case 65/*CS_TYPE_ASCII_BIN*/:
case 18/*CS_TYPE_TIS620_THAI_CI*/:
case 89/*CS_TYPE_TIS620_BIN*/:
ret = obsql_oracle_single_byte_yylex_init_extra(p, &(p->yyscan_info_));
break;
default: {
ret = -1;
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
p->connection_collation_);
break;
ObCharsetParserType type = CHARSET_PARSER_TYPE_NONE;
if (ret = obcharset_get_parser_type_by_coll(p->connection_collation_, &type),
0 != ret) {
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get charset failed: %u",
p->connection_collation_);
} else {
switch(type) {
case CHARSET_PARSER_TYPE_GB:
ret = obsql_oracle_gbk_yylex_init_extra(p, &(p->yyscan_info_));
break;
case CHARSET_PARSER_TYPE_SINGLE_BYTE:
ret = obsql_oracle_single_byte_yylex_init_extra(p, &(p->yyscan_info_));
break;
case CHARSET_PARSER_TYPE_UTF8MB4:
ret = obsql_oracle_utf8_yylex_init_extra(p, &(p->yyscan_info_));
break;
case CHARSET_PARSER_TYPE_HKSCS:
ret = obsql_oracle_hkscs_yylex_init_extra(p, &(p->yyscan_info_));
break;
default:
ret = -1;
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
p->connection_collation_);
}
}
} else {
@ -132,40 +139,29 @@ int parse_terminate(ParseResult *p)
if (OB_LIKELY(NULL != p->yyscan_info_)) {
#ifdef OB_BUILD_ORACLE_PARSER
if (IS_ORACLE_COMPATIBLE) {
switch (p->connection_collation_) {
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
case 87/*CS_TYPE_GBK_BIN*/:
case 216/*CS_TYPE_GB18030_2022_BIN*/:
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
case 249/*CS_TYPE_GB18030_BIN*/:
ret = obsql_oracle_gbk_yylex_destroy(p->yyscan_info_);
break;
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
ret = obsql_oracle_utf8_yylex_destroy(p->yyscan_info_);
break;
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
case 47/*CS_TYPE_LATIN1_BIN*/:
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
case 65/*CS_TYPE_ASCII_BIN*/:
case 18/*CS_TYPE_TIS620_THAI_CI*/:
case 89/*CS_TYPE_TIS620_BIN*/:
ret = obsql_oracle_single_byte_yylex_destroy(p->yyscan_info_);
break;
default: {
ret = -1;
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
p->connection_collation_);
break;
ObCharsetParserType type = CHARSET_PARSER_TYPE_NONE;
if (ret = obcharset_get_parser_type_by_coll(p->connection_collation_, &type),
0 != ret) {
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get charset failed: %u",
p->connection_collation_);
} else {
switch(type) {
case CHARSET_PARSER_TYPE_GB:
ret = obsql_oracle_gbk_yylex_destroy(p->yyscan_info_);
break;
case CHARSET_PARSER_TYPE_SINGLE_BYTE:
ret = obsql_oracle_single_byte_yylex_destroy(p->yyscan_info_);
break;
case CHARSET_PARSER_TYPE_UTF8MB4:
ret = obsql_oracle_utf8_yylex_destroy(p->yyscan_info_);
break;
case CHARSET_PARSER_TYPE_HKSCS:
ret = obsql_oracle_hkscs_yylex_destroy(p->yyscan_info_);
break;
default:
ret = -1;
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
p->connection_collation_);
}
}
} else {
@ -233,113 +229,125 @@ int parse_sql(ParseResult *p, const char *buf, size_t input_len)
} else {
#ifdef OB_BUILD_ORACLE_PARSER
if (IS_ORACLE_COMPATIBLE) {
switch (p->connection_collation_) {
case 28/*CS_TYPE_GBK_CHINESE_CI*/:
case 87/*CS_TYPE_GBK_BIN*/:
case 216/*CS_TYPE_GB18030_2022_BIN*/:
case 217/*CS_TYPE_GB18030_2022_PINYIN_CI*/:
case 218/*CS_TYPE_GB18030_2022_PINYIN_CS*/:
case 219/*CS_TYPE_GB18030_2022_RADICAL_CI*/:
case 220/*CS_TYPE_GB18030_2022_RADICAL_CS*/:
case 221/*CS_TYPE_GB18030_2022_STROKE_CI*/:
case 222/*CS_TYPE_GB18030_2022_STROKE_CS*/:
case 248/*CS_TYPE_GB18030_CHINESE_CI*/:
case 249/*CS_TYPE_GB18030_BIN*/: {
YY_BUFFER_STATE bp = obsql_oracle_gbk_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_gbk_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
if (p->is_fp_) {
tmp_ret = obsql_oracle_gbk_fast_parse(p);
} else if (p->is_multi_query_) {
tmp_ret = obsql_oracle_gbk_multi_fast_parse(p);
} else if (p->is_multi_values_parser_) {
tmp_ret = obsql_oracle_gbk_multi_values_parse(p);
} else {
tmp_ret = obsql_oracle_gbk_yyparse(p);
}
if (0 == tmp_ret) {
ret = OB_PARSER_SUCCESS;
} else if (2 == tmp_ret) {
ret = OB_PARSER_ERR_NO_MEMORY;
} else {
if (0 != p->extra_errno_) {
ret = p->extra_errno_;
ObCharsetParserType type = CHARSET_PARSER_TYPE_NONE;
if (ret = obcharset_get_parser_type_by_coll(p->connection_collation_, &type),
0 != ret) {
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get charset failed: %u",
p->connection_collation_);
} else {
switch(type) {
case CHARSET_PARSER_TYPE_GB: {
YY_BUFFER_STATE bp = obsql_oracle_gbk_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_gbk_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
if (p->is_fp_) {
tmp_ret = obsql_oracle_gbk_fast_parse(p);
} else if (p->is_multi_query_) {
tmp_ret = obsql_oracle_gbk_multi_fast_parse(p);
} else if (p->is_multi_values_parser_) {
tmp_ret = obsql_oracle_gbk_multi_values_parse(p);
} else {
ret = OB_PARSER_ERR_PARSE_SQL;
tmp_ret = obsql_oracle_gbk_yyparse(p);
}
}
obsql_oracle_gbk_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
case 45/*CS_TYPE_UTF8MB4_GENERAL_CI*/:
case 46/*CS_TYPE_UTF8MB4_BIN*/:
case 63/*CS_TYPE_BINARY*/:
case 224/*CS_TYPE_UTF8MB4_UNICODE_CI*/:
case 255/*CS_TYPE_UTF8MB4_0900_AI_CI*/:
{
YY_BUFFER_STATE bp = obsql_oracle_utf8_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_utf8_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
if (p->is_fp_) {
tmp_ret = obsql_oracle_utf8_fast_parse(p);
} else if (p->is_multi_query_) {
tmp_ret = obsql_oracle_utf8_multi_fast_parse(p);
} else if (p->is_multi_values_parser_) {
tmp_ret = obsql_oracle_utf8_multi_values_parse(p);
} else {
tmp_ret = obsql_oracle_utf8_yyparse(p);
}
if (0 == tmp_ret) {
ret = OB_PARSER_SUCCESS;
} else if (2 == tmp_ret) {
ret = OB_PARSER_ERR_NO_MEMORY;
} else {
if (0 != p->extra_errno_) {
ret = p->extra_errno_;
if (0 == tmp_ret) {
ret = OB_PARSER_SUCCESS;
} else if (2 == tmp_ret) {
ret = OB_PARSER_ERR_NO_MEMORY;
} else {
ret = OB_PARSER_ERR_PARSE_SQL;
if (0 != p->extra_errno_) {
ret = p->extra_errno_;
} else {
ret = OB_PARSER_ERR_PARSE_SQL;
}
}
obsql_oracle_gbk_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
obsql_oracle_utf8_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
case 11/*CS_TYPE_ASCII_GENERAL_CI*/:
case 65/*CS_TYPE_ASCII_BIN*/:
case 18/*CS_TYPE_TIS620_THAI_CI*/:
case 89/*CS_TYPE_TIS620_BIN*/:
case 8/*CS_TYPE_LATIN1_SWEDISH_CI*/:
case 47/*CS_TYPE_LATIN1_BIN*/:{
YY_BUFFER_STATE bp = obsql_oracle_single_byte_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_single_byte_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
if (p->is_fp_) {
tmp_ret = obsql_oracle_single_byte_fast_parse(p);
} else if (p->is_multi_query_) {
tmp_ret = obsql_oracle_single_byte_multi_fast_parse(p);
} else if (p->is_multi_values_parser_) {
tmp_ret = obsql_oracle_single_byte_multi_values_parse(p);
} else {
tmp_ret = obsql_oracle_single_byte_yyparse(p);
}
if (0 == tmp_ret) {
ret = OB_PARSER_SUCCESS;
} else if (2 == tmp_ret) {
ret = OB_PARSER_ERR_NO_MEMORY;
} else {
if (0 != p->extra_errno_) {
ret = p->extra_errno_;
case CHARSET_PARSER_TYPE_SINGLE_BYTE: {
YY_BUFFER_STATE bp = obsql_oracle_single_byte_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_single_byte_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
if (p->is_fp_) {
tmp_ret = obsql_oracle_single_byte_fast_parse(p);
} else if (p->is_multi_query_) {
tmp_ret = obsql_oracle_single_byte_multi_fast_parse(p);
} else if (p->is_multi_values_parser_) {
tmp_ret = obsql_oracle_single_byte_multi_values_parse(p);
} else {
ret = OB_PARSER_ERR_PARSE_SQL;
tmp_ret = obsql_oracle_single_byte_yyparse(p);
}
if (0 == tmp_ret) {
ret = OB_PARSER_SUCCESS;
} else if (2 == tmp_ret) {
ret = OB_PARSER_ERR_NO_MEMORY;
} else {
if (0 != p->extra_errno_) {
ret = p->extra_errno_;
} else {
ret = OB_PARSER_ERR_PARSE_SQL;
}
}
obsql_oracle_single_byte_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
obsql_oracle_single_byte_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
default: {
ret = OB_PARSER_ERR_UNEXPECTED;
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support conn collation: %u",
p->connection_collation_);
break;
case CHARSET_PARSER_TYPE_UTF8MB4: {
YY_BUFFER_STATE bp = obsql_oracle_utf8_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_utf8_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
if (p->is_fp_) {
tmp_ret = obsql_oracle_utf8_fast_parse(p);
} else if (p->is_multi_query_) {
tmp_ret = obsql_oracle_utf8_multi_fast_parse(p);
} else if (p->is_multi_values_parser_) {
tmp_ret = obsql_oracle_utf8_multi_values_parse(p);
} else {
tmp_ret = obsql_oracle_utf8_yyparse(p);
}
if (0 == tmp_ret) {
ret = OB_PARSER_SUCCESS;
} else if (2 == tmp_ret) {
ret = OB_PARSER_ERR_NO_MEMORY;
} else {
if (0 != p->extra_errno_) {
ret = p->extra_errno_;
} else {
ret = OB_PARSER_ERR_PARSE_SQL;
}
}
obsql_oracle_utf8_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
case CHARSET_PARSER_TYPE_HKSCS: {
YY_BUFFER_STATE bp = obsql_oracle_hkscs_yy_scan_bytes(buf, len, p->yyscan_info_);
obsql_oracle_hkscs_yy_switch_to_buffer(bp, p->yyscan_info_);
int tmp_ret = -1;
if (p->is_fp_) {
tmp_ret = obsql_oracle_hkscs_fast_parse(p);
} else if (p->is_multi_query_) {
tmp_ret = obsql_oracle_hkscs_multi_fast_parse(p);
} else if (p->is_multi_values_parser_) {
tmp_ret = obsql_oracle_hkscs_multi_values_parse(p);
} else {
tmp_ret = obsql_oracle_hkscs_yyparse(p);
}
if (0 == tmp_ret) {
ret = OB_PARSER_SUCCESS;
} else if (2 == tmp_ret) {
ret = OB_PARSER_ERR_NO_MEMORY;
} else {
if (0 != p->extra_errno_) {
ret = p->extra_errno_;
} else {
ret = OB_PARSER_ERR_PARSE_SQL;
}
}
obsql_oracle_hkscs_yy_delete_buffer(bp, p->yyscan_info_);
break;
}
default:
ret = -1;
(void)snprintf(p->error_msg_, MAX_ERROR_MSG, "get not support connection collation: %u",
p->connection_collation_);
}
}
#endif

View File

@ -29,6 +29,8 @@
#include "parse_malloc.h"
#include "ob_non_reserved_keywords.h"
#include "parse_define.h"
#include "ob_parser_charset_utils.h"
#define MAX_VARCHAR_LENGTH 4194303
#define INT16NUM_OVERFLOW INT16_MAX

View File

@ -121,6 +121,12 @@ _ASCII { REPUT_TOKEN_NEG_SIGN(_ASCII); }
_TIS620 { REPUT_TOKEN_NEG_SIGN(_TIS620); }
_BINARY { REPUT_TOKEN_NEG_SIGN(_BINARY); }
_UTF16 { REPUT_TOKEN_NEG_SIGN(_UTF16); }
_UTF16LE { REPUT_TOKEN_NEG_SIGN(_UTF16LE); }
_SJIS { REPUT_TOKEN_NEG_SIGN(_SJIS); }
_BIG5 { REPUT_TOKEN_NEG_SIGN(_BIG5); }
_HKSCS { REPUT_TOKEN_NEG_SIGN(_HKSCS); }
_HKSCS31 { REPUT_TOKEN_NEG_SIGN(_HKSCS31); }
_DEC8 { REPUT_TOKEN_NEG_SIGN(_DEC8); }
NOT {
int32_t token_ret = NOT; /*fast parameterize don't care NOT or NOT2*/
if (!(IS_FAST_PARAMETERIZE)) {

View File

@ -208,7 +208,7 @@ DYNAMIC_SAMPLING
NEG_SIGN
%token /*can not be relation name*/
_BINARY _UTF8 _UTF8MB4 _UTF8MB3 _GBK _UTF16 _GB18030 _GB18030_2022 _LATIN1 _ASCII _TIS620 CNNOP
_BINARY _UTF8 _UTF8MB4 _UTF8MB3 _GBK _UTF16 _GB18030 _GB18030_2022 _LATIN1 _ASCII _TIS620 _UTF16LE _SJIS _BIG5 _DEC8 _HKSCS _HKSCS31 CNNOP
SELECT_HINT_BEGIN UPDATE_HINT_BEGIN DELETE_HINT_BEGIN INSERT_HINT_BEGIN REPLACE_HINT_BEGIN HINT_HINT_BEGIN HINT_END
LOAD_DATA_HINT_BEGIN CREATE_HINT_BEGIN ALTER_HINT_BEGIN
END_P SET_VAR DELIMITER
@ -1182,6 +1182,60 @@ _UTF8
YYABORT_NO_MEMORY;
}
}
| _UTF16LE
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
$$->str_value_ = parse_strdup("utf16le", result->malloc_pool_, &($$->str_len_));
if (OB_UNLIKELY(NULL == $$->str_value_)) {
yyerror(NULL, result, "no more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
}
| _SJIS
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
$$->str_value_ = parse_strdup("sjis", result->malloc_pool_, &($$->str_len_));
if (OB_UNLIKELY(NULL == $$->str_value_)) {
yyerror(NULL, result, "no more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
}
| _BIG5
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
$$->str_value_ = parse_strdup("big5", result->malloc_pool_, &($$->str_len_));
if (OB_UNLIKELY(NULL == $$->str_value_)) {
yyerror(NULL, result, "no more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
}
| _HKSCS
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
$$->str_value_ = parse_strdup("hkscs", result->malloc_pool_, &($$->str_len_));
if (OB_UNLIKELY(NULL == $$->str_value_)) {
yyerror(NULL, result, "no more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
}
| _HKSCS31
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
$$->str_value_ = parse_strdup("hkscs31", result->malloc_pool_, &($$->str_len_));
if (OB_UNLIKELY(NULL == $$->str_value_)) {
yyerror(NULL, result, "no more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
}
| _DEC8
{
malloc_terminal_node($$, result->malloc_pool_, T_CHARSET);
$$->str_value_ = parse_strdup("dec8", result->malloc_pool_, &($$->str_len_));
if (OB_UNLIKELY(NULL == $$->str_value_)) {
yyerror(NULL, result, "no more space for mallocing string\n");
YYABORT_NO_MEMORY;
}
}
;
literal:

View File

@ -272,9 +272,12 @@ int ObLoadDataResolver::resolve(const ParseNode &parse_tree)
load_args.file_cs_type_ = CS_TYPE_UTF8MB4_BIN;
}
if (OB_SUCC(ret)) {
if (ObCharset::charset_type_by_coll(load_args.file_cs_type_) == CHARSET_UTF16) {
int64_t mbminlen = 0;
if (OB_FAIL(common::ObCharset::get_mbminlen_by_coll(load_args.file_cs_type_, mbminlen))) {
LOG_WARN("unexpected error ", K(ret));
} else if (mbminlen > 1) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "utf16 encoded files are");
LOG_USER_ERROR(OB_NOT_SUPPORTED, "compatible with ascii files are");
}
}
}

View File

@ -55,7 +55,11 @@ int ObSetNamesResolver::resolve(const ParseNode &parse_tree)
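// Currently gbk, utf16 and utf8mb4 are supported; only `set names utf16` is not supported.
// If more character sets are supported later, consider a better way to implement this check,
// preferably as a function; for now that is not necessary.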
if (0 == charset.case_compare("utf16")) {
ObCollationType col_type = ObCharset::get_default_collation(ObCharset::charset_type(charset));
if (!ObCharset::is_valid_collation(col_type)) {
ret = OB_ERR_UNKNOWN_CHARSET;
LOG_USER_ERROR(OB_ERR_UNKNOWN_CHARSET, charset.length(), charset.ptr());
} else if (ObCharset::get_charset(col_type)->mbminlen > 1) {
ret = OB_ERR_WRONG_VALUE_FOR_VAR;
LOG_USER_ERROR(OB_ERR_WRONG_VALUE_FOR_VAR,
static_cast<int>(strlen("character_set_client")), "character_set_client",

View File

@ -410,15 +410,22 @@ int ObDCLResolver::resolve_user_list_node(ParseNode *user_node,
LOG_WARN("The child of user node should not be NULL", K(ret));
} else {
ParseNode *user_hostname_node = user_node;
user_name = ObString (user_hostname_node->children_[0]->str_len_, user_hostname_node->children_[0]->str_value_);
if (NULL == user_hostname_node->children_[1]) {
if (user_hostname_node->children_[0]->type_ != T_IDENT && OB_FAIL(ObSQLUtils::convert_sql_text_to_schema_for_storing(
*allocator_, session_info_->get_dtc_params(), user_name))) {
LOG_WARN("fail to convert user name to utf8", K(ret), K(user_name),
KPHEX(user_name.ptr(), user_name.length()));
} else if (NULL == user_hostname_node->children_[1]) {
host_name.assign_ptr(OB_DEFAULT_HOST_NAME, static_cast<int32_t>(STRLEN(OB_DEFAULT_HOST_NAME)));
} else {
host_name.assign_ptr(user_hostname_node->children_[1]->str_value_,
static_cast<int32_t>(user_hostname_node->children_[1]->str_len_));
}
if (OB_FAIL(schema_checker_->get_user_info(params_.session_info_->get_effective_tenant_id(),
user_name, host_name, user_info))) {
if (OB_FAIL(ret)) {
LOG_WARN("failed to get user name", K(ret), K(user_name));
} else if (OB_FAIL(schema_checker_->get_user_info(params_.session_info_->get_effective_tenant_id(),
user_name, host_name, user_info))) {
LOG_WARN("failed to get user info", K(ret), K(user_name));
if (OB_USER_NOT_EXIST == ret) {
// Skip; RS handles this case uniformly, compatible with MySQL behavior

View File

@ -174,9 +174,11 @@ int ObCreateTenantResolver::resolve(const ParseNode &parse_tree)
}
if (OB_SUCC(ret)) {
if (CHARSET_UTF16 == charset_type) {
ObCollationType col_type = ObCharset::get_default_collation(charset_type);
if (!ObCharset::is_valid_collation(col_type) ||
ObCharset::get_charset(ObCharset::get_default_collation(charset_type))->mbminlen > 1) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Use utf16 as database charset");
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Use utf16 and utf16le as database charset");
}
}

View File

@ -1120,6 +1120,7 @@ int ObCreateViewResolver::print_star_expanded_view_stmt(common::ObString &expand
LOG_WARN("failed to append comma", K(ret));
} else {
ObSqlString column_name;
ObString column_name_copy;
if (start_pos != end_pos && OB_FAIL(expanded_str.append(table_name))) {
LOG_WARN("failed to append table_name", K(ret));
} else if (OB_FAIL(column_name.append("\""))) {
@ -1128,7 +1129,11 @@ int ObCreateViewResolver::print_star_expanded_view_stmt(common::ObString &expand
LOG_WARN("failed to append column name", K(ret));
} else if (OB_FAIL(column_name.append("\""))) {
LOG_WARN("failed to append quote", K(ret));
} else if (OB_FAIL(expanded_str.append(column_name.string()))) {
} else if (OB_FAIL(ob_write_string(*allocator_, column_name.string(), column_name_copy, true))) {
LOG_WARN("failed to write string", K(ret));
} else if (OB_FAIL(ObSQLUtils::convert_sql_text_from_schema_for_resolve(*allocator_, session_info_->get_dtc_params(), column_name_copy))) {
LOG_WARN("failed to convert sql text", K(ret));
} else if (OB_FAIL(expanded_str.append(column_name_copy))) {
LOG_WARN("failed to append column name", K(ret));
}
}

View File

@ -238,7 +238,7 @@ int ObFTParseHelper::segment(
} else if (OB_ISNULL(allocator_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("allocator ptr is nullptr", K(ret), KP_(allocator), K_(is_inited));
} else if (OB_UNLIKELY(CS_TYPE_INVALID == type || type >= CS_TYPE_EXTENDED_MARK)) {
} else if (OB_UNLIKELY(CS_TYPE_INVALID == type || type >= CS_TYPE_PINYIN_BEGIN_MARK)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(type));
} else if (OB_ISNULL(cs = common::ObCharset::get_charset(type))) {

View File

@ -1,9 +1,4 @@
let $cluster_is_ob = 1;
let $cluster_is_mysql = 0;
let $collation_count = query_get_value(select count(1) as cnt from information_schema.COLLATIONS, cnt, 1);
if ($collation_count > 100) {
let $cluster_is_ob = 0;
let $cluster_is_mysql = 1;
}

View File

@ -992,13 +992,19 @@ select * from information_schema.character_sets;
| CHARACTER_SET_NAME | DEFAULT_COLLATE_NAME | DESCRIPTION | MAXLEN |
+--------------------+-------------------------+-----------------------+--------+
| ascii | ascii_general_ci | US ASCII | 1 |
| big5 | big5_chinese_ci | BIG5 | 2 |
| binary | binary | Binary pseudo charset | 1 |
| dec8 | dec8_swedish_ci | DEC West European | 1 |
| gb18030 | gb18030_chinese_ci | GB18030 charset | 4 |
| gb18030_2022 | gb18030_2022_chinese_ci | GB18030-2022 charset | 4 |
| gbk | gbk_chinese_ci | GBK charset | 2 |
| hkscs | hkscs_bin | HKSCS | 2 |
| hkscs31 | hkscs31_bin | HKSCS-ISO UNICODE 31 | 2 |
| latin1 | latin1_swedish_ci | cp1252 West European | 1 |
| sjis | sjis_japanese_ci | SJIS | 2 |
| tis620 | tis620_thai_ci | TIS620 Thai | 1 |
| utf16 | utf16_general_ci | UTF-16 Unicode | 2 |
| utf16 | utf16_general_ci | UTF-16 Unicode | 4 |
| utf16le | utf16le_general_ci | UTF-16LE Unicode | 4 |
| utf8mb4 | utf8mb4_general_ci | UTF-8 Unicode | 4 |
+--------------------+-------------------------+-----------------------+--------+
select * from information_schema.statistics where table_schema in ('oceanbase', 'mysql', 'information_schema') and TABLE_NAME not like "ob_all_proxy%" order by TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME;

View File

@ -13,8 +13,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf16_unicode_ci utf16 101 Yes 8
gb18030_chinese_ci gb18030 248 Yes Yes 2
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1
@ -26,23 +24,146 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
ascii_general_ci ascii 11 Yes Yes 1
ascii_bin ascii 65 Yes 1
tis620_thai_ci tis620 18 Yes Yes 1
tis620_bin tis620 89 Yes 1
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
utf16le_general_ci utf16le 56 Yes Yes 1
utf16le_bin utf16le 62 Yes 1
sjis_japanese_ci sjis 13 Yes Yes 1
sjis_bin sjis 88 Yes 1
big5_chinese_ci big5 1 Yes Yes 1
big5_bin big5 84 Yes 1
hkscs_bin hkscs 152 Yes Yes 1
hkscs31_bin hkscs31 153 Yes Yes 1
utf16_unicode_ci utf16 101 Yes 8
utf16_icelandic_ci utf16 102 Yes 8
utf16_latvian_ci utf16 103 Yes 8
utf16_romanian_ci utf16 104 Yes 8
utf16_slovenian_ci utf16 105 Yes 8
utf16_polish_ci utf16 106 Yes 8
utf16_estonian_ci utf16 107 Yes 8
utf16_spanish_ci utf16 108 Yes 8
utf16_swedish_ci utf16 109 Yes 8
utf16_turkish_ci utf16 110 Yes 8
utf16_czech_ci utf16 111 Yes 8
utf16_danish_ci utf16 112 Yes 8
utf16_lithuanian_ci utf16 113 Yes 8
utf16_slovak_ci utf16 114 Yes 8
utf16_spanish2_ci utf16 115 Yes 8
utf16_roman_ci utf16 116 Yes 8
utf16_persian_ci utf16 117 Yes 8
utf16_esperanto_ci utf16 118 Yes 8
utf16_hungarian_ci utf16 119 Yes 8
utf16_sinhala_ci utf16 120 Yes 8
utf16_german2_ci utf16 121 Yes 8
utf16_croatian_ci utf16 122 Yes 8
utf16_unicode_520_ci utf16 123 Yes 8
utf16_vietnamese_ci utf16 124 Yes 8
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
utf8mb4_latvian_ci utf8mb4 226 Yes 8
utf8mb4_romanian_ci utf8mb4 227 Yes 8
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
utf8mb4_polish_ci utf8mb4 229 Yes 8
utf8mb4_estonian_ci utf8mb4 230 Yes 8
utf8mb4_spanish_ci utf8mb4 231 Yes 8
utf8mb4_swedish_ci utf8mb4 232 Yes 8
utf8mb4_turkish_ci utf8mb4 233 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
utf8mb4_danish_ci utf8mb4 235 Yes 8
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
utf8mb4_slovak_ci utf8mb4 237 Yes 8
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
utf8mb4_roman_ci utf8mb4 239 Yes 8
utf8mb4_persian_ci utf8mb4 240 Yes 8
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
utf8mb4_german2_ci utf8mb4 244 Yes 8
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
dec8_swedish_ci dec8 3 Yes Yes 8
dec8_bin dec8 69 Yes 8
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
utf8mb4_0900_bin utf8mb4 309 Yes 1
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
SHOW CHARACTER SET;
Charset Description Default collation Maxlen
binary Binary pseudo charset binary 1
utf8mb4 UTF-8 Unicode utf8mb4_general_ci 4
gbk GBK charset gbk_chinese_ci 2
utf16 UTF-16 Unicode utf16_general_ci 2
utf16 UTF-16 Unicode utf16_general_ci 4
gb18030 GB18030 charset gb18030_chinese_ci 4
latin1 cp1252 West European latin1_swedish_ci 1
gb18030_2022 GB18030-2022 charset gb18030_2022_chinese_ci 4
ascii US ASCII ascii_general_ci 1
tis620 TIS620 Thai tis620_thai_ci 1
utf16le UTF-16LE Unicode utf16le_general_ci 4
sjis SJIS sjis_japanese_ci 2
big5 BIG5 big5_chinese_ci 2
hkscs HKSCS hkscs_bin 2
hkscs31 HKSCS-ISO UNICODE 31 hkscs31_bin 2
dec8 DEC West European dec8_swedish_ci 1
SET NAMES latin1;

View File

@ -7,8 +7,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf16_unicode_ci utf16 101 Yes 8
gb18030_chinese_ci gb18030 248 Yes Yes 2
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1
@ -20,14 +18,131 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
ascii_general_ci ascii 11 Yes Yes 1
ascii_bin ascii 65 Yes 1
tis620_thai_ci tis620 18 Yes Yes 1
tis620_bin tis620 89 Yes 1
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
utf16le_general_ci utf16le 56 Yes Yes 1
utf16le_bin utf16le 62 Yes 1
sjis_japanese_ci sjis 13 Yes Yes 1
sjis_bin sjis 88 Yes 1
big5_chinese_ci big5 1 Yes Yes 1
big5_bin big5 84 Yes 1
hkscs_bin hkscs 152 Yes Yes 1
hkscs31_bin hkscs31 153 Yes Yes 1
utf16_unicode_ci utf16 101 Yes 8
utf16_icelandic_ci utf16 102 Yes 8
utf16_latvian_ci utf16 103 Yes 8
utf16_romanian_ci utf16 104 Yes 8
utf16_slovenian_ci utf16 105 Yes 8
utf16_polish_ci utf16 106 Yes 8
utf16_estonian_ci utf16 107 Yes 8
utf16_spanish_ci utf16 108 Yes 8
utf16_swedish_ci utf16 109 Yes 8
utf16_turkish_ci utf16 110 Yes 8
utf16_czech_ci utf16 111 Yes 8
utf16_danish_ci utf16 112 Yes 8
utf16_lithuanian_ci utf16 113 Yes 8
utf16_slovak_ci utf16 114 Yes 8
utf16_spanish2_ci utf16 115 Yes 8
utf16_roman_ci utf16 116 Yes 8
utf16_persian_ci utf16 117 Yes 8
utf16_esperanto_ci utf16 118 Yes 8
utf16_hungarian_ci utf16 119 Yes 8
utf16_sinhala_ci utf16 120 Yes 8
utf16_german2_ci utf16 121 Yes 8
utf16_croatian_ci utf16 122 Yes 8
utf16_unicode_520_ci utf16 123 Yes 8
utf16_vietnamese_ci utf16 124 Yes 8
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
utf8mb4_latvian_ci utf8mb4 226 Yes 8
utf8mb4_romanian_ci utf8mb4 227 Yes 8
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
utf8mb4_polish_ci utf8mb4 229 Yes 8
utf8mb4_estonian_ci utf8mb4 230 Yes 8
utf8mb4_spanish_ci utf8mb4 231 Yes 8
utf8mb4_swedish_ci utf8mb4 232 Yes 8
utf8mb4_turkish_ci utf8mb4 233 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
utf8mb4_danish_ci utf8mb4 235 Yes 8
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
utf8mb4_slovak_ci utf8mb4 237 Yes 8
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
utf8mb4_roman_ci utf8mb4 239 Yes 8
utf8mb4_persian_ci utf8mb4 240 Yes 8
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
utf8mb4_german2_ci utf8mb4 244 Yes 8
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
dec8_swedish_ci dec8 3 Yes Yes 8
dec8_bin dec8 69 Yes 8
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
utf8mb4_0900_bin utf8mb4 309 Yes 1
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
show collation;
Collation Charset Id Default Compiled Sortlen
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
@ -37,8 +152,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf16_unicode_ci utf16 101 Yes 8
gb18030_chinese_ci gb18030 248 Yes Yes 2
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1
@ -50,14 +163,131 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
ascii_general_ci ascii 11 Yes Yes 1
ascii_bin ascii 65 Yes 1
tis620_thai_ci tis620 18 Yes Yes 1
tis620_bin tis620 89 Yes 1
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
utf16le_general_ci utf16le 56 Yes Yes 1
utf16le_bin utf16le 62 Yes 1
sjis_japanese_ci sjis 13 Yes Yes 1
sjis_bin sjis 88 Yes 1
big5_chinese_ci big5 1 Yes Yes 1
big5_bin big5 84 Yes 1
hkscs_bin hkscs 152 Yes Yes 1
hkscs31_bin hkscs31 153 Yes Yes 1
utf16_unicode_ci utf16 101 Yes 8
utf16_icelandic_ci utf16 102 Yes 8
utf16_latvian_ci utf16 103 Yes 8
utf16_romanian_ci utf16 104 Yes 8
utf16_slovenian_ci utf16 105 Yes 8
utf16_polish_ci utf16 106 Yes 8
utf16_estonian_ci utf16 107 Yes 8
utf16_spanish_ci utf16 108 Yes 8
utf16_swedish_ci utf16 109 Yes 8
utf16_turkish_ci utf16 110 Yes 8
utf16_czech_ci utf16 111 Yes 8
utf16_danish_ci utf16 112 Yes 8
utf16_lithuanian_ci utf16 113 Yes 8
utf16_slovak_ci utf16 114 Yes 8
utf16_spanish2_ci utf16 115 Yes 8
utf16_roman_ci utf16 116 Yes 8
utf16_persian_ci utf16 117 Yes 8
utf16_esperanto_ci utf16 118 Yes 8
utf16_hungarian_ci utf16 119 Yes 8
utf16_sinhala_ci utf16 120 Yes 8
utf16_german2_ci utf16 121 Yes 8
utf16_croatian_ci utf16 122 Yes 8
utf16_unicode_520_ci utf16 123 Yes 8
utf16_vietnamese_ci utf16 124 Yes 8
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
utf8mb4_latvian_ci utf8mb4 226 Yes 8
utf8mb4_romanian_ci utf8mb4 227 Yes 8
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
utf8mb4_polish_ci utf8mb4 229 Yes 8
utf8mb4_estonian_ci utf8mb4 230 Yes 8
utf8mb4_spanish_ci utf8mb4 231 Yes 8
utf8mb4_swedish_ci utf8mb4 232 Yes 8
utf8mb4_turkish_ci utf8mb4 233 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
utf8mb4_danish_ci utf8mb4 235 Yes 8
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
utf8mb4_slovak_ci utf8mb4 237 Yes 8
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
utf8mb4_roman_ci utf8mb4 239 Yes 8
utf8mb4_persian_ci utf8mb4 240 Yes 8
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
utf8mb4_german2_ci utf8mb4 244 Yes 8
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
dec8_swedish_ci dec8 3 Yes Yes 8
dec8_bin dec8 69 Yes 8
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
utf8mb4_0900_bin utf8mb4 309 Yes 1
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
show collation test;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your OceanBase version for the right syntax to use near 'test' at line 1
SHOW CHARACTER SET;
@ -65,23 +295,35 @@ Charset Description Default collation Maxlen
binary Binary pseudo charset binary 1
utf8mb4 UTF-8 Unicode utf8mb4_general_ci 4
gbk GBK charset gbk_chinese_ci 2
utf16 UTF-16 Unicode utf16_general_ci 2
utf16 UTF-16 Unicode utf16_general_ci 4
gb18030 GB18030 charset gb18030_chinese_ci 4
latin1 cp1252 West European latin1_swedish_ci 1
gb18030_2022 GB18030-2022 charset gb18030_2022_chinese_ci 4
ascii US ASCII ascii_general_ci 1
tis620 TIS620 Thai tis620_thai_ci 1
utf16le UTF-16LE Unicode utf16le_general_ci 4
sjis SJIS sjis_japanese_ci 2
big5 BIG5 big5_chinese_ci 2
hkscs HKSCS hkscs_bin 2
hkscs31 HKSCS-ISO UNICODE 31 hkscs31_bin 2
dec8 DEC West European dec8_swedish_ci 1
SHOW CHARACTER SET;
Charset Description Default collation Maxlen
binary Binary pseudo charset binary 1
utf8mb4 UTF-8 Unicode utf8mb4_general_ci 4
gbk GBK charset gbk_chinese_ci 2
utf16 UTF-16 Unicode utf16_general_ci 2
utf16 UTF-16 Unicode utf16_general_ci 4
gb18030 GB18030 charset gb18030_chinese_ci 4
latin1 cp1252 West European latin1_swedish_ci 1
gb18030_2022 GB18030-2022 charset gb18030_2022_chinese_ci 4
ascii US ASCII ascii_general_ci 1
tis620 TIS620 Thai tis620_thai_ci 1
utf16le UTF-16LE Unicode utf16le_general_ci 4
sjis SJIS sjis_japanese_ci 2
big5 BIG5 big5_chinese_ci 2
hkscs HKSCS hkscs_bin 2
hkscs31 HKSCS-ISO UNICODE 31 hkscs31_bin 2
dec8 DEC West European dec8_swedish_ci 1
SHOW CHARACTER SET test;
ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your OceanBase version for the right syntax to use near 'test' at line 1
set names utf8;

View File

@ -4,33 +4,51 @@ CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN
binary binary Binary pseudo charset 1
utf8mb4 utf8mb4_general_ci UTF-8 Unicode 4
gbk gbk_chinese_ci GBK charset 2
utf16 utf16_general_ci UTF-16 Unicode 2
utf16 utf16_general_ci UTF-16 Unicode 4
gb18030 gb18030_chinese_ci GB18030 charset 4
latin1 latin1_swedish_ci cp1252 West European 1
gb18030_2022 gb18030_2022_chinese_ci GB18030-2022 charset 4
ascii ascii_general_ci US ASCII 1
tis620 tis620_thai_ci TIS620 Thai 1
utf16le utf16le_general_ci UTF-16LE Unicode 4
sjis sjis_japanese_ci SJIS 2
big5 big5_chinese_ci BIG5 2
hkscs hkscs_bin HKSCS 2
hkscs31 hkscs31_bin HKSCS-ISO UNICODE 31 2
dec8 dec8_swedish_ci DEC West European 1
select character_set_name, default_collate_name, description, maxlen from character_sets;
character_set_name default_collate_name description maxlen
binary binary Binary pseudo charset 1
utf8mb4 utf8mb4_general_ci UTF-8 Unicode 4
gbk gbk_chinese_ci GBK charset 2
utf16 utf16_general_ci UTF-16 Unicode 2
utf16 utf16_general_ci UTF-16 Unicode 4
gb18030 gb18030_chinese_ci GB18030 charset 4
latin1 latin1_swedish_ci cp1252 West European 1
gb18030_2022 gb18030_2022_chinese_ci GB18030-2022 charset 4
ascii ascii_general_ci US ASCII 1
tis620 tis620_thai_ci TIS620 Thai 1
utf16le utf16le_general_ci UTF-16LE Unicode 4
sjis sjis_japanese_ci SJIS 2
big5 big5_chinese_ci BIG5 2
hkscs hkscs_bin HKSCS 2
hkscs31 hkscs31_bin HKSCS-ISO UNICODE 31 2
dec8 dec8_swedish_ci DEC West European 1
select maxlen from character_sets;
maxlen
1
4
2
4
4
1
4
1
1
4
2
2
2
2
4
1
4
1
1
select * from character_sets where character_set_name like '%binary%';
CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN

View File

@ -8,8 +8,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf16_unicode_ci utf16 101 Yes 8
gb18030_chinese_ci gb18030 248 Yes Yes 2
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1
@ -21,14 +19,131 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
ascii_general_ci ascii 11 Yes Yes 1
ascii_bin ascii 65 Yes 1
tis620_thai_ci tis620 18 Yes Yes 1
tis620_bin tis620 89 Yes 1
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
utf16le_general_ci utf16le 56 Yes Yes 1
utf16le_bin utf16le 62 Yes 1
sjis_japanese_ci sjis 13 Yes Yes 1
sjis_bin sjis 88 Yes 1
big5_chinese_ci big5 1 Yes Yes 1
big5_bin big5 84 Yes 1
hkscs_bin hkscs 152 Yes Yes 1
hkscs31_bin hkscs31 153 Yes Yes 1
utf16_unicode_ci utf16 101 Yes 8
utf16_icelandic_ci utf16 102 Yes 8
utf16_latvian_ci utf16 103 Yes 8
utf16_romanian_ci utf16 104 Yes 8
utf16_slovenian_ci utf16 105 Yes 8
utf16_polish_ci utf16 106 Yes 8
utf16_estonian_ci utf16 107 Yes 8
utf16_spanish_ci utf16 108 Yes 8
utf16_swedish_ci utf16 109 Yes 8
utf16_turkish_ci utf16 110 Yes 8
utf16_czech_ci utf16 111 Yes 8
utf16_danish_ci utf16 112 Yes 8
utf16_lithuanian_ci utf16 113 Yes 8
utf16_slovak_ci utf16 114 Yes 8
utf16_spanish2_ci utf16 115 Yes 8
utf16_roman_ci utf16 116 Yes 8
utf16_persian_ci utf16 117 Yes 8
utf16_esperanto_ci utf16 118 Yes 8
utf16_hungarian_ci utf16 119 Yes 8
utf16_sinhala_ci utf16 120 Yes 8
utf16_german2_ci utf16 121 Yes 8
utf16_croatian_ci utf16 122 Yes 8
utf16_unicode_520_ci utf16 123 Yes 8
utf16_vietnamese_ci utf16 124 Yes 8
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
utf8mb4_latvian_ci utf8mb4 226 Yes 8
utf8mb4_romanian_ci utf8mb4 227 Yes 8
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
utf8mb4_polish_ci utf8mb4 229 Yes 8
utf8mb4_estonian_ci utf8mb4 230 Yes 8
utf8mb4_spanish_ci utf8mb4 231 Yes 8
utf8mb4_swedish_ci utf8mb4 232 Yes 8
utf8mb4_turkish_ci utf8mb4 233 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
utf8mb4_danish_ci utf8mb4 235 Yes 8
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
utf8mb4_slovak_ci utf8mb4 237 Yes 8
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
utf8mb4_roman_ci utf8mb4 239 Yes 8
utf8mb4_persian_ci utf8mb4 240 Yes 8
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
utf8mb4_german2_ci utf8mb4 244 Yes 8
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
dec8_swedish_ci dec8 3 Yes Yes 8
dec8_bin dec8 69 Yes 8
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
utf8mb4_0900_bin utf8mb4 309 Yes 1
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
select collation_name, character_set_name, id, is_default, is_compiled, sortlen from collations;
collation_name character_set_name id is_default is_compiled sortlen
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
@ -38,8 +153,6 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf16_unicode_ci utf16 101 Yes 8
gb18030_chinese_ci gb18030 248 Yes Yes 2
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1
@ -51,23 +164,222 @@ gb18030_2022_radical_ci gb18030_2022 219 Yes 1
gb18030_2022_radical_cs gb18030_2022 220 Yes 1
gb18030_2022_stroke_ci gb18030_2022 221 Yes 1
gb18030_2022_stroke_cs gb18030_2022 222 Yes 1
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
ascii_general_ci ascii 11 Yes Yes 1
ascii_bin ascii 65 Yes 1
tis620_thai_ci tis620 18 Yes Yes 1
tis620_bin tis620 89 Yes 1
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
utf16le_general_ci utf16le 56 Yes Yes 1
utf16le_bin utf16le 62 Yes 1
sjis_japanese_ci sjis 13 Yes Yes 1
sjis_bin sjis 88 Yes 1
big5_chinese_ci big5 1 Yes Yes 1
big5_bin big5 84 Yes 1
hkscs_bin hkscs 152 Yes Yes 1
hkscs31_bin hkscs31 153 Yes Yes 1
utf16_unicode_ci utf16 101 Yes 8
utf16_icelandic_ci utf16 102 Yes 8
utf16_latvian_ci utf16 103 Yes 8
utf16_romanian_ci utf16 104 Yes 8
utf16_slovenian_ci utf16 105 Yes 8
utf16_polish_ci utf16 106 Yes 8
utf16_estonian_ci utf16 107 Yes 8
utf16_spanish_ci utf16 108 Yes 8
utf16_swedish_ci utf16 109 Yes 8
utf16_turkish_ci utf16 110 Yes 8
utf16_czech_ci utf16 111 Yes 8
utf16_danish_ci utf16 112 Yes 8
utf16_lithuanian_ci utf16 113 Yes 8
utf16_slovak_ci utf16 114 Yes 8
utf16_spanish2_ci utf16 115 Yes 8
utf16_roman_ci utf16 116 Yes 8
utf16_persian_ci utf16 117 Yes 8
utf16_esperanto_ci utf16 118 Yes 8
utf16_hungarian_ci utf16 119 Yes 8
utf16_sinhala_ci utf16 120 Yes 8
utf16_german2_ci utf16 121 Yes 8
utf16_croatian_ci utf16 122 Yes 8
utf16_unicode_520_ci utf16 123 Yes 8
utf16_vietnamese_ci utf16 124 Yes 8
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
utf8mb4_latvian_ci utf8mb4 226 Yes 8
utf8mb4_romanian_ci utf8mb4 227 Yes 8
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
utf8mb4_polish_ci utf8mb4 229 Yes 8
utf8mb4_estonian_ci utf8mb4 230 Yes 8
utf8mb4_spanish_ci utf8mb4 231 Yes 8
utf8mb4_swedish_ci utf8mb4 232 Yes 8
utf8mb4_turkish_ci utf8mb4 233 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
utf8mb4_danish_ci utf8mb4 235 Yes 8
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
utf8mb4_slovak_ci utf8mb4 237 Yes 8
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
utf8mb4_roman_ci utf8mb4 239 Yes 8
utf8mb4_persian_ci utf8mb4 240 Yes 8
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
utf8mb4_german2_ci utf8mb4 244 Yes 8
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
dec8_swedish_ci dec8 3 Yes Yes 8
dec8_bin dec8 69 Yes 8
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
utf8mb4_0900_bin utf8mb4 309 Yes 1
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
select * from collations where collation_name like '%utf8%';
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
utf8mb4_bin utf8mb4 46 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 8
utf8mb4_icelandic_ci utf8mb4 225 Yes 8
utf8mb4_latvian_ci utf8mb4 226 Yes 8
utf8mb4_romanian_ci utf8mb4 227 Yes 8
utf8mb4_slovenian_ci utf8mb4 228 Yes 8
utf8mb4_polish_ci utf8mb4 229 Yes 8
utf8mb4_estonian_ci utf8mb4 230 Yes 8
utf8mb4_spanish_ci utf8mb4 231 Yes 8
utf8mb4_swedish_ci utf8mb4 232 Yes 8
utf8mb4_turkish_ci utf8mb4 233 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
utf8mb4_danish_ci utf8mb4 235 Yes 8
utf8mb4_lithuanian_ci utf8mb4 236 Yes 8
utf8mb4_slovak_ci utf8mb4 237 Yes 8
utf8mb4_spanish2_ci utf8mb4 238 Yes 8
utf8mb4_roman_ci utf8mb4 239 Yes 8
utf8mb4_persian_ci utf8mb4 240 Yes 8
utf8mb4_esperanto_ci utf8mb4 241 Yes 8
utf8mb4_hungarian_ci utf8mb4 242 Yes 8
utf8mb4_sinhala_ci utf8mb4 243 Yes 8
utf8mb4_german2_ci utf8mb4 244 Yes 8
utf8mb4_croatian_ci utf8mb4 245 Yes 8
utf8mb4_unicode_520_ci utf8mb4 246 Yes 8
utf8mb4_czech_ci utf8mb4 234 Yes 8
utf8mb4_0900_ai_ci utf8mb4 255 Yes 1
utf8mb4_vietnamese_ci utf8mb4 247 Yes 8
utf8mb4_0900_ai_ci utf8mb4 255 Yes 0
utf8mb4_de_pb_0900_ai_ci utf8mb4 256 Yes 0
utf8mb4_is_0900_ai_ci utf8mb4 257 Yes 0
utf8mb4_lv_0900_ai_ci utf8mb4 258 Yes 0
utf8mb4_ro_0900_ai_ci utf8mb4 259 Yes 0
utf8mb4_sl_0900_ai_ci utf8mb4 260 Yes 0
utf8mb4_pl_0900_ai_ci utf8mb4 261 Yes 0
utf8mb4_et_0900_ai_ci utf8mb4 262 Yes 0
utf8mb4_es_0900_ai_ci utf8mb4 263 Yes 0
utf8mb4_sv_0900_ai_ci utf8mb4 264 Yes 0
utf8mb4_tr_0900_ai_ci utf8mb4 265 Yes 0
utf8mb4_cs_0900_ai_ci utf8mb4 266 Yes 0
utf8mb4_da_0900_ai_ci utf8mb4 267 Yes 0
utf8mb4_lt_0900_ai_ci utf8mb4 268 Yes 0
utf8mb4_sk_0900_ai_ci utf8mb4 269 Yes 0
utf8mb4_es_trad_0900_ai_ci utf8mb4 270 Yes 0
utf8mb4_la_0900_ai_ci utf8mb4 271 Yes 0
utf8mb4_eo_0900_ai_ci utf8mb4 273 Yes 0
utf8mb4_hu_0900_ai_ci utf8mb4 274 Yes 0
utf8mb4_hr_0900_ai_ci utf8mb4 275 Yes 0
utf8mb4_vi_0900_ai_ci utf8mb4 277 Yes 0
utf8mb4_0900_as_cs utf8mb4 278 Yes 0
utf8mb4_de_pb_0900_as_cs utf8mb4 279 Yes 0
utf8mb4_is_0900_as_cs utf8mb4 280 Yes 0
utf8mb4_lv_0900_as_cs utf8mb4 281 Yes 0
utf8mb4_ro_0900_as_cs utf8mb4 282 Yes 0
utf8mb4_sl_0900_as_cs utf8mb4 283 Yes 0
utf8mb4_pl_0900_as_cs utf8mb4 284 Yes 0
utf8mb4_et_0900_as_cs utf8mb4 285 Yes 0
utf8mb4_es_0900_as_cs utf8mb4 286 Yes 0
utf8mb4_sv_0900_as_cs utf8mb4 287 Yes 0
utf8mb4_tr_0900_as_cs utf8mb4 288 Yes 0
utf8mb4_cs_0900_as_cs utf8mb4 289 Yes 0
utf8mb4_da_0900_as_cs utf8mb4 290 Yes 0
utf8mb4_lt_0900_as_cs utf8mb4 291 Yes 0
utf8mb4_sk_0900_as_cs utf8mb4 292 Yes 0
utf8mb4_es_trad_0900_as_cs utf8mb4 293 Yes 0
utf8mb4_la_0900_as_cs utf8mb4 294 Yes 0
utf8mb4_eo_0900_as_cs utf8mb4 296 Yes 0
utf8mb4_hu_0900_as_cs utf8mb4 297 Yes 0
utf8mb4_hr_0900_as_cs utf8mb4 298 Yes 0
utf8mb4_vi_0900_as_cs utf8mb4 300 Yes 0
utf8mb4_ja_0900_as_cs utf8mb4 303 Yes 0
utf8mb4_ja_0900_as_cs_ks utf8mb4 304 Yes 24
utf8mb4_0900_as_ci utf8mb4 305 Yes 0
utf8mb4_ru_0900_ai_ci utf8mb4 306 Yes 0
utf8mb4_ru_0900_as_cs utf8mb4 307 Yes 0
utf8mb4_zh_0900_as_cs utf8mb4 308 Yes 0
utf8mb4_0900_bin utf8mb4 309 Yes 1
utf8mb4_nb_0900_ai_ci utf8mb4 310 Yes 0
utf8mb4_nb_0900_as_cs utf8mb4 311 Yes 0
utf8mb4_nn_0900_ai_ci utf8mb4 312 Yes 0
utf8mb4_nn_0900_as_cs utf8mb4 313 Yes 0
utf8mb4_sr_latn_0900_ai_ci utf8mb4 314 Yes 0
utf8mb4_sr_latn_0900_as_cs utf8mb4 315 Yes 0
utf8mb4_bs_0900_ai_ci utf8mb4 316 Yes 0
utf8mb4_bs_0900_as_cs utf8mb4 317 Yes 0
utf8mb4_bg_0900_ai_ci utf8mb4 318 Yes 0
utf8mb4_bg_0900_as_cs utf8mb4 319 Yes 0
utf8mb4_gl_0900_ai_ci utf8mb4 320 Yes 0
utf8mb4_gl_0900_as_cs utf8mb4 321 Yes 0
utf8mb4_mn_cyrl_0900_ai_ci utf8mb4 322 Yes 0
utf8mb4_mn_cyrl_0900_as_cs utf8mb4 323 Yes 0
show create table collations;
View Create View character_set_client collation_connection
COLLATIONS CREATE VIEW `COLLATIONS` AS select collation as COLLATION_NAME, charset as CHARACTER_SET_NAME, id as ID, `is_default` as IS_DEFAULT, is_compiled as IS_COMPILED, sortlen as SORTLEN from oceanbase.__tenant_virtual_collation utf8mb4 utf8mb4_general_ci

View File

@ -51,7 +51,7 @@ int segment_and_calc_word_count(
int64_t doc_length = 0;
if (OB_ISNULL(helper)
|| OB_UNLIKELY(ObCollationType::CS_TYPE_INVALID == type
|| ObCollationType::CS_TYPE_EXTENDED_MARK < type)
|| ObCollationType::CS_TYPE_PINYIN_BEGIN_MARK <= type)
|| OB_UNLIKELY(!words_count.created())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid arguments", K(ret), KPC(helper), K(type), K(words_count.created()));
@ -510,7 +510,7 @@ TEST_F(ObTestFTParseHelper, test_parse_fulltext)
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_INVALID, ObTestAddWord::TEST_FULLTEXT,
std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, ft_word_map));
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_EXTENDED_MARK, ObTestAddWord::TEST_FULLTEXT,
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_PINYIN_BEGIN_MARK, ObTestAddWord::TEST_FULLTEXT,
std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, ft_word_map));
ASSERT_EQ(OB_INIT_TWICE, parse_helper_.init(&allocator_, plugin_name_));
@ -681,7 +681,7 @@ TEST_F(ObTestNgramFTParseHelper, test_parse_fulltext)
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_INVALID, ObTestAddWord::TEST_FULLTEXT,
std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words));
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_EXTENDED_MARK, ObTestAddWord::TEST_FULLTEXT,
ASSERT_EQ(OB_INVALID_ARGUMENT, parse_helper_.segment(CS_TYPE_PINYIN_BEGIN_MARK, ObTestAddWord::TEST_FULLTEXT,
std::strlen(ObTestAddWord::TEST_FULLTEXT), doc_length, words));
ASSERT_EQ(OB_INIT_TWICE, parse_helper_.init(&allocator_, plugin_name_));