[FEAT MERGE]charset latin1

This commit is contained in:
akaError
2023-01-28 15:43:48 +08:00
committed by ob-robot
parent 2663894581
commit af2506b14c
42 changed files with 1215 additions and 392 deletions

View File

@ -28,6 +28,7 @@
#include "lib/string/ob_sql_string.h"
#include "lib/worker.h"
#include "common/object/ob_obj_funcs.h"
#include "lib/charset/ob_charset.h"
using namespace oceanbase;
using namespace oceanbase::common;
@ -1410,7 +1411,7 @@ int ObObj::convert_string_value_charset(ObCharsetType charset_type, ObIAllocator
} else if (CS_TYPE_BINARY != get_collation_type() && CS_TYPE_BINARY != collation_type
&& strcmp(from_charset_info->csname, to_charset_info->csname) != 0) {
char *buf = NULL;
int32_t buf_len = str.length() * 4;
int32_t buf_len = str.length() * ObCharset::CharConvertFactorNum;
uint32_t result_len = 0;
if (0 == buf_len) {
//do nothing

View File

@ -279,6 +279,7 @@ const ObCharsetWrapper ObCharset::charset_wrap_arr_[ObCharset::VALID_CHARSET_TYP
{CHARSET_GBK, "GBK charset", CS_TYPE_GBK_CHINESE_CI, 2},
{CHARSET_UTF16, "UTF-16 Unicode", CS_TYPE_UTF16_GENERAL_CI, 2},
{CHARSET_GB18030, "GB18030 charset", CS_TYPE_GB18030_CHINESE_CI, 4},
{CHARSET_LATIN1, "cp1252 West European", CS_TYPE_LATIN1_SWEDISH_CI, 1},
};
const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATION_TYPES] =
@ -294,11 +295,13 @@ const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATI
{CS_TYPE_INVALID, CHARSET_INVALID, CS_TYPE_INVALID, false, false, 1},
{CS_TYPE_GB18030_CHINESE_CI, CHARSET_GB18030, CS_TYPE_GB18030_CHINESE_CI, true, true, 1},
{CS_TYPE_GB18030_BIN, CHARSET_GB18030, CS_TYPE_GB18030_BIN, false, true, 1},
{CS_TYPE_LATIN1_SWEDISH_CI, CHARSET_LATIN1, CS_TYPE_LATIN1_SWEDISH_CI,true, true, 1},
{CS_TYPE_LATIN1_BIN, CHARSET_LATIN1, CS_TYPE_LATIN1_BIN,false, true, 1},
};
ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 0 ~ 7
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 8
&ob_charset_latin1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 8
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 16
NULL, NULL, NULL, NULL, &ob_charset_gbk_chinese_ci, // 24
NULL, NULL, NULL, // 29
@ -306,7 +309,7 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
NULL, NULL, NULL, NULL, NULL, // 40
&ob_charset_utf8mb4_general_ci, // 45
&ob_charset_utf8mb4_bin, // 46
NULL, // 47
&ob_charset_latin1_bin, // 47
NULL, NULL, NULL, NULL, NULL, NULL, // 48
&ob_charset_utf16_general_ci,// 54
&ob_charset_utf16_bin, // 55
@ -1345,6 +1348,10 @@ const char *ObCharset::charset_name(ObCharsetType charset_type)
ret_name = "gb18030";
break;
}
case CHARSET_LATIN1: {
ret_name = "latin1";
break;
}
default: {
break;
}
@ -1442,6 +1449,8 @@ ObCharsetType ObCharset::charset_type(const ObString &cs_name)
charset_type = CHARSET_UTF16;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_bin.csname)) {
charset_type = CHARSET_GB18030;
} else if (0 == cs_name.case_compare(ob_charset_latin1.csname)) {
charset_type = CHARSET_LATIN1;
}
return charset_type;
}
@ -1458,6 +1467,8 @@ ObCharsetType ObCharset::charset_type_by_name_oracle(const ObString &cs_name)
charset_type = CHARSET_GBK;
} else if (0 == cs_name.case_compare("ZHS32GB18030")) {
charset_type = CHARSET_GB18030;
} else if (0 == cs_name.case_compare("WE8MSWIN1252")) {
charset_type = CHARSET_LATIN1;
}
return charset_type;
}
@ -1500,6 +1511,10 @@ ObCollationType ObCharset::collation_type(const ObString &cs_name)
collation_type = CS_TYPE_GB18030_BIN;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_chinese_ci.name)) {
collation_type = CS_TYPE_GB18030_CHINESE_CI;
} else if (0 == cs_name.case_compare(ob_charset_latin1_bin.name)) {
collation_type = CS_TYPE_LATIN1_BIN;
} else if (0 == cs_name.case_compare(ob_charset_latin1.name)) {
collation_type = CS_TYPE_LATIN1_SWEDISH_CI;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_chinese_cs.name)) {
collation_type = CS_TYPE_GB18030_CHINESE_CS;
} else if (0 == cs_name.case_compare("any_cs")) {
@ -1541,10 +1556,13 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
|| CS_TYPE_GB18030_BIN == collation_type) {
ret = true;
}
} else if (CHARSET_LATIN1 == charset_type) {
if (CS_TYPE_LATIN1_SWEDISH_CI == collation_type || CS_TYPE_LATIN1_BIN == collation_type) {
ret = true;
}
}
return ret;
}
ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_type,
const ObString &nlssort_param)
{
@ -1557,6 +1575,7 @@ ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_
CS_TYPE_GBK_BIN,
CS_TYPE_UTF16_BIN,
CS_TYPE_GB18030_BIN,
CS_TYPE_LATIN1_BIN
};
static ObCollationType non_bin_coll_marks[NLS_COLLATION_MAX] = {
CS_TYPE_INVALID,
@ -1581,8 +1600,10 @@ ObCollationType ObCharset::get_coll_type_by_nlssort_param(ObCharsetType charset_
} else if (nls_coll_type == NLS_COLLATION_SCHINESE_PINYIN_M) {
coll_type = CS_TYPE_GB18030_CHINESE_CS;
} else {
coll_type = static_cast<ObCollationType>(
non_bin_coll_marks[nls_coll_type] + (charset_type - CHARSET_BINARY));
if (charset_type != CHARSET_LATIN1) {
coll_type = static_cast<ObCollationType>(
non_bin_coll_marks[nls_coll_type] + (charset_type - CHARSET_BINARY));
}
}
}
return coll_type;
@ -1601,6 +1622,8 @@ bool ObCharset::is_valid_collation(int64_t collation_type_int)
|| CS_TYPE_GB18030_BIN == collation_type
|| CS_TYPE_GB18030_CHINESE_CI == collation_type
|| CS_TYPE_GB18030_CHINESE_CS == collation_type
|| CS_TYPE_LATIN1_SWEDISH_CI == collation_type
|| CS_TYPE_LATIN1_BIN == collation_type
;
}
@ -1648,6 +1671,11 @@ ObCharsetType ObCharset::charset_type_by_coll(ObCollationType collation_type)
charset_type = CHARSET_GB18030;
break;
}
case CS_TYPE_LATIN1_SWEDISH_CI:
case CS_TYPE_LATIN1_BIN: {
charset_type = CHARSET_LATIN1;
break;
}
default: {
break;
}
@ -1672,6 +1700,9 @@ ObNlsCharsetId ObCharset::charset_type_to_ora_charset_id(ObCharsetType cs_type)
case CHARSET_UTF16:
cs_id = CHARSET_AL16UTF16_ID;
break;
case CHARSET_LATIN1:
cs_id = CHARSET_WE8MSWIN1252_ID;
break;
default:
break;
}
@ -1695,6 +1726,8 @@ ObCharsetType ObCharset::ora_charset_type_to_charset_type(ObNlsCharsetId charset
case CHARSET_AL16UTF16_ID:
cs_type = CHARSET_UTF16;
break;
case CHARSET_WE8MSWIN1252_ID:
cs_type = CHARSET_LATIN1;
default:
break;
}
@ -1804,8 +1837,8 @@ int ObCharset::aggregate_collation(
if (OB_UNLIKELY(
CS_LEVEL_INVALID == collation_level1
|| CS_LEVEL_INVALID == collation_level2
|| CS_TYPE_INVALID == collation_type1
|| CS_TYPE_INVALID == collation_type2)) {
|| !is_valid_collation(collation_type1)
|| !is_valid_collation(collation_type2))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN ("invalid collation level or type",
K(ret), K(collation_level1), K(collation_type1), K(collation_level2), K(collation_type2));
@ -1827,107 +1860,39 @@ int ObCharset::aggregate_collation(
res_level = collation_level2;
res_type = collation_type2;
} else if (charset_type_by_coll(collation_type1) != charset_type_by_coll(collation_type2)) {
/**
* 左右字符集不相同的情况
* 依次处理3种情况
* utf8mb4和utf16:使用utf16
* utf8mb4和gbk:使用utf8mb4
* utf16和gbk:使用utf16
* utf8mb4和gb18030:使用utf8mb4
* utf16和gb18030:使用utf16
* gbk和gb18030:使用gb18030
*/
if ((charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4
&& charset_type_by_coll(collation_type2) == CHARSET_GBK)
|| (charset_type_by_coll(collation_type2) == CHARSET_UTF8MB4
&& charset_type_by_coll(collation_type1) == CHARSET_GBK)) {
if (charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4) {
res_level = collation_level1;
res_type = collation_type1;
} else {
res_level = collation_level2;
res_type = collation_type2;
}
} else if ((charset_type_by_coll(collation_type1) == CHARSET_UTF16
&& charset_type_by_coll(collation_type2) == CHARSET_GBK)
|| (charset_type_by_coll(collation_type2) == CHARSET_UTF16
&& charset_type_by_coll(collation_type1) == CHARSET_GBK)) {
if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
res_level = collation_level1;
res_type = collation_type1;
} else {
res_level = collation_level2;
res_type = collation_type2;
}
} else if ((charset_type_by_coll(collation_type1) == CHARSET_UTF16
&& charset_type_by_coll(collation_type2) == CHARSET_UTF8MB4)
|| (charset_type_by_coll(collation_type2) == CHARSET_UTF16
&& charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4)) {
if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
res_level = collation_level1;
res_type = collation_type1;
} else {
res_level = collation_level2;
res_type = collation_type2;
}
} else if ((charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4
&& charset_type_by_coll(collation_type2) == CHARSET_GB18030)
|| (charset_type_by_coll(collation_type2) == CHARSET_UTF8MB4
&& charset_type_by_coll(collation_type1) == CHARSET_GB18030)) {
if (charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4) {
res_level = collation_level1;
res_type = collation_type1;
} else {
res_level = collation_level2;
res_type = collation_type2;
}
} else if ((charset_type_by_coll(collation_type1) == CHARSET_UTF16
&& charset_type_by_coll(collation_type2) == CHARSET_GB18030)
|| (charset_type_by_coll(collation_type2) == CHARSET_UTF16
&& charset_type_by_coll(collation_type1) == CHARSET_GB18030)) {
if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
res_level = collation_level1;
res_type = collation_type1;
} else {
res_level = collation_level2;
res_type = collation_type2;
}
} else if ((charset_type_by_coll(collation_type1) == CHARSET_GBK
&& charset_type_by_coll(collation_type2) == CHARSET_GB18030)
|| (charset_type_by_coll(collation_type2) == CHARSET_GBK
&& charset_type_by_coll(collation_type1) == CHARSET_GB18030)) {
if (charset_type_by_coll(collation_type1) == CHARSET_GB18030) {
res_level = collation_level1;
res_type = collation_type1;
} else {
res_level = collation_level2;
res_type = collation_type2;
}
/**
* The left and right charsets differ.
* The main combinations are:
* utf8mb4 and utf16: use utf16
* utf8mb4 and gbk: use utf8mb4
* utf16 and gbk: use utf16
* utf8mb4 and gb18030: use utf8mb4
* utf16 and gb18030: use utf16
* gbk and gb18030: use gb18030
* Any of the charsets X above combined with latin1 yields X; latin1 currently
* has the lowest priority.
*/
// Table lookup: 1 selects the first operand, 2 selects the second operand,
// anything else means the two charsets cannot be aggregated.
int res = AGGREGATE_2CHARSET[charset_type_by_coll(collation_type1)][charset_type_by_coll(collation_type2)];
if (res == 1) {
res_type = collation_type1;
res_level = collation_level1;
} else if (res == 2) {
res_type = collation_type2;
res_level = collation_level2;
} else {
// every combination that cannot be converted ends up here
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else {
// 所有不能转换的情况都到这里
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else {
//处理相同字符集的情况,每种字符集单独考虑
if (collation_type1 == collation_type2) {
res_type = collation_type1;
res_level = collation_level1;
} else if (CS_LEVEL_EXPLICIT == collation_level1) {
ret = OB_CANT_AGGREGATE_2COLLATIONS;
//处理相同字符集的情况,每种字符集单独考虑
if (collation_type1 == collation_type2) {
res_type = collation_type1;
res_level = collation_level1;
} else if (CS_LEVEL_EXPLICIT == collation_level1) {
ret = OB_CANT_AGGREGATE_2COLLATIONS;
// ERROR 1267 (HY000): Illegal mix of collations (utf8_general_ci,EXPLICIT) and (utf8_bin,EXPLICIT) for operation '='
// LOG_USER_ERROR(ret);
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4) {
// 处理utf8mb4编码
if (OB_UNLIKELY(collation_type1 != CS_TYPE_UTF8MB4_BIN
&& collation_type1 != CS_TYPE_UTF8MB4_GENERAL_CI
&& collation_type1 != CS_TYPE_UTF8MB4_UNICODE_CI) ||
OB_UNLIKELY(collation_type2 != CS_TYPE_UTF8MB4_BIN
&& collation_type2 != CS_TYPE_UTF8MB4_GENERAL_CI
&& collation_type2 != CS_TYPE_UTF8MB4_UNICODE_CI)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid collation level or type",
K(ret), K(collation_level1), K(collation_type1), K(collation_level2), K(collation_type2));
} else {
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF8MB4) {
if (collation_type1 == CS_TYPE_UTF8MB4_BIN || collation_type2 == CS_TYPE_UTF8MB4_BIN) {
res_type = CS_TYPE_UTF8MB4_BIN;
res_level = (CS_TYPE_UTF8MB4_BIN == collation_type1) ? collation_level1 : collation_level2;
@ -1935,31 +1900,10 @@ int ObCharset::aggregate_collation(
// utf8mb4_unicode_ci和utf8mb4_general_ci的情况报错,和mysql兼容
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
}
} else if (charset_type_by_coll(collation_type2) == CHARSET_GBK) {
// utf8mb4_general_ci vs utf8mb4_bin
if (OB_UNLIKELY(collation_type1 != CS_TYPE_GBK_BIN && collation_type1 != CS_TYPE_GBK_CHINESE_CI) ||
OB_UNLIKELY(collation_type2 != CS_TYPE_GBK_BIN && collation_type2 != CS_TYPE_GBK_CHINESE_CI)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid collation level or type",
K(ret), K(collation_level1), K(collation_type1), K(collation_level2), K(collation_type2));
} else {
// gbk_bin is prefer to gbk_xxx
res_type = CS_TYPE_GBK_BIN;
res_level = (CS_TYPE_GBK_BIN == collation_type1) ? collation_level1 : collation_level2;
}
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
if (OB_UNLIKELY(collation_type1 != CS_TYPE_UTF16_BIN
&& collation_type1 != CS_TYPE_UTF16_GENERAL_CI
&& collation_type1 != CS_TYPE_UTF16_UNICODE_CI) ||
OB_UNLIKELY(collation_type2 != CS_TYPE_UTF16_BIN
&& collation_type2 != CS_TYPE_UTF16_GENERAL_CI
&& collation_type2 != CS_TYPE_UTF16_UNICODE_CI)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid collation level or type",
K(ret), K(collation_level1), K(collation_type1), K(collation_level2), K(collation_type2));
} else {
res_level = (CS_TYPE_UTF8MB4_BIN == collation_type1) ? collation_level1 : collation_level2;
} else if (charset_type_by_coll(collation_type1) == CHARSET_GBK) {
res_type = CS_TYPE_GBK_BIN;
res_level = (CS_TYPE_GBK_BIN == collation_type1) ? collation_level1 : collation_level2;
} else if (charset_type_by_coll(collation_type1) == CHARSET_UTF16) {
if (collation_type1 == CS_TYPE_UTF16_BIN || collation_type2 == CS_TYPE_UTF16_BIN) {
res_type = CS_TYPE_UTF16_BIN;
res_level = (CS_TYPE_UTF16_BIN == collation_type1) ? collation_level1 : collation_level2;
@ -1967,35 +1911,29 @@ int ObCharset::aggregate_collation(
// utf16_unicode_ci和utf16_general_ci直接报错,不应该出现这种情况
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
}
} else if (charset_type_by_coll(collation_type2) == CHARSET_GB18030) {
// utf8mb4_general_ci vs utf8mb4_bin
if (OB_UNLIKELY(collation_type1 != CS_TYPE_GB18030_BIN
&& collation_type1 != CS_TYPE_GB18030_CHINESE_CI)
|| OB_UNLIKELY(collation_type2 != CS_TYPE_GB18030_BIN
&& collation_type2 != CS_TYPE_GB18030_CHINESE_CI)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid collation level or type",
K(ret), K(collation_level1), K(collation_type1),
K(collation_level2), K(collation_type2));
} else if (charset_type_by_coll(collation_type1) == CHARSET_GB18030) {
res_type = CS_TYPE_GB18030_BIN;
res_level = (CS_TYPE_GB18030_BIN == collation_type1) ? collation_level1 : collation_level2;
} else if (charset_type_by_coll(collation_type1) == CHARSET_LATIN1) {
if (collation_type1 == CS_TYPE_LATIN1_BIN || collation_type2 == CS_TYPE_LATIN1_BIN) {
res_type = CS_TYPE_LATIN1_BIN;
res_level = (CS_TYPE_LATIN1_BIN == collation_type1) ? collation_level1 : collation_level2;
} else {
// latin1_german may be supported in the future; it is not compatible with latin1_swedish
ret = OB_CANT_AGGREGATE_2COLLATIONS;
}
} else {
// gbk_bin is prefer to gbk_xxx
res_type = CS_TYPE_GB18030_BIN;
res_level = (CS_TYPE_GB18030_BIN ==
collation_type1) ? collation_level1 : collation_level2;
}
} else {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("Unexpected charset", K(collation_type1), K(collation_type2), KCSTRING(lbt()));
}
}
}
if (OB_FAIL(ret)) {
LOG_WARN("Illegal mix of collations", K(ret),
"type1", ObCharset::collation_name(collation_type1),
"level1", ObCharset::collation_level(collation_level1),
"type2", ObCharset::collation_name(collation_type2),
"level2", ObCharset::collation_level(collation_level2));
if (OB_FAIL(ret)) {
LOG_WARN("Illegal mix of collations", K(ret),
"type1", ObCharset::collation_name(collation_type1),
"level1", ObCharset::collation_level(collation_level1),
"type2", ObCharset::collation_name(collation_type2),
"level2", ObCharset::collation_level(collation_level2));
}
}
return ret;
}
@ -2047,6 +1985,10 @@ ObCollationType ObCharset::get_default_collation(ObCharsetType charset_type)
collation_type = CS_TYPE_GB18030_CHINESE_CI;
break;
}
case CHARSET_LATIN1: {
collation_type = CS_TYPE_LATIN1_SWEDISH_CI;
break;
}
default: {
break;
}
@ -2085,6 +2027,10 @@ ObCollationType ObCharset::get_default_collation_oracle(ObCharsetType charset_ty
collation_type = CS_TYPE_GB18030_BIN;
break;
}
case CHARSET_LATIN1: {
collation_type = CS_TYPE_LATIN1_BIN;
break;
}
default: {
break;
}
@ -2116,6 +2062,10 @@ int ObCharset::get_default_collation(ObCharsetType charset_type, ObCollationType
collation_type = CS_TYPE_GB18030_CHINESE_CI;
break;
}
case CHARSET_LATIN1: {
collation_type = CS_TYPE_LATIN1_SWEDISH_CI;
break;
}
default: {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid charset type", K(ret), K(charset_type));
@ -2149,6 +2099,10 @@ ObCollationType ObCharset::get_bin_collation(ObCharsetType charset_type)
collation_type = CS_TYPE_GB18030_BIN;
break;
}
case CHARSET_LATIN1: {
collation_type = CS_TYPE_LATIN1_BIN;
break;
}
default: {
break;
}
@ -2278,6 +2232,7 @@ bool ObCharset::is_default_collation(ObCollationType collation_type)
case CS_TYPE_GBK_CHINESE_CI:
case CS_TYPE_UTF16_GENERAL_CI:
case CS_TYPE_GB18030_CHINESE_CI:
case CS_TYPE_LATIN1_SWEDISH_CI:
case CS_TYPE_BINARY: {
ret = true;
break;
@ -2699,6 +2654,7 @@ int ObCharset::get_aggregate_len_unit(const ObCollationType collation_type, bool
len_in_byte = false;
ObCharsetType res_charset = ObCharset::charset_type_by_coll(collation_type);
if (CHARSET_UTF8MB4 == res_charset
|| CHARSET_LATIN1 == res_charset
|| CHARSET_UTF16 == res_charset
|| CHARSET_GBK == res_charset
|| CHARSET_GB18030 == res_charset) {
@ -2920,6 +2876,7 @@ bool ObCharset::is_valid_connection_collation(ObCollationType collation_type)
{
ObCharsetType cs_type = ObCharset::charset_type_by_coll(collation_type);
return cs_type == CHARSET_UTF8MB4
|| cs_type == CHARSET_LATIN1
|| cs_type == CHARSET_GBK
|| cs_type == CHARSET_GB18030
|| cs_type == CHARSET_BINARY;
@ -2941,6 +2898,9 @@ const char *ObCharset::get_oracle_charset_name_by_charset_type(ObCharsetType cha
case CHARSET_GB18030:
ret = "ZHS32GB18030";
break;
case CHARSET_LATIN1:
ret = "WE8MSWIN1252";
break;
default:
break;
}
@ -2963,6 +2923,9 @@ int ObCharset::get_nls_charset_id_by_charset_type(ObCharsetType charset_type)
case CHARSET_GB18030:
ret_id = ObNlsCharsetId::CHARSET_ZHS32GB18030_ID;
break;
case CHARSET_LATIN1:
ret_id = ObNlsCharsetId::CHARSET_WE8MSWIN1252_ID;
break;
default:
break;
}

View File

@ -37,15 +37,36 @@ enum ObCharsetType
CHARSET_GBK = 3,
CHARSET_UTF16 = 4,
CHARSET_GB18030 = 5,
CHARSET_LATIN1 = 6,
CHARSET_MAX,
};
/*
 * Charset aggregation matrix, indexed as AGGREGATE_2CHARSET[charset1][charset2].
 * The stored value is the index of the operand whose charset wins:
 *   1 - the result is the first (row) charset
 *   2 - the result is the second (column) charset
 *   0 - the pair cannot be aggregated (OB_CANT_AGGREGATE_2COLLATIONS)
 * Examples:
 *   AGGREGATE_2CHARSET[CHARSET_UTF8MB4][CHARSET_GBK] = 1 means the result is CHARSET_UTF8MB4
 *   AGGREGATE_2CHARSET[CHARSET_GBK][CHARSET_UTF8MB4] = 2 means the result is CHARSET_UTF8MB4
 * Only the combinations that currently need to be handled are filled with 1 or 2;
 * every other cell is 0.
 * Per the original author, the diagonal (e.g. [CHARSET_UTF8MB4][CHARSET_UTF8MB4])
 * is never reached.
 */
static const int AGGREGATE_2CHARSET[CHARSET_MAX][CHARSET_MAX] = {
//columns: CHARSET_INVALID, CHARSET_BINARY, CHARSET_UTF8MB4, CHARSET_GBK, CHARSET_UTF16, CHARSET_GB18030, CHARSET_LATIN1
{0,0,0,0,0,0,0},//CHARSET_INVALID
{0,0,0,0,0,0,0},//CHARSET_BINARY
{0,0,0,1,2,1,1},//CHARSET_UTF8MB4
{0,0,2,0,2,2,1},//CHARSET_GBK
{0,0,1,1,0,1,1},//CHARSET_UTF16
{0,0,2,1,2,0,1},//CHARSET_GB18030
{0,0,2,2,2,2,0},//CHARSET_LATIN1
};
enum ObCollationType
{
CS_TYPE_INVALID = 0,
CS_TYPE_LATIN1_SWEDISH_CI = 8,
CS_TYPE_GBK_CHINESE_CI = 28,
CS_TYPE_UTF8MB4_GENERAL_CI = 45,
CS_TYPE_UTF8MB4_BIN = 46,
CS_TYPE_LATIN1_BIN = 47,
CS_TYPE_UTF16_GENERAL_CI = 54,
CS_TYPE_UTF16_BIN = 55,
CS_TYPE_BINARY = 63,
@ -67,18 +88,21 @@ enum ObCollationType
CS_TYPE_GBK_ZH_0900_AS_CS,
CS_TYPE_UTF16_ZH_0900_AS_CS,
CS_TYPE_GB18030_ZH_0900_AS_CS,
CS_TYPE_latin1_ZH_0900_AS_CS, //invalid, not really used
//radical-stroke order
CS_TYPE_RADICAL_BEGIN_MARK,
CS_TYPE_UTF8MB4_ZH2_0900_AS_CS,
CS_TYPE_GBK_ZH2_0900_AS_CS,
CS_TYPE_UTF16_ZH2_0900_AS_CS,
CS_TYPE_GB18030_ZH2_0900_AS_CS,
CS_TYPE_latin1_ZH2_0900_AS_CS ,//invalid, not really used
//stroke order
CS_TYPE_STROKE_BEGIN_MARK,
CS_TYPE_UTF8MB4_ZH3_0900_AS_CS,
CS_TYPE_GBK_ZH3_0900_AS_CS,
CS_TYPE_UTF16_ZH3_0900_AS_CS,
CS_TYPE_GB18030_ZH3_0900_AS_CS,
CS_TYPE_GB18030_ZH3_0900_AS_CS,
CS_TYPE_latin1_ZH3_0900_AS_CS, //invalid, not really used
CS_TYPE_MAX
};
@ -88,6 +112,7 @@ enum ObCollationType
enum ObNlsCharsetId
{
CHARSET_INVALID_ID = 0,
CHARSET_WE8MSWIN1252_ID=31,
CHARSET_ZHS16GBK_ID = 852,
CHARSET_ZHS32GB18030_ID = 854,
CHARSET_UTF8_ID = 871,
@ -123,6 +148,7 @@ enum ObCollationLevel
// and we didn't persist it on storage.
};
struct ObCharsetWrapper
{
ObCharsetType charset_;
@ -167,9 +193,11 @@ public:
static const int32_t MIN_MB_LEN = 1;
static const int32_t MAX_CASE_MULTIPLY = 4;
// Byte-expansion factor for charset conversion: e.g. a latin1 character takes 1 byte
// while a utf8mb4 character may take 4 bytes, so the factor is 4. It can also be read
// as "at most 4 bytes are used to store one character".
static const int32_t CharConvertFactorNum = 4;
static const int64_t VALID_CHARSET_TYPES = 5;
static const int64_t VALID_COLLATION_TYPES = 11;
static const int64_t VALID_CHARSET_TYPES = 6;
static const int64_t VALID_COLLATION_TYPES = 13;
static int init_charset();
// strntodv2 is an enhanced version of strntod,
@ -348,7 +376,8 @@ public:
|| CHARSET_UTF8MB4 == charset_type
|| CHARSET_GBK == charset_type
|| CHARSET_UTF16 == charset_type
|| CHARSET_GB18030 == charset_type;
|| CHARSET_GB18030 == charset_type
|| CHARSET_LATIN1 == charset_type;
}
static ObCharsetType charset_type_by_coll(ObCollationType coll_type);
static int charset_name_by_coll(const ObString &coll_name, common::ObString &cs_name);

View File

@ -30,16 +30,23 @@
#define OB_UTF16_BIN OB_UTF16 "_bin"
#define OB_UTF16_UNICODE_CI OB_UTF16 "_unicode_ci"
#define OB_LATIN1 "latin1"
#define OB_LATIN1_SWEDISH_CI OB_LATIN1 "_swedish_ci"
#define OB_LATIN1_BIN OB_LATIN1 "_bin"
/* wm_wc and wc_mb return codes */
#define OB_CS_ILSEQ 0 // mb_wc wrong sequence
#define OB_CS_ILUNI 0 // wc_mb fail to encode Unicode to charset
// not enough bytes for wc_mb and mb_wc
#define OB_CS_TOOSMALL -101
#define OB_CS_TOOSMALL2 -102
#define OB_CS_TOOSMALL3 -103
#define OB_CS_TOOSMALL4 -104
#define OB_CS_TOOSMALL5 -105
#define OB_CS_TOOSMALL6 -106
#define OB_CS_ILSEQ 0 /* Wrong byte sequence: mb_wc */
#define OB_CS_ILUNI 0 /* Cannot encode Unicode to charset: wc_mb */
#define OB_CS_SUCCESS 0
#define OB_CS_NUM_OUT_OF_RANGE -3
#define OB_CS_TOOSMALL -101 /* Need at least one byte: wc_mb and mb_wc */
#define OB_CS_TOOSMALL2 -102 /* Need at least two bytes: wc_mb and mb_wc */
#define OB_CS_TOOSMALL3 -103 /* Need at least three bytes: wc_mb and mb_wc */
/* The following three are currently not really used */
#define OB_CS_TOOSMALL4 -104 /* Need at least 4 bytes: wc_mb and mb_wc */
#define OB_CS_TOOSMALL5 -105 /* Need at least 5 bytes: wc_mb and mb_wc */
#define OB_CS_TOOSMALL6 -106 /* Need at least 6 bytes: wc_mb and mb_wc */
/* A helper macro for "need at least n bytes": OB_CS_TOOSMALLN(n) == -100 - n */
#define OB_CS_TOOSMALLN(n) (-100-(n))
#define OB_SEQ_INTTAIL 1
@ -109,6 +116,9 @@
#define _MY_B 0100
#define _MY_X 0200
/*
 * Per-charset single-byte table lookups. `s` is a pointer to an object exposing
 * to_upper / to_lower / sort_order tables and `c` is the byte to translate.
 * `c` is cast to uchar before indexing so that a negative `char` value cannot
 * produce a negative index; the result is likewise yielded as uchar.
 */
#define ob_toupper(s, c) (uchar)((s)->to_upper[(uchar)(c)])
#define ob_tolower(s, c) (uchar)((s)->to_lower[(uchar)(c)])
#define ob_sort_order(s,c) (uchar)((s)->sort_order[(uchar)(c)])
struct ObCharsetInfo;
struct ObUCAInfo;
@ -352,8 +362,6 @@ struct ObCharsetInfo
#define ob_toascii(c) ((c) & 0177)
#define ob_tocntrl(c) ((c) & 31)
#define ob_toprint(c) ((c) | 64)
#define ob_toupper(s,c) (char) ((s)->to_upper[(uchar) (c)])
#define ob_tolower(s,c) (char) ((s)->to_lower[(uchar) (c)])
#define ob_isalpha(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L) : 0)
#define ob_isupper(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_U : 0)
#define ob_islower(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_L : 0)
@ -426,7 +434,8 @@ extern ObCharsetInfo ob_charset_gb18030_bin;
extern ObCollationHandler ob_collation_mb_bin_handler;
extern ObCharsetHandler ob_charset_utf8mb4_handler;
extern ObCharsetHandler ob_charset_utf16_handler;
extern ObCollationHandler ob_collation_binary_handler;
extern ObCollationHandler ob_collation_8bit_simple_ci_handler;
//=============================================================================
void ob_fill_8bit(const ObCharsetInfo *cs, char* to, size_t l, int fill);
@ -604,8 +613,43 @@ size_t ob_strnxfrmlen_unicode_full_bin(const struct ObCharsetInfo *, size_t);
size_t ob_strnxfrmlen_utf8mb4(const struct ObCharsetInfo *, size_t);
uint ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
uint c __attribute__((unused)));
size_t ob_numchars_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *b, const char *e);
size_t ob_charpos_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *b __attribute__((unused)),
const char *e __attribute__((unused)),
size_t pos);
size_t ob_max_bytes_charpos_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *b __attribute__((unused)),
const char *e __attribute__((unused)),
size_t max_bytes,
size_t *char_len);
size_t ob_lengthsp_binary(const ObCharsetInfo *cs __attribute__((unused)),
const char *ptr __attribute__((unused)),
size_t length);
int ob_mb_ctype_8bit(const ObCharsetInfo *cs, int *ctype,
const uchar *s, const uchar *e);
size_t ob_well_formed_len_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *start, const char *end,
size_t nchars, int *error);
char *strmake(char *, const char *, size_t);
size_t ob_casedn_8bit(const ObCharsetInfo *cs __attribute__((unused)),
char* str __attribute__((unused)), size_t srclen __attribute__((unused)),
char* dst __attribute__((unused)), size_t dstlen __attribute__((unused)));
size_t ob_caseup_8bit(const ObCharsetInfo *cs __attribute__((unused)),
char* str __attribute__((unused)), size_t srclen __attribute__((unused)),
char* dst __attribute__((unused)), size_t dstlen __attribute__((unused)));
extern "C" void right_to_die_or_duty_to_live_c();

View File

@ -137,7 +137,7 @@ static int ob_wc_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
return OB_CS_ILUNI;
}
static int ob_mb_ctype_8bit(const ObCharsetInfo *cs, int *ctype,
int ob_mb_ctype_8bit(const ObCharsetInfo *cs, int *ctype,
const unsigned char *str, const unsigned char *end)
{
if (str >= end) {
@ -389,7 +389,7 @@ static ObCharsetHandler ob_charset_handler=
ob_scan_8bit
};
static ObCollationHandler ob_collation_binary_handler =
ObCollationHandler ob_collation_binary_handler =
{
NULL,
NULL,

View File

@ -18,60 +18,391 @@
* - initial release
*
*/
#include "lib/charset/ob_mysql_global.h"
#include "lib/charset/ob_ctype.h"
#include "lib/utility/ob_macro_utils.h"
// Character-class flag table for latin1: 257 entries, i.e. one leading entry followed
// by one flag byte per byte value 0..255.
// NOTE(review): presumably consumed through the ob_is*() macros, which read
// (ctype + 1)[c] - confirm where this table is installed.
static unsigned char ctype_latin1[] = {
0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16,
16, 16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16,
16, 16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16,
32, 16, 0, 16, 2, 16, 16, 16, 16, 16, 16, 1, 16, 1, 0, 1,
0, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 2, 0, 2,
1, 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 16, 2, 2, 2, 2, 2, 2, 2,
2};
// Character-class flag table for latin1: 257 entries, laid out as one leading entry
// (the lone 0 on the first row) followed by 16 rows of 16 flag bytes, one per byte
// value 0..255.
// NOTE(review): presumably consumed through the ob_is*() macros, which read
// (ctype + 1)[c] - confirm where this table is installed.
static uchar ob_ctype_latin1[] = {
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
16, 0, 16, 2, 16, 16, 16, 16, 16, 16, 1, 16, 1, 0, 1, 0,
0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 2, 0, 2, 1,
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 16, 2, 2, 2, 2, 2, 2, 2, 2
};
// Lower-casing table for latin1, indexed by byte value (256 entries).
// 65..90 ('A'..'Z') map to 97..122 ('a'..'z') and 192..222 map to 224..254, except
// 215 which maps to itself; every other byte maps to itself.
// NOTE(review): presumably installed as the `to_lower` table read by ob_tolower() - confirm.
static unsigned char to_lower_latin1[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 224, 225, 226,
227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
242, 243, 244, 245, 246, 215, 248, 249, 250, 251, 252, 253, 254, 223, 224,
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255};
// Upper-casing table for latin1, indexed by byte value (256 entries).
// 97..122 ('a'..'z') map to 65..90 ('A'..'Z') and 224..254 map to 192..222, except
// 247 which maps to itself; every other byte (including 223 and 255) maps to itself.
// NOTE(review): presumably installed as the `to_upper` table read by ob_toupper() - confirm.
static unsigned char to_upper_latin1[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 192,
193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
208, 209, 210, 211, 212, 213, 214, 247, 216, 217, 218, 219, 220, 221, 222,
255};
// Sort-weight table for latin1, indexed by byte value (256 entries).
// 'a'..'z' (97..122) receive the same weights as 'A'..'Z' (65..90), and many bytes in
// 192..254 are folded onto the weight of an unaccented letter (e.g. 192..195 -> 65),
// so bytes sharing a weight compare as equal under this table.
// NOTE(review): presumably installed as the `sort_order` table read by ob_sort_order()
// for the case-insensitive latin1 collation - confirm.
static unsigned char sort_order_latin1[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 65, 65, 65,
65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73, 68, 78,
79, 79, 79, 79, 93, 215, 216, 85, 85, 85, 89, 89, 222, 223, 65,
65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
68, 78, 79, 79, 79, 79, 93, 247, 216, 85, 85, 85, 89, 89, 222,
255};
ObCharsetInfo ob_charset_latin1=
/*
 * Byte -> Unicode code point table (256 entries), read by ob_mb_wc_latin1.
 * Bytes 0x00..0x7F and 0xA0..0xFF map to the code point of the same value.
 * Bytes 0x80..0x9F mostly map outside that range (e.g. 0x80 -> U+20AC,
 * 0x99 -> U+2122); 0x81, 0x8D, 0x8F, 0x90 and 0x9D map to the same-valued
 * code points.
 */
static unsigned short cs_to_uni[] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010, 0x0011,
0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A,
0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x0020, 0x0021, 0x0022, 0x0023,
0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C,
0x002D, 0x002E, 0x002F, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035,
0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E,
0x003F, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x0060, 0x0061, 0x0062,
0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A, 0x006B,
0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074,
0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D,
0x007E, 0x007F, 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020,
0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC,
0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, 0x00A0, 0x00A1,
0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA,
0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, 0x00B0, 0x00B1, 0x00B2, 0x00B3,
0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC,
0x00BD, 0x00BE, 0x00BF, 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5,
0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE,
0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, 0x00E0,
0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9,
0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0, 0x00F1, 0x00F2,
0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB,
0x00FC, 0x00FD, 0x00FE, 0x00FF};
/*
 * Reverse-map page for code points U+0000..U+00FF: entry [wc & 0xFF] is the
 * latin1 byte for that code point. A 0x00 entry at any index other than 0
 * means "no byte for this code point" (ob_wc_mb_latin1 reports OB_CS_ILUNI).
 * Most of 0x80..0x9F is unmapped here; only 0x81, 0x8D, 0x8F, 0x90 and 0x9D
 * map to themselves.
 */
static unsigned char pl00[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53,
0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x00, 0x81, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x8F,
0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x9D, 0x00, 0x00, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3,
0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
0xFC, 0xFD, 0xFE, 0xFF};
/*
 * Reverse-map page for code points U+0100..U+01FF, indexed by (wc & 0xFF).
 * 0x00 means "no latin1 byte". Populated entries: U+0152 -> 0x8C,
 * U+0153 -> 0x9C, U+0160 -> 0x8A, U+0161 -> 0x9A, U+0178 -> 0x9F,
 * U+017D -> 0x8E, U+017E -> 0x9E, U+0192 -> 0x83.
 */
static unsigned char pl01[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8C, 0x9C,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x8A, 0x9A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x9F, 0x00, 0x00, 0x00, 0x00, 0x8E, 0x9E, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00};
/*
 * Reverse-map page for code points U+0200..U+02FF, indexed by (wc & 0xFF).
 * 0x00 means "no latin1 byte". Populated entries: U+02C6 -> 0x88 and
 * U+02DC -> 0x98.
 */
static unsigned char pl02[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00};
/*
 * Reverse-map page for code points U+2000..U+20FF, indexed by (wc & 0xFF).
 * 0x00 means "no latin1 byte". Holds the punctuation entries in the
 * U+2013..U+203A range (e.g. U+2013 -> 0x96, U+2026 -> 0x85) and
 * U+20AC -> 0x80.
 */
static unsigned char pl20[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00,
0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, 0x86, 0x87, 0x95, 0x00,
0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8B, 0x9B, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00};
/*
 * Reverse-map page for code points U+2100..U+21FF, indexed by (wc & 0xFF).
 * 0x00 means "no latin1 byte". The only populated entry is U+2122 -> 0x99.
 */
static unsigned char pl21[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00};
/*
 * Page directory for Unicode -> latin1 conversion: 256 slots indexed by the
 * high byte of the code point (wc >> 8), as used by ob_wc_mb_latin1.
 * Only pages 0x00, 0x01, 0x02, 0x20 and 0x21 have a table; a NULL slot means
 * no code point on that page has a latin1 byte.
 */
static unsigned char *uni_to_cs[] = {
pl00, pl01, pl02, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
pl20, pl21, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
/**
 * Decode one latin1 byte into a wide character.
 *
 * @param cs   charset descriptor (not consulted)
 * @param pwc  receives the code point looked up in cs_to_uni
 * @param str  current read position
 * @param end  one past the last readable byte
 * @return OB_CS_TOOSMALL when no input byte is available, -1 when a non-zero
 *         byte decodes to code point 0, otherwise 1 (bytes consumed).
 */
static int ob_mb_wc_latin1(const ObCharsetInfo *cs __attribute__((unused)),
                           ob_wc_t *pwc, const unsigned char *str, const unsigned char *end)
{
  if (str >= end) {
    return OB_CS_TOOSMALL;
  }
  *pwc = cs_to_uni[*str];
  // Only byte 0x00 may legitimately decode to code point 0.
  if (0 == pwc[0] && 0 != str[0]) {
    return -1;
  }
  return 1;
}
/**
 * Encode one wide character as a single latin1 byte.
 *
 * @param cs   charset descriptor (not consulted)
 * @param wc   code point to encode
 * @param str  current write position
 * @param end  one past the last writable byte
 * @return OB_CS_TOOSMALL when there is no room for a byte, OB_CS_ILUNI when
 *         the code point has no latin1 byte, otherwise 1 (bytes written).
 */
static int ob_wc_mb_latin1(const ObCharsetInfo *cs __attribute__((unused)),
                           ob_wc_t wc, unsigned char *str, unsigned char *end)
{
  if (str >= end) {
    return OB_CS_TOOSMALL;
  }
  if (wc > 0xFFFF) {
    return OB_CS_ILUNI;
  }
  // The high byte selects a page; a missing page means nothing on it maps.
  const unsigned char *page = uni_to_cs[wc >> 8];
  if (page) {
    str[0] = page[wc & 0xFF];
  } else {
    str[0] = '\0';
  }
  // A zero byte is only a valid result for code point 0.
  if (0 == str[0] && 0 != wc) {
    return OB_CS_ILUNI;
  }
  return 1;
}
/*
 * Charset handler table for latin1: combines the latin1-specific wide-char
 * converters (ob_mb_wc_latin1 / ob_wc_mb_latin1) with the generic 8-bit
 * helper routines.
 * NOTE(review): the entries from `8,0,0,` through `PAD_SPACE` below look like
 * ObCharsetInfo fields (compare the ob_charset_latin1 initializer) rather
 * than handler callbacks, and `PAD_SPACE` is not followed by a comma. They
 * appear to be leftovers from an earlier definition -- confirm and remove.
 */
static ObCharsetHandler ob_charset_latin1_handler=
{
8,0,0,
OB_CS_COMPILED | OB_CS_PRIMARY,
"latin1",
"latin1_swedish_ci",
"",
NULL,
NULL,
ob_ctype_latin1,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
1,
1,
1,
1,
1,
0,
255,
' ',
0,
1,
1,
NULL,
NULL ,
PAD_SPACE
//NULL, /* init */
NULL, /* ismbchar */
ob_mbcharlen_8bit, /* mbcharlen */
ob_numchars_8bit,
ob_charpos_8bit,
ob_max_bytes_charpos_8bit,
ob_well_formed_len_8bit,
ob_lengthsp_binary,
//ob_numcells_8bit,
ob_mb_wc_latin1,
ob_wc_mb_latin1,
ob_mb_ctype_8bit,
//ob_case_str_bin,
//ob_case_str_bin,
ob_caseup_8bit,
ob_casedn_8bit,
//ob_snprintf_8bit,
//ob_long10_to_str_8bit,
//ob_longlong10_to_str_8bit,
ob_fill_8bit,
ob_strntol_8bit,
ob_strntoul_8bit,
ob_strntoll_8bit,
ob_strntoull_8bit,
ob_strntod_8bit,
//ob_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
};
/*
 * Descriptor for collation OB_LATIN1_SWEDISH_CI (number 8) of charset
 * OB_LATIN1, flagged as compiled-in and primary. Comparisons use the
 * sort_order_latin1 weight table through ob_collation_8bit_simple_ci_handler;
 * character-level operations go through ob_charset_latin1_handler.
 */
ObCharsetInfo ob_charset_latin1 = {
8,0,0, /* number */
OB_CS_COMPILED | OB_CS_PRIMARY, /* state */
OB_LATIN1, /* cs name */
OB_LATIN1_SWEDISH_CI, /* name */
"cp1252 West European", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_latin1,
to_lower_latin1,
to_upper_latin1,
sort_order_latin1,
NULL, /* uca */
//NULL, /* tab_to_uni */
//NULL, /* tab_from_uni */
&ob_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0, /* min_sort_char */
0xFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_latin1_handler,
&ob_collation_8bit_simple_ci_handler,
PAD_SPACE};
/*
 * Descriptor for collation OB_LATIN1_BIN (number 47) of charset OB_LATIN1,
 * flagged as compiled-in with binary sort. It has no sort-order table and
 * compares through ob_collation_binary_handler; character-level operations
 * go through ob_charset_latin1_handler.
 */
ObCharsetInfo ob_charset_latin1_bin = {
47,0,0, /* number */
OB_CS_COMPILED | OB_CS_BINSORT, /* state */
OB_LATIN1, /* cs name */
OB_LATIN1_BIN, /* name */
"cp1252 West European", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_latin1,
to_lower_latin1,
to_upper_latin1,
NULL, /* sort_order */
NULL, /* uca */
//NULL, /* tab_to_uni */
//NULL, /* tab_from_uni */
&ob_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0, /* min_sort_char */
0xFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_latin1_handler,
&ob_collation_binary_handler,
PAD_SPACE};

View File

@ -832,3 +832,269 @@ size_t ob_strxfrm_pad(const ObCharsetInfo *cs, unsigned char *str, unsigned char
return frm_end - str;
}
/**
 * Upper-case a single-byte-charset buffer in place.
 *
 * The conversion is in place only: src/dst and srclen/dstlen must be equal
 * (checked with ob_charset_assert).
 *
 * @param cs      charset descriptor passed to ob_toupper
 * @param src     buffer to convert (modified)
 * @param srclen  number of bytes in src
 * @param dst     must equal src
 * @param dstlen  must equal srclen
 * @return srclen
 */
size_t ob_caseup_8bit(const ObCharsetInfo *cs __attribute__((unused)),
                      char* src __attribute__((unused)), size_t srclen __attribute__((unused)),
                      char* dst __attribute__((unused)), size_t dstlen __attribute__((unused)))
{
  ob_charset_assert(src == dst && srclen == dstlen);
  for (size_t pos = 0; pos < srclen; ++pos) {
    src[pos] = ob_toupper(cs, src[pos]);
  }
  return srclen;
}
/**
 * Lower-case a single-byte-charset buffer in place.
 *
 * The conversion is in place only: src/dst and srclen/dstlen must be equal
 * (checked with ob_charset_assert).
 *
 * @param cs      charset descriptor passed to ob_tolower
 * @param src     buffer to convert (modified)
 * @param srclen  number of bytes in src
 * @param dst     must equal src
 * @param dstlen  must equal srclen
 * @return srclen
 */
size_t ob_casedn_8bit(const ObCharsetInfo *cs __attribute__((unused)),
                      char* src __attribute__((unused)), size_t srclen __attribute__((unused)),
                      char* dst __attribute__((unused)), size_t dstlen __attribute__((unused)))
{
  ob_charset_assert(src == dst && srclen == dstlen);
  for (size_t pos = 0; pos < srclen; ++pos) {
    src[pos] = ob_tolower(cs, src[pos]);
  }
  return srclen;
}
/**
 * Compare two byte strings by their sort-order weights.
 *
 * @param cs         charset descriptor used by ob_sort_order
 * @param s, slen    first string and its length in bytes
 * @param t, tlen    second string and its length in bytes
 * @param is_prefix  when true, a longer s is cut to tlen, so s compares equal
 *                   to t if it merely starts with t
 * @return the weight difference at the first differing position; otherwise
 *         1, -1 or 0 according to the (possibly truncated) lengths.
 */
int ob_strnncoll_simple(const ObCharsetInfo *cs __attribute__((unused)),
                        const uchar *s, size_t slen,
                        const uchar *t, size_t tlen,
                        bool is_prefix)
{
  const size_t common = (slen > tlen) ? tlen : slen;
  if (is_prefix && slen > tlen) {
    slen = tlen;
  }
  for (size_t pos = 0; pos < common; ++pos) {
    if (ob_sort_order(cs, s[pos]) != ob_sort_order(cs, t[pos])) {
      return (int)ob_sort_order(cs, s[pos]) - (int)ob_sort_order(cs, t[pos]);
    }
  }
  // Common prefix is equal: the lengths decide.
  if (slen > tlen) {
    return 1;
  }
  if (slen < tlen) {
    return -1;
  }
  return 0;
}
static int ob_strnncollsp_simple(const ObCharsetInfo *cs
__attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
bool diff_if_only_endspace_difference
__attribute__((unused)))
{
size_t len = (slen > tlen) ? tlen : slen;
for (size_t i = 0; i < len; i++){
if(ob_sort_order(cs,*s)!=ob_sort_order(cs,*t)) {
return (int)ob_sort_order(cs,*s) - (int)ob_sort_order(cs,*t);
}
s++;
t++;
}
int res = 0;
if (slen != tlen) {
int swap = 1;
if (diff_if_only_endspace_difference){
res=1;
}
/*
Check the next not space character of the longer key. If it's < ' ',
then it's smaller than the other key.
*/
if (slen < tlen) {
slen = tlen;
s = t;
swap = -1;
res = -res;
}
/*
"a" == "a "
"a\0" < "a"
"a\0" < "a "
*/
for (const unsigned char* end = s + slen - len; s < end; s++) {
if (ob_sort_order(cs,*s) != ob_sort_order(cs,(int)(' ')))
return ob_sort_order(cs,*s) < ob_sort_order(cs,(int)(' ')) ? -swap : swap;
}
}
return res;
}
static size_t ob_strnxfrm_simple(const ObCharsetInfo* cs __attribute__((unused)), unsigned char* dst, size_t dstlen,
uint nweights, const unsigned char* src, size_t srclen, unsigned int flags, bool* is_valid_unicode)
{
uchar *dst0 = dst;
const uchar *end;
const uchar *remainder;
size_t frmlen;
frmlen = dstlen > nweights ? nweights : dstlen;
frmlen = frmlen > srclen ? srclen : frmlen;
end = src + frmlen;
remainder = src + (frmlen % 8);
for (; src < remainder;) *dst++ = ob_sort_order(cs,*src++);
while(src < end) {
*dst++ = ob_sort_order(cs,*src++);
*dst++ = ob_sort_order(cs,*src++);
*dst++ = ob_sort_order(cs,*src++);
*dst++ = ob_sort_order(cs,*src++);
*dst++ = ob_sort_order(cs,*src++);
*dst++ = ob_sort_order(cs,*src++);
*dst++ = ob_sort_order(cs,*src++);
*dst++ = ob_sort_order(cs,*src++);
}
return ob_strxfrm_pad_desc_and_reverse(cs, dst0, dst, dst0 + dstlen, nweights - srclen, flags, 0);
}
/*
 * Helper macros for the wildcard matcher below.
 * likeconv(s, A): value used when comparing a pattern/string byte; here it is
 *                 the byte itself (the charset argument is ignored).
 * INC_PTR(cs, A, B): advance pointer A by one byte.
 * NOTE(review): because likeconv ignores the charset, matching is byte-exact
 * even though ob_wildcmp_8bit is installed in the *_ci collation handler --
 * confirm this is intended.
 */
#define likeconv(s, A) (A)
#define INC_PTR(cs, A, B) (A)++
/*
 * Recursive LIKE-style matcher for single-byte strings.
 *
 * str_ptr/str_end_ptr    subject string [begin, end)
 * wild_str/wild_end      pattern [begin, end)
 * escape_char            pattern byte that makes the following byte literal
 * w_one_char             wildcard matching exactly one byte
 * w_many_char            wildcard matching any number of bytes (including 0)
 * recurse_level          depth counter; incremented on recursion but not
 *                        otherwise inspected in this function
 *
 * Returns 0 when the pattern matches the whole string, and a non-zero value
 * (1 or -1) otherwise.
 */
static int ob_wildcmp_8bit_impl(const ObCharsetInfo* cs, const char* str_ptr, const char* str_end_ptr,
const char* wild_str, const char* wild_end, int escape_char, int w_one_char, int w_many_char, int recurse_level)
{
int cmp_result = -1;
while (wild_str != wild_end) {
/* Literal run: consume pattern bytes until the next wildcard. */
while (*wild_str != w_many_char && *wild_str != w_one_char) {
/* An escape byte (unless it is the last pattern byte) is skipped and the
   byte after it is compared literally. */
if (*wild_str == escape_char && wild_str + 1 != wild_end) {
wild_str++;
}
if (str_ptr == str_end_ptr || likeconv(cs, *wild_str++) != likeconv(cs, *str_ptr++)) {
return 1;
}
if (wild_str == wild_end) {
/* Pattern exhausted: match only if the string is exhausted too. */
return str_ptr != str_end_ptr;
}
cmp_result = 1;
}
/* Run of single-byte wildcards: each consumes exactly one string byte. */
if (*wild_str == w_one_char) {
do {
if (str_ptr == str_end_ptr) {
return (cmp_result);
}
INC_PTR(cs, str_ptr, str_end_ptr);
} while (++wild_str < wild_end && *wild_str == w_one_char);
if (wild_str == wild_end) {
break;
}
}
if (*wild_str == w_many_char) {
unsigned char cmp = 0;
wild_str++;
/* Collapse consecutive many-wildcards; single-byte wildcards that follow
   still each consume one string byte. */
for (; wild_str != wild_end; wild_str++) {
if (*wild_str == w_many_char) {
continue;
}
if (*wild_str == w_one_char) {
if (str_ptr == str_end_ptr) {
return (-1);
}
INC_PTR(cs, str_ptr, str_end_ptr);
continue;
}
break;
}
if (wild_str == wild_end) {
/* Pattern ends with the many-wildcard: the rest of the string matches. */
return (0);
}
if (str_ptr == str_end_ptr) {
return (-1);
}
/* cmp = first literal byte after the wildcard (un-escaped if needed). */
if ((cmp = *wild_str) == escape_char && wild_str + 1 != wild_end) {
cmp = *++wild_str;
}
INC_PTR(cs, wild_str, wild_end);
cmp = likeconv(cs, cmp);
/* Try every occurrence of cmp in the string as the anchor for the rest of
   the pattern. */
do {
while (str_ptr != str_end_ptr && (unsigned char)likeconv(cs, *str_ptr) != cmp) {
str_ptr++;
}
if (str_ptr++ == str_end_ptr) {
return -1;
}
do {
int tmp = ob_wildcmp_8bit_impl(
cs, str_ptr, str_end_ptr, wild_str, wild_end, escape_char, w_one_char, w_many_char, recurse_level + 1);
/* 0 = matched; a negative result is returned as-is, ending the search. */
if (tmp <= 0) {
return tmp;
}
} while (0);
} while (str_ptr != str_end_ptr);
return -1;
}
}
return str_ptr != str_end_ptr ? 1 : 0;
}
/**
 * Match a single-byte string against a wildcard pattern.
 *
 * Entry point for ob_wildcmp_8bit_impl, which it starts at recursion level 1.
 *
 * @param cs                charset descriptor, forwarded to the matcher
 * @param str, str_end      subject string [begin, end)
 * @param wildstr, wildend  pattern [begin, end)
 * @param escape            escape byte of the pattern
 * @param w_one             wildcard for exactly one byte
 * @param w_many            wildcard for any number of bytes
 * @return the result of ob_wildcmp_8bit_impl
 */
int ob_wildcmp_8bit(const ObCharsetInfo* cs, const char* str, const char* str_end, const char* wildstr,
                    const char* wildend, int escape, int w_one, int w_many)
{
  const int initial_recurse_level = 1;
  return ob_wildcmp_8bit_impl(cs, str, str_end, wildstr, wildend,
                              escape, w_one, w_many, initial_recurse_level);
}
/**
 * Find the first occurrence of s inside b, comparing raw bytes.
 *
 * NOTE(review): bytes are compared directly, without consulting the sort
 * order of cs, although this function is installed in the *_ci collation
 * handler -- confirm that is intended.
 *
 * @param cs        charset descriptor (not consulted)
 * @param b         buffer to search in
 * @param b_length  length of b in bytes
 * @param s         byte sequence to search for
 * @param s_length  length of s in bytes
 * @param match     receives match positions; match[0] describes the part of
 *                  b before the hit, match[1] (if nmatch > 1) the hit itself
 * @param nmatch    number of entries available in match
 * @return 0 when s is not found, 1 when s is empty (always found),
 *         2 when a non-empty s was found.
 */
uint32_t ob_instr_simple(const ObCharsetInfo* cs __attribute__((unused)), const char* b, size_t b_length,
    const char* s, size_t s_length, ob_match_t* match, uint nmatch)
{
  if (s_length > b_length) {
    return 0;
  }
  if (0 == s_length) {
    if (nmatch) {
      match->beg = 0;
      match->end = 0;
      match->mb_len = 0;
    }
    return 1; /* Empty string is always found */
  }

  const unsigned char *haystack = reinterpret_cast<const unsigned char*>(b);
  const unsigned char *needle = reinterpret_cast<const unsigned char*>(s);
  // One past the last position where a full-length match can still start.
  const unsigned char *last_start = haystack + b_length - s_length + 1;

  for (const unsigned char *cand = haystack; cand != last_start; ++cand) {
    size_t matched = 0;
    while (matched < s_length && cand[matched] == needle[matched]) {
      ++matched;
    }
    if (matched < s_length) {
      continue;  // mismatch: try the next starting position
    }
    if (nmatch > 0) {
      match[0].beg = 0;
      match[0].end = (size_t)(cand - haystack);
      match[0].mb_len = match[0].end;
      if (nmatch > 1) {
        match[1].beg = match[0].end;
        match[1].end = match[0].end + s_length;
        match[1].mb_len = match[1].end - match[1].beg;
      }
    }
    return 2;
  }
  return 0;
}
/*
 * Collation handler table for single-byte collations that compare through a
 * sort-order table; referenced by ob_charset_latin1 (latin1_swedish_ci).
 * Slots without an implementation are left NULL.
 */
ObCollationHandler ob_collation_8bit_simple_ci_handler = {
NULL, /* init */
NULL,
ob_strnncoll_simple,
ob_strnncollsp_simple,
ob_strnxfrm_simple,
ob_strnxfrmlen_simple,
NULL,//varlen
ob_like_range_simple,
ob_wildcmp_8bit,
NULL,//ob_strcasecmp_8bit,
ob_instr_simple,
ob_hash_sort_simple,
ob_propagate_simple};
#undef likeconv
#undef INC_PTR