Co-authored-by: wangt1xiuyi <13547954130@163.com> Co-authored-by: yangqise7en <877793735@qq.com> Co-authored-by: Zach41 <zach_41@163.com>
1009 lines
31 KiB
C++
1009 lines
31 KiB
C++
/**
 * Copyright (c) 2021 OceanBase
 * OceanBase CE is licensed under Mulan PubL v2.
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 * You may obtain a copy of Mulan PubL v2 at:
 *          http://license.coscl.org.cn/MulanPubL-2.0
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
 */
|
|
|
|
#define USING_LOG_PREFIX COMMON
|
|
|
|
#include "share/ob_order_perserving_encoder.h"
|
|
#include <byteswap.h>
|
|
namespace oceanbase
|
|
{
|
|
namespace share
|
|
{
|
|
// Encodes the value held by `obj` as an order-preserving byte sequence.
//
// The object type selects the encode_from_* helper. For fixed-width types the
// room left in the buffer is verified here before the helper runs; number and
// string helpers receive max_buf_len and perform their own check.
//
//   obj          value to encode
//   to           position the encoded bytes are written to
//   max_buf_len  capacity that to_len is compared against
//   to_len       in/out: running encoded length, advanced by the helpers
//
// Returns OB_SUCCESS, OB_BUF_NOT_ENOUGH when the encoding does not fit,
// OB_NOT_SUPPORTED for types without a sort-key encoding, or whatever error a
// helper reports.
int ObOrderPerservingEncoder::make_order_perserving_encode_from_object(ObObj &obj,
                                                                       unsigned char *to,
                                                                       int64_t max_buf_len,
                                                                       int64_t &to_len)
{
  int ret = OB_SUCCESS;

  switch (obj.get_type()) {
    // ---- signed integers and integer-backed temporal types ----
    case ObTinyIntType: {
      if (to_len + sizeof(int8_t) <= max_buf_len) {
        encode_from_int8(obj.get_tinyint(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    case ObSmallIntType: {
      if (to_len + sizeof(int16_t) <= max_buf_len) {
        encode_from_int16(obj.get_smallint(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    case ObDateType:
    case ObMediumIntType:
    case ObInt32Type: {
      if (to_len + sizeof(int32_t) <= max_buf_len) {
        encode_from_int32(obj.get_int32(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    case ObIntervalYMType:
    case ObTimeType:
    case ObDateTimeType:
    case ObTimestampType:
    case ObIntType: {
      if (to_len + sizeof(int64_t) <= max_buf_len) {
        encode_from_int(obj.get_int(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    // ---- unsigned integers ----
    case ObYearType:
    case ObUTinyIntType: {
      if (to_len + sizeof(uint8_t) <= max_buf_len) {
        encode_from_uint8(obj.get_utinyint(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    case ObUSmallIntType: {
      if (to_len + sizeof(uint16_t) <= max_buf_len) {
        encode_from_uint16(obj.get_usmallint(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    case ObUMediumIntType:
    case ObUInt32Type: {
      if (to_len + sizeof(uint32_t) <= max_buf_len) {
        encode_from_uint32(obj.get_uint32(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    case ObUInt64Type: {
      if (to_len + sizeof(uint64_t) <= max_buf_len) {
        encode_from_uint(obj.get_uint64(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    // ---- floating point ----
    case ObFloatType:
    case ObUFloatType: {
      if (to_len + sizeof(float) <= max_buf_len) {
        encode_from_float(obj.get_float(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    case ObDoubleType:
    case ObUDoubleType: {
      if (to_len + sizeof(double) <= max_buf_len) {
        encode_from_double(obj.get_double(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    // ---- ObNumber ----
    case ObNumberType:
    case ObUNumberType:
    case ObNumberFloatType: {
      // A full buffer is reported through ret but is not worth a warning.
      if (OB_FAIL(encode_from_number(obj.get_number(), to, max_buf_len, to_len))
          && OB_BUF_NOT_ENOUGH != ret) {
        LOG_WARN("failed to encode number", K(ret));
      }
      break;
    }
    // ---- timestamps with extra context and day-second intervals ----
    case ObTimestampTZType:
    case ObTimestampLTZType:
    case ObTimestampNanoType: {
      if (to_len + sizeof(int64_t) + sizeof(uint16_t) <= max_buf_len) {
        encode_from_timestamp(obj.get_otimestamp_value(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    case ObIntervalDSType: {
      if (to_len + sizeof(int64_t) + sizeof(int32_t) <= max_buf_len) {
        encode_from_interval_ds(obj.get_interval_ds(), to, to_len);
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()));
      }
      break;
    }
    // ---- character and raw strings ----
    case ObVarcharType:
    case ObNVarchar2Type:
    case ObRawType:
    case ObNCharType:
    case ObCharType: {
      // A full buffer is reported through ret but is not worth a warning.
      if (OB_FAIL(encode_from_string_varlen(obj.get_string(), to, max_buf_len, to_len,
                                            obj.get_collation_type()))
          && OB_BUF_NOT_ENOUGH != ret) {
        LOG_WARN("failed to encode string", K(ret));
      }
      break;
    }
    // ---- decimal int ----
    case ObDecimalIntType: {
      if (to_len + obj.get_int_bytes() <= max_buf_len) {
        if (OB_FAIL(
              encode_from_decint(obj.get_decimal_int(), obj.get_int_bytes(), to, to_len))) {
          LOG_WARN("encode from decimal int failed", K(ret));
        }
      } else {
        ret = OB_BUF_NOT_ENOUGH;
        LOG_TRACE("no enough memory to do encoding", K(ret), K(obj.get_type()),
                  K(obj.get_int_bytes()));
      }
      break;
    }
    // ---- types listed explicitly as having no sort-key encoding ----
    case ObURowIDType:
    case ObUnknownType:
    case ObTinyTextType:
    case ObTextType:
    case ObMediumTextType:
    case ObLongTextType:
    case ObBitType:
    case ObEnumType:
    case ObSetType:
    case ObEnumInnerType:
    case ObSetInnerType:
    case ObLobType:
    case ObExtendType:
    case ObHexStringType:
    default: {
      ret = OB_NOT_SUPPORTED;
      LOG_WARN("this type cannot make sortkey", K(ret), K(obj.get_type()));
    }
  }

  return ret;
}
|
|
|
|
int ObOrderPerservingEncoder::make_order_perserving_encode_from_object(
|
|
ObDatum &data, unsigned char *to, int64_t max_buf_len, int64_t &to_len, ObEncParam ¶m)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
switch (param.type_) {
|
|
// for integer values
|
|
case ObTinyIntType: {
|
|
if (to_len + sizeof(int8_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_int8(data.get_tinyint(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObSmallIntType: {
|
|
if (to_len + sizeof(int16_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_int16(data.get_smallint(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObDateType:
|
|
case ObMediumIntType:
|
|
case ObInt32Type: {
|
|
if (to_len + sizeof(int32_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_int32(data.get_int32(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObIntervalYMType:
|
|
case ObTimeType:
|
|
case ObDateTimeType:
|
|
case ObTimestampType:
|
|
case ObIntType: {
|
|
if (to_len + sizeof(int64_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_int(data.get_int(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObYearType:
|
|
case ObUTinyIntType: {
|
|
if (to_len + sizeof(uint8_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_uint8(data.get_utinyint(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObUSmallIntType: {
|
|
if (to_len + sizeof(uint16_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_uint16(data.get_usmallint(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObUMediumIntType:
|
|
case ObUInt32Type: {
|
|
if (to_len + sizeof(uint32_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_uint32(data.get_uint32(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObUInt64Type: {
|
|
if (to_len + sizeof(uint64_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_uint(data.get_uint(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
// for float values
|
|
case ObFloatType:
|
|
case ObUFloatType: {
|
|
if (to_len + sizeof(float) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_float(data.get_float(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObDoubleType:
|
|
case ObUDoubleType: {
|
|
if (to_len + sizeof(double) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_double(data.get_double(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
// for obnumber
|
|
case ObNumberType:
|
|
case ObUNumberType:
|
|
case ObNumberFloatType: {
|
|
if (OB_FAIL(encode_from_number(data.get_number(), to, max_buf_len, to_len))) {
|
|
if (ret == OB_BUF_NOT_ENOUGH) {
|
|
// ignore ret
|
|
} else {
|
|
LOG_WARN("failed to encode number", K(ret));
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
// for date
|
|
case ObTimestampTZType: {
|
|
if (to_len + sizeof(int64_t) + sizeof(uint16_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_timestamp(data.get_otimestamp_tz(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObTimestampLTZType:
|
|
case ObTimestampNanoType: {
|
|
if (to_len + sizeof(int64_t) + sizeof(uint16_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_timestamp(data.get_otimestamp_tiny(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObIntervalDSType: {
|
|
if (to_len + sizeof(int64_t) + sizeof(int32_t) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(param.type_));
|
|
} else {
|
|
encode_from_interval_ds(data.get_interval_ds(), to, to_len);
|
|
}
|
|
break;
|
|
}
|
|
case ObRawType:
|
|
case ObVarcharType:
|
|
case ObNVarchar2Type: {
|
|
param.is_var_len_ = false;
|
|
if (OB_FAIL(encode_from_string_varlen(data.get_string(), to, max_buf_len, to_len, param))) {
|
|
if (ret == OB_BUF_NOT_ENOUGH) {
|
|
// ignore ret
|
|
} else {
|
|
LOG_WARN("failed to encode fix len str", K(ret));
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case ObNCharType:
|
|
case ObCharType: {
|
|
if (OB_FAIL(encode_from_string_varlen(data.get_string(), to, max_buf_len, to_len, param))) {
|
|
if (ret == OB_BUF_NOT_ENOUGH) {
|
|
// ignore ret
|
|
} else {
|
|
LOG_WARN("failed to encode string", K(ret));
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case ObDecimalIntType: {
|
|
if (to_len + data.len_ > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding", K(ret), K(data.len_), K(param.type_));
|
|
} else if (OB_FAIL(encode_from_decint(data.get_decimal_int(), data.len_, to, to_len))) {
|
|
LOG_WARN("encode from decimal int failed", K(ret));
|
|
}
|
|
break;
|
|
}
|
|
case ObURowIDType:
|
|
case ObUnknownType:
|
|
case ObTinyTextType:
|
|
case ObTextType:
|
|
case ObMediumTextType:
|
|
case ObLongTextType:
|
|
case ObBitType:
|
|
case ObEnumType:
|
|
case ObSetType:
|
|
case ObEnumInnerType:
|
|
case ObSetInnerType:
|
|
case ObLobType:
|
|
case ObExtendType:
|
|
case ObHexStringType:
|
|
default: {
|
|
ret = OB_NOT_SUPPORTED;
|
|
LOG_WARN("this type cannot make sortkey", K(ret), K(param.type_));
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
// used for memcmp comparsion
|
|
int ObOrderPerservingEncoder::convert_ob_charset_utf8mb4_bin(unsigned char *data,
|
|
int64_t len,
|
|
unsigned char *to,
|
|
int64_t &to_len)
|
|
{
|
|
unsigned char *d_e = data + len;
|
|
while (data < d_e) {
|
|
*to = *data;
|
|
if (*data == 0x00) {
|
|
to++;
|
|
to_len++;
|
|
*to = 0x01;
|
|
}
|
|
data++;
|
|
to++;
|
|
to_len++;
|
|
}
|
|
*to = 0x00;
|
|
*(to + 1) = 0x00;
|
|
to_len += 2;
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
// used for space comparsion (0x20)
|
|
int ObOrderPerservingEncoder::convert_ob_charset_utf8mb4_bin_sp(unsigned char *data,
|
|
int64_t len,
|
|
unsigned char *to,
|
|
int64_t &to_len)
|
|
{
|
|
unsigned char *d_e = data + len;
|
|
while (*(d_e - 1) == 0x20 && d_e - 1 >= data)
|
|
d_e--;
|
|
|
|
while (data < d_e) {
|
|
if (*data == 0x20) {
|
|
int16_t sp_cnt = 0;
|
|
while (*data == 0x20) {
|
|
sp_cnt++;
|
|
data++;
|
|
if (data == d_e)
|
|
sp_cnt = 0;
|
|
}
|
|
|
|
int16_t sp_cnt_mask = 0;
|
|
int16_t tmp = (int16_t)((*data) - 0x20);
|
|
int16_t x = (~tmp) >> 16;
|
|
MEMCPY(to, (unsigned char *)&x, 2);
|
|
*to = 0x20;
|
|
if (tmp > 0) {
|
|
*(to+1) = 0x21;
|
|
sp_cnt_mask = 0xFFFF;
|
|
} else {
|
|
*(to+1) = 0x19;
|
|
sp_cnt_mask = 0;
|
|
}
|
|
to += 2;
|
|
sp_cnt = ((sp_cnt) ^ sp_cnt_mask) ^ 0x8000;
|
|
sp_cnt = bswap_16(sp_cnt);
|
|
MEMCPY(to, (unsigned char *)&sp_cnt, 2);
|
|
to += 2;
|
|
to_len += 4;
|
|
}
|
|
|
|
*to = *data;
|
|
data++;
|
|
to++;
|
|
to_len++;
|
|
}
|
|
*to = 0x20;
|
|
*(to + 1) = 0x20;
|
|
to_len += 2;
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
// Encodes a variable-length string under collation `cs`.
//
// The memcmp / space-aware choice comes from lib::is_oracle_mode().
//
// Capacity rule: the string may grow up to 7x while being encoded and 20 more
// bytes are kept in reserve. The original sizing note: the tail takes up to
// 8 bytes and a space up to 10, rounded up to 20; in the worst case
// "[space] A [space] A" a space becomes 10 bytes and 'A' 4 bytes, i.e.
// (10 + 4) / 2 = 7 bytes per source byte.
//
// Returns OB_SUCCESS, OB_BUF_NOT_ENOUGH when that bound does not fit,
// OB_NOT_SUPPORTED for collations not handled here (or when the sort-key
// routine returns a negative length), or the error from encode_tails.
int ObOrderPerservingEncoder::encode_from_string_varlen(
    ObString str, unsigned char *to, int64_t max_buf_len, int64_t &to_len, ObCollationType cs)
{
  int ret = OB_SUCCESS;
  const bool is_mem = lib::is_oracle_mode();
  const int64_t tail_reserve = 20;

  // An empty string and a string holding exactly one '\0' both go through
  // encode_tails; the flag distinguishes the two.
  const bool is_single_nul = (1 == str.length()) && ('\0' == *str.ptr());
  const bool is_bytewise = (CS_TYPE_COLLATION_FREE == cs || CS_TYPE_BINARY == cs);
  const bool is_bin_collation = (CS_TYPE_UTF8MB4_BIN == cs || CS_TYPE_GBK_BIN == cs
                                 || CS_TYPE_GB18030_BIN == cs
                                 || CS_TYPE_GB18030_2022_BIN == cs);
  const bool needs_sortkey =
      (CS_TYPE_UTF8MB4_GENERAL_CI == cs || CS_TYPE_GBK_CHINESE_CI == cs
       || CS_TYPE_UTF16_GENERAL_CI == cs || CS_TYPE_UTF16_BIN == cs
       || CS_TYPE_GB18030_CHINESE_CI == cs
       || (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS));

  if ((to_len + 7 * str.length() + tail_reserve) > max_buf_len) {
    ret = OB_BUF_NOT_ENOUGH;
    LOG_TRACE("no enough memory to do encoding for string", K(ret));
  } else if (str.empty() || is_single_nul) {
    if (OB_FAIL(encode_tails(to, max_buf_len, to_len, is_mem, cs, is_single_nul))) {
      LOG_WARN("failed to encode tails", K(ret));
    }
  } else if (is_bytewise || (is_bin_collation && is_mem)) {
    convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
  } else if (is_bin_collation) {
    convert_ob_charset_utf8mb4_bin_sp((unsigned char *)str.ptr(), str.length(), to, to_len);
  } else if (needs_sortkey) {
    bool is_valid_uni = false;
    const int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
                                                       max_buf_len - to_len - tail_reserve,
                                                       is_mem, is_valid_uni);
    if (res_len >= 0) {
      to_len += res_len;
    } else {
      ret = OB_NOT_SUPPORTED;
      LOG_TRACE("not support collation", K(cs));
    }
  } else {
    ret = OB_NOT_SUPPORTED;
    LOG_TRACE("not support collation", K(cs));
  }
  return ret;
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_string_varlen(
|
|
ObString str, unsigned char *to, int64_t max_buf_len, int64_t &to_len, ObEncParam ¶m)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
ObCollationType cs = param.cs_type_;
|
|
bool is_valid_uni = false;
|
|
|
|
int64_t safty_buf_size = 20;
|
|
// tail is up to 8 byte and [space] will be expand to 10byte,
|
|
// therefore safty buffer size round up to 20(byte)
|
|
// and src will only expand 7 times at most when encoding.
|
|
// for bad case
|
|
// [space] A [space] A
|
|
// [space] will expand to 10 byte
|
|
// A will expand to 4 byte
|
|
// therefore src will expand (10+4)/2=>7 times at most when encoding
|
|
if ((to_len + 7 * str.length() + safty_buf_size) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding for string", K(ret));
|
|
} else if (str.empty() || (str.length()==1 && *str.ptr()=='\0')) {
|
|
if (OB_FAIL(encode_tails(to, max_buf_len, to_len, param.is_memcmp_, cs, str.length()==1 && *str.ptr()=='\0'))) {
|
|
LOG_WARN("failed to encode tails", K(ret));
|
|
}
|
|
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY) {
|
|
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
|
|
} else if (cs == CS_TYPE_UTF8MB4_BIN || cs == CS_TYPE_GBK_BIN ||
|
|
cs == CS_TYPE_GB18030_BIN || cs == CS_TYPE_GB18030_2022_BIN) {
|
|
if (param.is_memcmp_) {
|
|
convert_ob_charset_utf8mb4_bin((unsigned char *)str.ptr(), str.length(), to, to_len);
|
|
} else {
|
|
convert_ob_charset_utf8mb4_bin_sp((unsigned char *)str.ptr(), str.length(), to, to_len);
|
|
}
|
|
} else if (cs == CS_TYPE_UTF8MB4_GENERAL_CI || cs == CS_TYPE_GBK_CHINESE_CI
|
|
|| cs == CS_TYPE_UTF16_GENERAL_CI || cs == CS_TYPE_UTF16_BIN
|
|
|| cs == CS_TYPE_GB18030_CHINESE_CI ||
|
|
(CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
|
int64_t res_len = ObCharset::sortkey_var_len(cs, str.ptr(), str.length(), (char *)to,
|
|
max_buf_len - to_len - safty_buf_size,
|
|
param.is_memcmp_, param.is_valid_uni_);
|
|
if (!param.is_valid_uni_) {
|
|
// invalid unicode, do nothing
|
|
} else {
|
|
to_len += res_len;
|
|
}
|
|
} else {
|
|
ret = OB_NOT_SUPPORTED;
|
|
LOG_TRACE("not support collation", K(cs));
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_string_fixlen(
|
|
ObString str, unsigned char *to, int64_t max_buf_len, int64_t &to_len, ObEncParam ¶m)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
ObCollationType cs = param.cs_type_;
|
|
bool is_valid_uni = false;
|
|
if ((to_len + 4 * str.length() + 2) > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding for fixed string", K(ret));
|
|
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY || cs == CS_TYPE_UTF8MB4_BIN
|
|
|| cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN || cs == CS_TYPE_GB18030_2022_BIN) {
|
|
MEMCPY(to, str.ptr(), str.length());
|
|
to_len += str.length();
|
|
} else {
|
|
to_len
|
|
+= ObCharset::sortkey(cs, str.ptr(), str.length(), (char *)to, max_buf_len, is_valid_uni);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_int8(int8_t val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
val ^= SIGN_MASK_8;
|
|
to_len += sizeof(int8_t);
|
|
*to = val;
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_int16(int16_t val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
val ^= SIGN_MASK_16;
|
|
val = bswap_16(val);
|
|
to_len += sizeof(int16_t);
|
|
MEMCPY(to, (unsigned char *)&val, sizeof(val));
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_int32(int32_t val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
val ^= SIGN_MASK_32;
|
|
val = bswap_32(val);
|
|
to_len += sizeof(int32_t);
|
|
MEMCPY(to, (unsigned char *)&val, sizeof(val));
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_int(int64_t val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
val ^= SIGN_MASK_64;
|
|
val = bswap_64(val);
|
|
to_len += sizeof(int64_t);
|
|
MEMCPY(to, (unsigned char *)&val, sizeof(val));
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_uint8(uint8_t val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
to_len += sizeof(uint8_t);
|
|
*to = val;
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_uint16(uint16_t val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
val = bswap_16(val);
|
|
to_len += sizeof(uint16_t);
|
|
MEMCPY(to, (unsigned char *)&val, sizeof(val));
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_uint32(uint32_t val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
val = bswap_32(val);
|
|
to_len += sizeof(uint32_t);
|
|
MEMCPY(to, (unsigned char *)&val, sizeof(val));
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_uint(uint64_t val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
val = bswap_64(val);
|
|
to_len += sizeof(uint64_t);
|
|
MEMCPY(to, (unsigned char *)&val, sizeof(val));
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_double(double val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
// to avoid +0 and -0
|
|
if (val == 0.0) {
|
|
val = 0.0;
|
|
}
|
|
|
|
int64_t val_int;
|
|
to_len += sizeof(val);
|
|
MEMCPY(&val_int, &val, sizeof(val));
|
|
// int: neg pad FF, pos pad 00
|
|
val_int = (val_int ^ (val_int >> 63)) | ((~val_int) & 0x8000000000000000ULL);
|
|
val_int = bswap_64(val_int);
|
|
MEMCPY(to, &val_int, sizeof(val));
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_float(float val, unsigned char *to, int64_t &to_len)
|
|
{
|
|
// to avoid +0 and -0
|
|
if (val == 0.0) {
|
|
val = 0.0;
|
|
}
|
|
|
|
int32_t val_int;
|
|
to_len += sizeof(val);
|
|
MEMCPY(&val_int, &val, sizeof(val));
|
|
// int: neg pad FF, pos pad 00
|
|
val_int = (val_int ^ (val_int >> 31)) | ((~val_int) & 0x80000000U);
|
|
val_int = bswap_32(val_int);
|
|
MEMCPY(to, &val_int, sizeof(val));
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
// Encodes an ObNumber.
//
// Layout written at `to`:
//   1 byte       desc.se_
//   4 * len_     each digit as bswap_32((digit + 1) ^ mask)
//   4 bytes      the mask itself, in host byte order
// The mask is derived from se_: zero when bit 7 of se_ is set, all ones
// otherwise.
//
// The capacity check requires 1 + 4 * len_ + 8 bytes, four more than are
// written. Returns OB_SUCCESS, or OB_BUF_NOT_ENOUGH when that does not fit.
int ObOrderPerservingEncoder::encode_from_number(ObNumber val,
                                                 unsigned char *to,
                                                 int64_t max_buf_len,
                                                 int64_t &to_len)
{
  int ret = OB_SUCCESS;
  ObNumberDesc desc = val.d_;
  if (to_len + sizeof(int8_t) + desc.len_ * sizeof(uint32_t) + 2 * sizeof(int32_t)
      <= max_buf_len) {
    // leading sign/exponent byte
    int8_t sign_exp = desc.se_;
    *to++ = sign_exp;
    to_len++;

    // digits, masked so that they order correctly for the sign of the number
    int32_t digits_mask = static_cast<int32_t>((int64_t)((~sign_exp) ^ 0x80) >> 8);
    uint32_t *digits = val.get_digits();
    for (int64_t idx = 0; idx < desc.len_; ++idx) {
      uint32_t encoded = bswap_32((digits[idx] + 1) ^ digits_mask);
      MEMCPY(to, &encoded, sizeof(encoded));
      to += sizeof(encoded);
      to_len += sizeof(encoded);
    }

    // trailing mask word
    MEMCPY(to, &digits_mask, sizeof(digits_mask));
    to_len += sizeof(digits_mask);
  } else {
    ret = OB_BUF_NOT_ENOUGH;
    LOG_TRACE("no enough memory to do encoding for obnumber", K(ret));
  }
  return ret;
}
|
|
|
|
// Encodes an ObOTimestampData as ten bytes at `to` and adds 10 to to_len:
//   8 bytes  time_us_ XOR-ed with SIGN_MASK_64, byte-swapped
//   2 bytes  time_ctx_.tail_nsec_, byte-swapped
// No capacity check is done here; the caller has to provide enough room.
// Always returns OB_SUCCESS.
//
// Fix: the 16-bit tail was swapped with bswap_32. The swapped bytes ended up
// in the upper half of the 32-bit result and were lost when it was stored
// back into the uint16_t, so the tail was always written as zero. bswap_16
// keeps the value.
int ObOrderPerservingEncoder::encode_from_timestamp(ObOTimestampData val,
                                                    unsigned char *to,
                                                    int64_t &to_len)
{
  int64_t time_us = val.time_us_;
  uint16_t nsec = val.time_ctx_.tail_nsec_;

  // microsecond part
  uint64_t t1 = time_us ^ SIGN_MASK_64;
  t1 = bswap_64(t1);
  MEMCPY(to, (unsigned char *)&t1, sizeof(uint64_t));
  to_len += sizeof(t1);
  to += sizeof(t1);

  // nanosecond tail: a 16-bit value needs the 16-bit swap
  nsec = bswap_16(nsec);
  MEMCPY(to, (unsigned char *)&nsec, sizeof(nsec));
  to_len += sizeof(nsec);

  return OB_SUCCESS;
}
|
|
|
|
// Encodes an ObIntervalDSValue as twelve bytes at `to` and adds 12 to to_len:
//   8 bytes  nsecond_ XOR-ed with SIGN_MASK_64, byte-swapped
//   4 bytes  fractional_second_ XOR-ed with SIGN_MASK_32, byte-swapped
// No capacity check is done here; the caller has to provide enough room.
// Always returns OB_SUCCESS.
int ObOrderPerservingEncoder::encode_from_interval_ds(ObIntervalDSValue val,
                                                      unsigned char *to,
                                                      int64_t &to_len)
{
  // whole part
  int64_t whole = val.nsecond_;
  whole ^= SIGN_MASK_64;
  whole = bswap_64(whole);
  MEMCPY(to, (unsigned char *)&whole, sizeof(whole));
  to += sizeof(whole);
  to_len += sizeof(whole);

  // fractional part
  int32_t fraction = val.fractional_second_;
  fraction ^= SIGN_MASK_32;
  fraction = bswap_32(fraction);
  MEMCPY(to, (unsigned char *)&fraction, sizeof(fraction));
  to_len += sizeof(fraction);

  return OB_SUCCESS;
}
|
|
|
|
template <typename T>
|
|
int ObOrderPerservingEncoder::encode_from_decint(const T &decint, unsigned char *to,
|
|
int64_t &to_len)
|
|
{
|
|
static_assert(wide::IsWideInteger<T>::value, "");
|
|
int ret = OB_SUCCESS;
|
|
const int item_count = T::ITEM_COUNT;
|
|
int64_t high = static_cast<int64_t>(decint.items_[item_count - 1]);
|
|
// encode_from_int(high, to, to_len);
|
|
high ^= SIGN_MASK_64;
|
|
high = bswap_64(high);
|
|
MEMCPY(to, (unsigned char *)&high, sizeof(high));
|
|
to += sizeof(high);
|
|
to_len += sizeof(high);
|
|
for (int i = item_count - 2; i >= 0; i--) {
|
|
uint64_t val = bswap_64(decint.items_[i]);
|
|
MEMCPY(to, (unsigned char *)&val, sizeof(val));
|
|
to_len += sizeof(val);
|
|
to += sizeof(val);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_from_decint(const ObDecimalInt *decint, int32_t int_bytes, unsigned char *to, int64_t &to_len)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
if (OB_ISNULL(decint)) {
|
|
ret = OB_INVALID_ARGUMENT;
|
|
LOG_WARN("invalid decimal int", K(ret), K(decint));
|
|
} else {
|
|
switch (int_bytes) {
|
|
case sizeof(int32_t): {
|
|
ret = encode_from_int32(*reinterpret_cast<const int32_t *>(decint), to, to_len);
|
|
break;
|
|
}
|
|
case sizeof(int64_t): {
|
|
ret = encode_from_int(*reinterpret_cast<const int64_t *>(decint), to, to_len);
|
|
break;
|
|
}
|
|
case sizeof(int128_t): {
|
|
ret = encode_from_decint(*reinterpret_cast<const int128_t *>(decint), to, to_len);
|
|
break;
|
|
}
|
|
case sizeof(int256_t): {
|
|
ret = encode_from_decint(*reinterpret_cast<const int256_t *>(decint), to, to_len);
|
|
break;
|
|
}
|
|
case sizeof(int512_t): {
|
|
ret = encode_from_decint(*reinterpret_cast<const int512_t *>(decint), to, to_len);
|
|
break;
|
|
}
|
|
default: {
|
|
ret = OB_ERR_UNEXPECTED;
|
|
LOG_WARN("unexpected int bytes", K(ret), K(int_bytes));
|
|
}
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int ObOrderPerservingEncoder::encode_tails(unsigned char *to, int64_t max_buf_len,
|
|
int64_t &to_len, bool is_mem,
|
|
common::ObCollationType cs, bool with_empty_str)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
// do nothing
|
|
if (to_len + 8 > max_buf_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_WARN("no enough memory to do encoding for string", K(ret));
|
|
} else if (cs == CS_TYPE_COLLATION_FREE || cs == CS_TYPE_BINARY) {
|
|
if (with_empty_str) {
|
|
*to = 0x00;
|
|
to++;
|
|
to_len++;
|
|
}
|
|
*to = 0x00;
|
|
*(to+1) = 0x00;
|
|
to_len += 2;
|
|
} else if (cs == CS_TYPE_UTF8MB4_BIN
|
|
|| cs == CS_TYPE_GBK_BIN || cs == CS_TYPE_GB18030_BIN
|
|
|| cs == CS_TYPE_GB18030_2022_BIN
|
|
|| cs == CS_TYPE_UTF8MB4_GENERAL_CI) {
|
|
if (with_empty_str) {
|
|
*to = 0x00;
|
|
to++;
|
|
to_len++;
|
|
}
|
|
if (is_mem) {
|
|
*to = 0x00;
|
|
*(to+1) = 0x00;
|
|
} else {
|
|
*to = 0x20;
|
|
*(to+1) = 0x20;
|
|
}
|
|
to_len += 2;
|
|
} else if ( cs == CS_TYPE_GBK_CHINESE_CI
|
|
|| cs == CS_TYPE_UTF16_GENERAL_CI) {
|
|
if (with_empty_str) {
|
|
MEMSET(to, 0x00, 2);
|
|
to += 2;
|
|
to_len += 2;
|
|
}
|
|
if (is_mem) {
|
|
MEMSET(to, 0x00, 4);
|
|
} else {
|
|
MEMSET(to, 0x00, 4);
|
|
*(to+1) = 0x20;
|
|
*(to+3) = 0x20;
|
|
}
|
|
to_len += 4;
|
|
} else if (cs == CS_TYPE_UTF16_BIN
|
|
|| cs == CS_TYPE_GB18030_CHINESE_CI
|
|
|| (CS_TYPE_GB18030_2022_PINYIN_CI <= cs && cs <= CS_TYPE_GB18030_2022_STROKE_CS)) {
|
|
if (with_empty_str) {
|
|
MEMSET(to, 0x00, 4);
|
|
to += 4;
|
|
to_len += 4;
|
|
}
|
|
if (is_mem) {
|
|
MEMSET(to, 0x00, 8);
|
|
} else {
|
|
MEMSET(to, 0x00, 8);
|
|
*(to+3) = 0x20;
|
|
*(to+7) = 0x20;
|
|
}
|
|
to_len += 8;
|
|
} else {
|
|
ret = OB_NOT_SUPPORTED;
|
|
LOG_WARN("not support collation", K(cs));
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int ObSortkeyConditioner::process_key_conditioning(
|
|
ObDatum &data, unsigned char *to, int64_t max_buf_len, int64_t &to_len, ObEncParam ¶m)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
// process null pos
|
|
if (OB_ISNULL(to)) {
|
|
ret = OB_INVALID_ARGUMENT;
|
|
LOG_WARN("invalid arg", K(ret), K(to));
|
|
} else if (max_buf_len < 1) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding for obnumber", K(ret));
|
|
} else if (param.is_nullable_) {
|
|
if (param.is_null_first_)
|
|
*to = (param.type_ == ObNullType || data.is_null()) ? 0x00 : 0x01;
|
|
else
|
|
*to = (param.type_ == ObNullType || data.is_null()) ? 0x02 : 0x01;
|
|
to_len++;
|
|
}
|
|
|
|
if (OB_FAIL(ret)) {
|
|
// do nothing
|
|
} else if (*to != 0x01) {
|
|
// do nothing
|
|
} else if (OB_FAIL(share::ObOrderPerservingEncoder::make_order_perserving_encode_from_object(
|
|
data, to + to_len, max_buf_len, to_len, param))) {
|
|
if (ret != OB_BUF_NOT_ENOUGH) {
|
|
LOG_WARN("failed to encode sortkey", K(ret));
|
|
}
|
|
} else if (max_buf_len < to_len) {
|
|
ret = OB_BUF_NOT_ENOUGH;
|
|
LOG_TRACE("no enough memory to do encoding for obnumber", K(ret));
|
|
} else if (!param.is_asc_) {
|
|
if (param.is_nullable_) {
|
|
process_decrease(to + 1, to_len - 1);
|
|
} else {
|
|
process_decrease(to, to_len);
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
// Builds the sort key for one object.
//
// A marker byte is written at to[0] (0x00 for a null object, 0x01 otherwise)
// and to_len is incremented. A non-null object is then encoded at to + to_len.
//
// Returns OB_SUCCESS, OB_INVALID_ARGUMENT when `to` is null,
// OB_BUF_NOT_ENOUGH when max_buf_len is below 1, or the encoder's error.
int ObSortkeyConditioner::process_key_conditioning(ObObj &obj,
                                                   unsigned char *to,
                                                   int64_t max_buf_len,
                                                   int64_t &to_len)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(to)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid arg", K(ret), K(to));
  } else if (max_buf_len < 1) {
    ret = OB_BUF_NOT_ENOUGH;
    LOG_TRACE("no enough memory to do encoding for obnumber", K(ret));
  } else {
    // null marker first
    const bool is_null_obj = obj.is_null();
    *to = is_null_obj ? 0x00 : 0x01;
    to_len++;

    if (is_null_obj) {
      // a null object has no payload
    } else if (OB_FAIL(share::ObOrderPerservingEncoder::make_order_perserving_encode_from_object(
                   obj, to + to_len, max_buf_len, to_len))) {
      LOG_WARN("failed to encode sortkey", K(ret));
    }
  }

  return ret;
}
|
|
|
|
// simd opt
|
|
void ObSortkeyConditioner::process_decrease(unsigned char *to, int64_t to_len)
|
|
{
|
|
for (int64_t i = 0; i < to_len; i++) {
|
|
*(to + i) ^= 0xFF;
|
|
}
|
|
}
|
|
|
|
} // namespace share
|
|
} // end namespace oceanbase
|