[enhancement](function) change some function nullable mode (#30991)

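Change the nullable mode of some string functions (e.g. unhex, to_base64). The previous implementations are kept as UnHexOldImpl / ToBase64OldImpl and registered as alternative functions.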
This commit is contained in:
koarz
2024-02-18 13:08:40 +08:00
committed by yiguolei
parent 68102fd531
commit 6cf7468073
14 changed files with 211 additions and 50 deletions

View File

@ -140,8 +140,8 @@ struct FindInSetOp {
using ResultDataType = DataTypeInt32;
using ResultPaddedPODArray = PaddedPODArray<Int32>;
static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
for (int i = 0; i < strl.length(); ++i) {
if (strl[i] == ',') {
for (const auto& c : strl) {
if (c == ',') {
res = 0;
return;
}
@ -635,18 +635,12 @@ struct UnHexImpl {
}
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
NullMap& null_map) {
ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
auto rows_count = offsets.size();
dst_offsets.resize(rows_count);
for (int i = 0; i < rows_count; ++i) {
if (null_map[i]) {
StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
continue;
}
auto source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t srclen = offsets[i] - offsets[i - 1];
if (srclen == 0) {
@ -666,17 +660,103 @@ struct UnHexImpl {
int outlen = hex_decode(source, srclen, dst);
if (outlen < 0) {
StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
} else {
StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data,
dst_offsets);
}
StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets);
}
return Status::OK();
}
};
// Old-variant implementation of "unhex": decodes each row of a string column from
// hexadecimal text into raw bytes. Its vector() receives the result NullMap.
struct UnHexOldImpl {
    static constexpr auto name = "unhex";
    using ReturnType = DataTypeString;
    using ColumnType = ColumnString;

    // Decodes the single hex digit `src_c` and adds its value into `c`.
    // `flag` == true means the digit is the high nibble (weight 16); false means the
    // low nibble (weight 1).
    // Returns false, leaving `c` untouched, when `src_c` is not in [0-9A-Fa-f].
    static bool check_and_decode_one(char& c, const char src_c, bool flag) {
        const int k = flag ? 16 : 1;
        int value = src_c - '0';
        // 9 = ('9'-'0')
        if (value >= 0 && value <= 9) {
            c += value * k;
            return true;
        }

        value = src_c - 'A';
        // 5 = ('F'-'A')
        if (value >= 0 && value <= 5) {
            c += (value + 10) * k;
            return true;
        }

        value = src_c - 'a';
        // 5 = ('f'-'a')
        if (value >= 0 && value <= 5) {
            c += (value + 10) * k;
            return true;
        }
        // not in ( ['0','9'], ['a','f'], ['A','F'] )
        return false;
    }

    // Decodes `src_len` hex characters starting at `src_str` into `dst_str`, two input
    // characters per output byte. `dst_str` must have room for src_len / 2 bytes.
    // Returns the decoded length (src_len / 2), or 0 when the input is empty, has an odd
    // length, or contains a character outside [0-9A-Fa-f]; vector() then emits an empty
    // string for that row. The original author notes the odd/empty case mirrors MySQL —
    // not verified here.
    static int hex_decode(const char* src_str, size_t src_len, char* dst_str) {
        if ((src_len & 1) != 0 || src_len == 0) {
            return 0;
        }
        // Check and decode at the same time, combining two characters into one output byte.
        // The indices are size_t so they are not compared as signed ints against the
        // unsigned src_len.
        for (size_t i = 0, dst_index = 0; i < src_len; i += 2, ++dst_index) {
            char c = 0;
            const bool left_4bits_flag = check_and_decode_one(c, src_str[i], true);
            const bool right_4bits_flag = check_and_decode_one(c, src_str[i + 1], false);

            if (!left_4bits_flag || !right_4bits_flag) {
                // Invalid hex digit: report an empty result.
                return 0;
            }
            dst_str[dst_index] = c;
        }
        return static_cast<int>(src_len / 2);
    }

    // Decodes every row of the input column (`data` + `offsets`) and appends the result
    // to `dst_data` / `dst_offsets`.
    // Rows already flagged in `null_map` are written as null strings, empty rows as empty
    // strings, and every other row as the output of hex_decode (possibly empty).
    // Always returns Status::OK().
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
                         NullMap& null_map) {
        auto rows_count = offsets.size();
        dst_offsets.resize(rows_count);

        // `i` stays a signed int because row 0 reads offsets[i - 1], i.e. offsets[-1];
        // presumably the offsets container tolerates index -1 and yields 0 — TODO confirm.
        for (int i = 0; i < rows_count; ++i) {
            if (null_map[i]) {
                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
                continue;
            }

            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
            size_t srclen = offsets[i] - offsets[i - 1];

            if (srclen == 0) {
                StringOP::push_empty_string(i, dst_data, dst_offsets);
                continue;
            }

            // Decode into a stack buffer when the result fits; otherwise fall back to a
            // heap buffer owned by dst_uptr for the duration of this row.
            char dst_array[MAX_STACK_CIPHER_LEN];
            char* dst = dst_array;

            int cipher_len = static_cast<int>(srclen / 2);
            std::unique_ptr<char[]> dst_uptr;
            if (cipher_len > MAX_STACK_CIPHER_LEN) {
                dst_uptr.reset(new char[cipher_len]);
                dst = dst_uptr.get();
            }

            int outlen = hex_decode(source, srclen, dst);
            StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets);
        }

        return Status::OK();
    }
};
// Name tag carrying the SQL-visible function name "space".
struct NameStringSpace {
    static constexpr const char* name = "space";
};
@ -714,22 +794,16 @@ struct ToBase64Impl {
using ColumnType = ColumnString;
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
NullMap& null_map) {
ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
auto rows_count = offsets.size();
dst_offsets.resize(rows_count);
for (int i = 0; i < rows_count; ++i) {
if (null_map[i]) {
StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
continue;
}
auto source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t srclen = offsets[i] - offsets[i - 1];
if (srclen == 0) {
StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
StringOP::push_empty_string(i, dst_data, dst_offsets);
continue;
}
@ -745,12 +819,50 @@ struct ToBase64Impl {
int outlen = base64_encode((const unsigned char*)source, srclen, (unsigned char*)dst);
if (outlen < 0) {
StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets);
}
return Status::OK();
}
};
struct ToBase64OldImpl {
static constexpr auto name = "to_base64";
using ReturnType = DataTypeString;
using ColumnType = ColumnString;
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
NullMap& null_map) {
auto rows_count = offsets.size();
dst_offsets.resize(rows_count);
for (int i = 0; i < rows_count; ++i) {
if (null_map[i]) {
StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
} else {
StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data,
dst_offsets);
continue;
}
const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t srclen = offsets[i] - offsets[i - 1];
if (srclen == 0) {
StringOP::push_empty_string(i, dst_data, dst_offsets);
continue;
}
char dst_array[MAX_STACK_CIPHER_LEN];
char* dst = dst_array;
int cipher_len = (int)(4.0 * ceil((double)srclen / 3.0));
std::unique_ptr<char[]> dst_uptr;
if (cipher_len > MAX_STACK_CIPHER_LEN) {
dst_uptr.reset(new char[cipher_len]);
dst = dst_uptr.get();
}
int outlen = base64_encode((const unsigned char*)source, srclen, (unsigned char*)dst);
StringOP::push_value_string(std::string_view(dst, outlen), i, dst_data, dst_offsets);
}
return Status::OK();
}
@ -773,11 +885,11 @@ struct FromBase64Impl {
continue;
}
auto source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t srclen = offsets[i] - offsets[i - 1];
if (srclen == 0) {
StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
StringOP::push_empty_string(i, dst_data, dst_offsets);
continue;
}
@ -946,8 +1058,11 @@ using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameTo
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
// NOTE(review): FunctionUnHex and FunctionToBase64 are each aliased twice below, once via
// FunctionStringOperateToNullType and once via FunctionStringEncode. Two aliases of the same
// name to different types cannot coexist in one scope, so these are presumably the
// before/after lines of this change — confirm which pair is live.
using FunctionUnHex = FunctionStringOperateToNullType<UnHexImpl>;
using FunctionToBase64 = FunctionStringOperateToNullType<ToBase64Impl>;
using FunctionUnHex = FunctionStringEncode<UnHexImpl>;
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl>;
// The *Old aliases wrap the implementations whose vector() takes a NullMap in
// FunctionStringOperateToNullType.
using FunctionUnHexOld = FunctionStringOperateToNullType<UnHexOldImpl>;
using FunctionToBase64Old = FunctionStringOperateToNullType<ToBase64OldImpl>;
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
using FunctionStringAppendTrailingCharIfAbsent =
@ -1023,6 +1138,8 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_alternative_function<FunctionRightOld>();
factory.register_alternative_function<FunctionSubstringIndexOld>();
factory.register_alternative_function<FunctionStringRepeatOld>();
factory.register_alternative_function<FunctionUnHexOld>();
factory.register_alternative_function<FunctionToBase64Old>();
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");