[refactor](remove non vec code) remove json functions string functions match functions and some code (#16141)

Remove the JSON functions code.
Remove the string functions code.
Remove the math functions code.
Move MatchPredicate to olap, since it is only used in storage predicate processing.
Remove some code in Tuple; the Tuple structure should be removed in the future.
Remove a lot of code in the collection value structure that is no longer used.
This commit is contained in:
yiguolei
2023-01-26 16:21:12 +08:00
committed by GitHub
parent 615a5e7b51
commit adb758dcac
55 changed files with 50 additions and 7952 deletions

View File

@ -100,414 +100,6 @@ double MathFunctions::my_double_round(double value, int64_t dec, bool dec_unsign
return tmp2;
}
// Initialization hook for MathFunctions; the body is empty, so calling it does nothing.
void MathFunctions::init() {}
// Returns the constant M_PI wrapped in a DoubleVal. `ctx` is not used.
DoubleVal MathFunctions::pi(FunctionContext* ctx) {
return DoubleVal(M_PI);
}
// Returns the constant M_E (Euler's number) wrapped in a DoubleVal. `ctx` is not used.
DoubleVal MathFunctions::e(FunctionContext* ctx) {
return DoubleVal(M_E);
}
// Hand-written absolute value for 128-bit integers, because libc++ offers no
// std::abs overload for __int128.
// Strictly positive inputs are returned unchanged; everything else is negated.
__int128_t largeint_abs(__int128_t x) {
    if (x > 0) {
        return x;
    }
    return -x;
}
// Absolute value of a DecimalV2Val.
// Returns NULL for a NULL input, and also when the raw value equals MIN_INT128,
// which cannot be negated within 128 bits.
DecimalV2Val MathFunctions::abs(FunctionContext* ctx, const doris_udf::DecimalV2Val& val) {
    if (val.is_null) {
        return DecimalV2Val::null();
    }
    return UNLIKELY(val.val == MIN_INT128) ? DecimalV2Val::null()
                                           : DecimalV2Val(largeint_abs(val.val));
}
// Absolute value of a LargeIntVal.
// Returns NULL for a NULL input, and also when the value equals MIN_INT128,
// which cannot be negated within 128 bits.
LargeIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::LargeIntVal& val) {
    if (val.is_null) {
        return LargeIntVal::null();
    }
    return UNLIKELY(val.val == MIN_INT128) ? LargeIntVal::null()
                                           : LargeIntVal(largeint_abs(val.val));
}
// Absolute value of a BigIntVal, returned as a LargeIntVal.
// The input is widened to 128 bits before taking the absolute value.
// NULL in -> NULL out.
LargeIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::BigIntVal& val) {
    if (val.is_null) {
        return LargeIntVal::null();
    }
    const __int128 widened = val.val;
    return LargeIntVal(largeint_abs(widened));
}
// Absolute value of an IntVal, returned as a BigIntVal.
// The input is widened to int64_t before std::abs is applied. NULL in -> NULL out.
BigIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::IntVal& val) {
    if (val.is_null) {
        return BigIntVal::null();
    }
    const int64_t widened = val.val;
    return BigIntVal(std::abs(widened));
}
// Absolute value of a SmallIntVal, returned as an IntVal.
// The input is widened to int32_t before std::abs is applied. NULL in -> NULL out.
IntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::SmallIntVal& val) {
    if (val.is_null) {
        return IntVal::null();
    }
    const int32_t widened = val.val;
    return IntVal(std::abs(widened));
}
// Absolute value of a TinyIntVal, returned as a SmallIntVal.
// The input is widened to int16_t before std::abs is applied. NULL in -> NULL out.
SmallIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::TinyIntVal& val) {
    if (val.is_null) {
        return SmallIntVal::null();
    }
    const int16_t widened = val.val;
    return SmallIntVal(std::abs(widened));
}
// Generates MathFunctions::NAME(ctx, v) for logarithm-style functions.
// The generated function returns RET_TYPE::null() when the input is NULL or
// its value is <= 0; otherwise it returns RET_TYPE(FN(v.val)).
//   NAME       - name of the generated member function
//   RET_TYPE   - UDF value type returned
//   INPUT_TYPE - UDF value type accepted
//   FN         - callable applied to v.val
#define LOG_MATH_FN(NAME, RET_TYPE, INPUT_TYPE, FN) \
RET_TYPE MathFunctions::NAME(FunctionContext* ctx, const INPUT_TYPE& v) { \
if (v.is_null || v.val <= 0) return RET_TYPE::null(); \
return RET_TYPE(FN(v.val)); \
}
// Generates MathFunctions::NAME(ctx, v), a one-argument UDF.
// The generated function returns RET_TYPE::null() for a NULL input and
// RET_TYPE(FN(v.val)) otherwise; no other domain check is made.
//   NAME       - name of the generated member function
//   RET_TYPE   - UDF value type returned
//   INPUT_TYPE - UDF value type accepted
//   FN         - callable applied to v.val
#define ONE_ARG_MATH_FN(NAME, RET_TYPE, INPUT_TYPE, FN) \
RET_TYPE MathFunctions::NAME(FunctionContext* ctx, const INPUT_TYPE& v) { \
if (v.is_null) return RET_TYPE::null(); \
return RET_TYPE(FN(v.val)); \
}
// Instantiations of the one-argument function generators.
// Each line defines one MathFunctions member (the trailing ';' is an empty declaration).
// Floating-point abs overloads.
ONE_ARG_MATH_FN(abs, DoubleVal, DoubleVal, std::fabs);
ONE_ARG_MATH_FN(abs, FloatVal, FloatVal, std::fabs);
// Trigonometric functions and their inverses.
ONE_ARG_MATH_FN(sin, DoubleVal, DoubleVal, std::sin);
ONE_ARG_MATH_FN(asin, DoubleVal, DoubleVal, std::asin);
ONE_ARG_MATH_FN(cos, DoubleVal, DoubleVal, std::cos);
ONE_ARG_MATH_FN(acos, DoubleVal, DoubleVal, std::acos);
ONE_ARG_MATH_FN(tan, DoubleVal, DoubleVal, std::tan);
ONE_ARG_MATH_FN(atan, DoubleVal, DoubleVal, std::atan);
// Roots.
ONE_ARG_MATH_FN(sqrt, DoubleVal, DoubleVal, std::sqrt);
ONE_ARG_MATH_FN(cbrt, DoubleVal, DoubleVal, std::cbrt);
// ceil/floor take a DoubleVal but return a BigIntVal, so the double result of
// std::ceil/std::floor is passed to the BigIntVal constructor.
ONE_ARG_MATH_FN(ceil, BigIntVal, DoubleVal, std::ceil);
ONE_ARG_MATH_FN(floor, BigIntVal, DoubleVal, std::floor);
ONE_ARG_MATH_FN(exp, DoubleVal, DoubleVal, std::exp);
// Logarithms: LOG_MATH_FN additionally returns NULL for inputs <= 0.
LOG_MATH_FN(ln, DoubleVal, DoubleVal, std::log);
LOG_MATH_FN(log10, DoubleVal, DoubleVal, std::log10);
// Sign of a double: 1 for positive, -1 for negative, 0 otherwise.
// NULL in -> NULL out.
TinyIntVal MathFunctions::sign(FunctionContext* ctx, const DoubleVal& v) {
    if (v.is_null) {
        return TinyIntVal::null();
    }
    int direction = 0;
    if (v.val > 0) {
        direction = 1;
    } else if (v.val < 0) {
        direction = -1;
    }
    return TinyIntVal(direction);
}
// Converts degrees to radians: v * pi / 180.
// A NULL input is returned unchanged.
DoubleVal MathFunctions::radians(FunctionContext* ctx, const DoubleVal& v) {
    if (v.is_null) {
        return v;
    }
    // Multiply first, then divide, to keep the original evaluation order.
    const double times_pi = v.val * M_PI;
    return DoubleVal(times_pi / 180.0);
}
// Converts radians to degrees: v * 180 / pi.
// A NULL input is returned unchanged.
DoubleVal MathFunctions::degrees(FunctionContext* ctx, const DoubleVal& v) {
    if (v.is_null) {
        return v;
    }
    // Multiply first, then divide, to keep the original evaluation order.
    const double times_180 = v.val * 180.0;
    return DoubleVal(times_180 / M_PI);
}
// Rounds a double to the nearest integer, with halves going away from zero.
// Implemented by adding +/-0.5 (matching the sign of the input) and truncating
// through a cast to int64_t. NULL in -> NULL out.
BigIntVal MathFunctions::round(FunctionContext* ctx, const DoubleVal& v) {
    if (v.is_null) {
        return BigIntVal::null();
    }
    const double bias = (v.val < 0) ? -0.5 : 0.5;
    return BigIntVal(static_cast<int64_t>(v.val + bias));
}
// Banker's rounding (round half to even) of a double to an integer.
// Delegates to the two-argument overload with a scale of 0 and converts the
// result to int64_t.
// Returns NULL for a NULL input, and also if the delegate reports NULL, rather
// than building a value from an unspecified payload.
BigIntVal MathFunctions::round_bankers(FunctionContext* ctx, const DoubleVal& v) {
    if (v.is_null) {
        return BigIntVal::null();
    }
    const DoubleVal rounded = round_bankers(ctx, v, IntVal(0));
    if (rounded.is_null) {
        return BigIntVal::null();
    }
    return BigIntVal(static_cast<int64_t>(rounded.val));
}
// Banker's rounding (round half to even) of `v` to `d` decimal places.
//
// Returns NULL if either argument is NULL.
//
// Algorithm: scale the value by 10^d and round to nearest with std::round
// (halves away from zero). Only if the scaled value was a tie, i.e. within
// TOLERANCE of exactly halfway between two integers, and the rounded result is
// odd, step to the even neighbour on the other side of the scaled value.
// Non-tie values keep the ordinary nearest-integer result.
DoubleVal MathFunctions::round_bankers(doris_udf::FunctionContext* ctx, const DoubleVal& v,
                                       const IntVal& d) {
    if (v.is_null || d.is_null) {
        return DoubleVal::null();
    }
    const double TOLERANCE = 1e-10;
    const double shift = std::pow(10, d.val);
    const double t = v.val * shift;
    double rounded = std::round(t);
    // |rounded - t| is at most 0.5, so a tie is when that distance is ~0.5.
    const bool is_tie = std::fabs(std::fabs(rounded - t) - 0.5) < TOLERANCE;
    // fmod works for negative values and avoids an out-of-range int64_t cast.
    const bool is_odd = std::fmod(rounded, 2.0) != 0.0;
    if (is_tie && is_odd) {
        // The even candidate lies on the opposite side of t from `rounded`.
        rounded += (t > rounded) ? 1.0 : -1.0;
    }
    return DoubleVal(rounded / shift);
}
// Rounds `v` to `scale` decimal places via my_double_round with
// dec_unsigned = false and truncate = false.
// Returns NULL if either argument is NULL.
DoubleVal MathFunctions::round_up_to(FunctionContext* ctx, const DoubleVal& v,
                                     const IntVal& scale) {
    if (v.is_null) {
        return DoubleVal::null();
    }
    if (scale.is_null) {
        return DoubleVal::null();
    }
    const double rounded = my_double_round(v.val, scale.val, false, false);
    return DoubleVal(rounded);
}
// Truncates `v` to `scale` decimal places via my_double_round with
// dec_unsigned = false and truncate = true.
// Returns NULL if either argument is NULL.
DoubleVal MathFunctions::truncate(FunctionContext* ctx, const DoubleVal& v, const IntVal& scale) {
    if (v.is_null) {
        return DoubleVal::null();
    }
    if (scale.is_null) {
        return DoubleVal::null();
    }
    const double truncated = my_double_round(v.val, scale.val, false, true);
    return DoubleVal(truncated);
}
// Base-2 logarithm, computed as ln(v) / ln(2).
// Returns NULL for a NULL input or for values <= 0.
DoubleVal MathFunctions::log2(FunctionContext* ctx, const DoubleVal& v) {
    const bool outside_domain = v.is_null || v.val <= 0.0;
    if (outside_domain) {
        return DoubleVal::null();
    }
    const double ln_value = std::log(v.val);
    const double ln_two = std::log(2.0);
    return DoubleVal(ln_value / ln_two);
}
// Tolerance used to decide that a logarithm base is (numerically) equal to 1.
const double EPSILON = 1e-9;

// Logarithm of `v` in the given `base`, computed as ln(v) / ln(base).
// Returns NULL when either argument is NULL, when base <= 0, when base is
// within EPSILON of 1, or when v <= 0.
DoubleVal MathFunctions::log(FunctionContext* ctx, const DoubleVal& base, const DoubleVal& v) {
    if (base.is_null || v.is_null) {
        return DoubleVal::null();
    }
    const bool unusable_base = base.val <= 0 || std::fabs(base.val - 1.0) < EPSILON;
    if (unusable_base || v.val <= 0.0) {
        return DoubleVal::null();
    }
    const double ln_value = std::log(v.val);
    const double ln_base = std::log(base.val);
    return DoubleVal(ln_value / ln_base);
}
// Raises `base` to the power `exp` using std::pow.
// Returns NULL if either argument is NULL.
DoubleVal MathFunctions::pow(FunctionContext* ctx, const DoubleVal& base, const DoubleVal& exp) {
    const bool any_null = base.is_null || exp.is_null;
    if (any_null) {
        return DoubleVal::null();
    }
    const double power = std::pow(base.val, exp.val);
    return DoubleVal(power);
}
// Prepare hook for rand()/rand(seed).
// Allocates a std::mt19937 through the function context, registers it as the
// function state for `scope`, and constructs it in place.
// For THREAD_LOCAL scope the engine is then seeded:
//   - one argument: it must be constant; its value (0 when NULL) is the seed;
//   - otherwise: seeded from std::random_device.
// On allocation failure an error is logged and no state is set.
void MathFunctions::rand_prepare(FunctionContext* ctx, FunctionContext::FunctionStateScope scope) {
    auto* engine = reinterpret_cast<std::mt19937*>(ctx->allocate(sizeof(std::mt19937)));
    if (UNLIKELY(engine == nullptr)) {
        LOG(ERROR) << "allocate random seed generator failed.";
        return;
    }
    ctx->set_function_state(scope, engine);
    new (engine) std::mt19937();

    if (scope != FunctionContext::THREAD_LOCAL) {
        return;
    }

    if (ctx->get_num_args() != 1) {
        // Unseeded rand(): use a non-deterministic seed.
        engine->seed(std::random_device()());
        return;
    }

    // This is a call to RandSeed, initialize the seed
    // TODO: should we support non-constant seed?
    if (!ctx->is_arg_constant(0)) {
        ctx->set_error("Seed argument to rand() must be constant");
        return;
    }
    BigIntVal* seed_arg = static_cast<BigIntVal*>(ctx->get_constant_arg(0));
    uint32_t seed = 0;
    if (!seed_arg->is_null) {
        seed = seed_arg->val;
    }
    engine->seed(seed);
}
// Draws a double from a uniform_real_distribution over (0.0, 1.0) using the
// std::mt19937 stored as the THREAD_LOCAL function state.
DoubleVal MathFunctions::rand(FunctionContext* ctx) {
    auto* engine =
            reinterpret_cast<std::mt19937*>(ctx->get_function_state(FunctionContext::THREAD_LOCAL));
    DCHECK(engine != nullptr);
    std::uniform_real_distribution<double> unit_interval(0.0, 1.0);
    return DoubleVal(unit_interval(*engine));
}
// rand(seed) entry point: returns NULL for a NULL seed, otherwise the next
// value from rand(ctx). The seed value itself is not read here.
DoubleVal MathFunctions::rand_seed(FunctionContext* ctx, const BigIntVal& seed) {
    return seed.is_null ? DoubleVal::null() : rand(ctx);
}
// Close hook for rand(): for THREAD_LOCAL scope, frees the function state
// through the context and resets the THREAD_LOCAL state pointer to nullptr.
// Other scopes are left untouched.
void MathFunctions::rand_close(FunctionContext* ctx, FunctionContext::FunctionStateScope scope) {
    if (scope != FunctionContext::THREAD_LOCAL) {
        return;
    }
    auto* state =
            reinterpret_cast<uint8_t*>(ctx->get_function_state(FunctionContext::THREAD_LOCAL));
    ctx->free(state);
    ctx->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
}
// Binary string representation of a BIGINT, without leading zeros ("0" for 0).
// NULL in -> NULL out.
StringVal MathFunctions::bin(FunctionContext* ctx, const BigIntVal& v) {
    if (v.is_null) {
        return StringVal::null();
    }
    // Work on the unsigned bit pattern so right shifts are logical, not
    // arithmetic (the behaviour for signed values is compiler dependent).
    uint64_t remaining = static_cast<uint64_t>(v.val);
    const size_t max_bits = sizeof(uint64_t) * 8;
    char digits[max_bits];
    // Fill the buffer from the back; `start` ends up at the most significant digit.
    uint32_t start = max_bits;
    for (;;) {
        digits[--start] = (remaining & 1) ? '1' : '0';
        remaining >>= 1;
        if (remaining == 0) {
            break;
        }
    }
    return AnyValUtil::from_buffer_temp(ctx, digits + start, max_bits - start);
}
// Upper-case hexadecimal representation of a BIGINT, interpreted as an
// unsigned 64-bit pattern, without leading zeros ("0" for 0).
// NULL in -> NULL out.
StringVal MathFunctions::hex_int(FunctionContext* ctx, const BigIntVal& v) {
    if (v.is_null) {
        return StringVal::null();
    }
    uint64_t num = v.val;
    if (num == 0) {
        return AnyValUtil::from_string_temp(ctx, "0");
    }
    static const char kHexDigits[] = "0123456789ABCDEF";
    // A uint64_t needs at most 16 hex digits, plus one byte for the terminator.
    char buf[17];
    int pos = 16;
    buf[pos] = '\0';
    // Emit digits least-significant first, writing backwards so that the
    // string starting at buf + pos is already in the right order.
    while (num) {
        buf[--pos] = kHexDigits[num & 15];
        num >>= 4;
    }
    return AnyValUtil::from_string_temp(ctx, buf + pos);
}
// Hex-encodes the bytes of a string into a temporary StringVal of twice the
// input length, using simd::VStringFunctions::hex_encode.
// NULL in -> NULL out.
StringVal MathFunctions::hex_string(FunctionContext* ctx, const StringVal& s) {
    if (s.is_null) {
        return StringVal::null();
    }
    StringVal encoded = StringVal::create_temp_string_val(ctx, s.len * 2);
    char* out = reinterpret_cast<char*>(encoded.ptr);
    simd::VStringFunctions::hex_encode(s.ptr, s.len, out);
    return encoded;
}
// Decodes a hexadecimal string (upper or lower case digits) into raw bytes.
//   - NULL input                 -> NULL
//   - odd number of characters   -> empty StringVal (like Hive)
//   - any non-hex character      -> empty StringVal
StringVal MathFunctions::unhex(FunctionContext* ctx, const StringVal& s) {
    if (s.is_null) {
        return StringVal::null();
    }
    // For odd number of chars return empty string like Hive does.
    if (s.len & 1) {
        return StringVal();
    }
    int result_len = s.len / 2;
    StringVal result_string_val(ctx, result_len);
    char* result = reinterpret_cast<char*>(result_string_val.ptr);

    // Value of one hex digit in [0, 15], or -1 if the character is not a hex digit.
    auto nibble = [](uint8_t ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        return -1;
    };

    // Consume two input characters per output byte: high nibble, then low nibble.
    for (int in = 0, out = 0; in < s.len; in += 2, ++out) {
        const int high = nibble(s.ptr[in]);
        if (high < 0) {
            // Character not in hex alphabet, return empty string.
            return StringVal();
        }
        const int low = nibble(s.ptr[in + 1]);
        if (low < 0) {
            // Character not in hex alphabet, return empty string.
            return StringVal();
        }
        char c = 0;
        c += high * 16;
        c += low;
        result[out] = c;
    }
    return result_string_val;
}
// Converts `num`, whose decimal digits are read as a number written in
// `src_base`, into its string representation in `dest_base`.
// Returns NULL when any argument is NULL, when either base has a magnitude
// outside [MIN_BASE, MAX_BASE], or when src_base is negative but num is not.
StringVal MathFunctions::conv_int(FunctionContext* ctx, const BigIntVal& num,
                                  const TinyIntVal& src_base, const TinyIntVal& dest_base) {
    if (num.is_null || src_base.is_null || dest_base.is_null) {
        return StringVal::null();
    }
    // As in MySQL and Hive, min base is 2 and max base is 36.
    // (36 is max base representable by alphanumeric chars)
    // If a negative target base is given, num should be interpreted in 2's complement.
    const auto src_magnitude = std::abs(src_base.val);
    const auto dest_magnitude = std::abs(dest_base.val);
    const bool bases_supported = src_magnitude >= MIN_BASE && src_magnitude <= MAX_BASE &&
                                 dest_magnitude >= MIN_BASE && dest_magnitude <= MAX_BASE;
    if (!bases_supported) {
        // Return nullptr like Hive does.
        return StringVal::null();
    }
    // Invalid input.
    if (src_base.val < 0 && num.val >= 0) {
        return StringVal::null();
    }
    int64_t decimal_num = num.val;
    // For a non-decimal source base, convert src_num (a number in src_base but
    // encoded in decimal) into its actual decimal number.
    const bool needs_reinterpretation = src_base.val != 10;
    if (needs_reinterpretation &&
        !decimal_in_base_to_decimal(num.val, src_base.val, &decimal_num)) {
        // Handle overflow, setting decimal_num appropriately.
        handle_parse_result(dest_base.val, &decimal_num, StringParser::PARSE_OVERFLOW);
    }
    return decimal_to_base(ctx, decimal_num, dest_base.val);
}
// Converts the number spelled by `num_str` in `src_base` into its string
// representation in `dest_base`.
// Returns NULL when any argument is NULL, when either base has a magnitude
// outside [MIN_BASE, MAX_BASE], or when src_base is negative but the parsed
// value is not. Returns "0" when handle_parse_result rejects the parse.
StringVal MathFunctions::conv_string(FunctionContext* ctx, const StringVal& num_str,
                                     const TinyIntVal& src_base, const TinyIntVal& dest_base) {
    if (num_str.is_null || src_base.is_null || dest_base.is_null) {
        return StringVal::null();
    }
    // As in MySQL and Hive, min base is 2 and max base is 36.
    // (36 is max base representable by alphanumeric chars)
    // If a negative target base is given, num should be interpreted in 2's complement.
    const auto src_magnitude = std::abs(src_base.val);
    const auto dest_magnitude = std::abs(dest_base.val);
    const bool bases_supported = src_magnitude >= MIN_BASE && src_magnitude <= MAX_BASE &&
                                 dest_magnitude >= MIN_BASE && dest_magnitude <= MAX_BASE;
    if (!bases_supported) {
        // Return nullptr like Hive does.
        return StringVal::null();
    }
    // Convert digits in num_str in src_base to decimal.
    StringParser::ParseResult parse_res;
    char* digits = reinterpret_cast<char*>(num_str.ptr);
    int64_t decimal_num =
            StringParser::string_to_int<int64_t>(digits, num_str.len, src_base.val, &parse_res);
    if (src_base.val < 0 && decimal_num >= 0) {
        // Invalid input.
        return StringVal::null();
    }
    const bool parsed_ok = handle_parse_result(dest_base.val, &decimal_num, parse_res);
    if (!parsed_ok) {
        // Return 0 for invalid input strings like Hive does.
        return StringVal(reinterpret_cast<uint8_t*>(const_cast<char*>("0")), 1);
    }
    return decimal_to_base(ctx, decimal_num, dest_base.val);
}
StringVal MathFunctions::decimal_to_base(FunctionContext* ctx, int64_t src_num, int8_t dest_base) {
// Max number of digits of any base (base 2 gives max digits), plus sign.
const size_t max_digits = sizeof(uint64_t) * 8 + 1;
@ -579,153 +171,4 @@ bool MathFunctions::handle_parse_result(int8_t dest_base, int64_t* num,
return true;
}
// Positive modulus for BIGINT: the result is 0 or has the same sign as `b`
// (for b > 0 it lies in [0, b)).
//
// Returns NULL if either argument is NULL or if `b` is 0, since integer
// division by zero is undefined; fmod_float/fmod_double treat a zero divisor
// the same way.
//
// The result matches ((a % b) + b) % b wherever that expression is well
// defined, but is computed without the intermediate addition that can
// overflow for large |b|.
BigIntVal MathFunctions::pmod_bigint(FunctionContext* ctx, const BigIntVal& a, const BigIntVal& b) {
    if (a.is_null || b.is_null || b.val == 0) {
        return BigIntVal::null();
    }
    int64_t remainder = 0;
    // Everything is divisible by -1; handled separately because
    // INT64_MIN % -1 overflows.
    if (b.val != -1) {
        remainder = a.val % b.val;
        // `%` truncates toward zero, so the remainder takes the sign of `a`.
        // Shift it by one divisor when its sign disagrees with `b`; the two
        // operands have opposite signs there, so the addition cannot overflow.
        if (remainder != 0 && ((remainder < 0) != (b.val < 0))) {
            remainder += b.val;
        }
    }
    return BigIntVal(remainder);
}
// Positive modulus for DOUBLE: fmod(fmod(a, b) + b, b).
// Returns NULL if either argument is NULL. A zero divisor is not special-cased
// here; the result is whatever fmod yields for it.
DoubleVal MathFunctions::pmod_double(FunctionContext* ctx, const DoubleVal& a, const DoubleVal& b) {
    const bool any_null = a.is_null || b.is_null;
    if (any_null) {
        return DoubleVal::null();
    }
    const double shifted = fmod(a.val, b.val) + b.val;
    return DoubleVal(fmod(shifted, b.val));
}
// Floating-point remainder of a / b for FLOAT, via fmodf.
// Returns NULL if either argument is NULL or the divisor is 0.
FloatVal MathFunctions::fmod_float(FunctionContext* ctx, const FloatVal& a, const FloatVal& b) {
    const bool undefined = a.is_null || b.is_null || b.val == 0;
    if (undefined) {
        return FloatVal::null();
    }
    const float remainder = fmodf(a.val, b.val);
    return FloatVal(remainder);
}
// Floating-point remainder of a / b for DOUBLE, via fmod.
// Returns NULL if either argument is NULL or the divisor is 0.
DoubleVal MathFunctions::fmod_double(FunctionContext* ctx, const DoubleVal& a, const DoubleVal& b) {
    const bool undefined = a.is_null || b.is_null || b.val == 0;
    if (undefined) {
        return DoubleVal::null();
    }
    const double remainder = fmod(a.val, b.val);
    return DoubleVal(remainder);
}
// Unary plus for BIGINT: returns the argument unchanged (including NULL).
BigIntVal MathFunctions::positive_bigint(FunctionContext* ctx, const BigIntVal& val) {
return val;
}
// Unary plus for DOUBLE: returns the argument unchanged (including NULL).
DoubleVal MathFunctions::positive_double(FunctionContext* ctx, const DoubleVal& val) {
return val;
}
// Unary plus for DECIMALV2: returns the argument unchanged (including NULL).
DecimalV2Val MathFunctions::positive_decimal(FunctionContext* ctx, const DecimalV2Val& val) {
return val;
}
// Unary minus for BIGINT. A NULL input is returned unchanged.
BigIntVal MathFunctions::negative_bigint(FunctionContext* ctx, const BigIntVal& val) {
    if (val.is_null) {
        return val;
    }
    const int64_t negated = -val.val;
    return BigIntVal(negated);
}
// Unary minus for DOUBLE. A NULL input is returned unchanged.
DoubleVal MathFunctions::negative_double(FunctionContext* ctx, const DoubleVal& val) {
    if (val.is_null) {
        return val;
    }
    const double negated = -val.val;
    return DoubleVal(negated);
}
// Unary minus for DECIMALV2. A NULL input is returned unchanged.
// The value is converted to a DecimalV2Value, negated with its unary minus,
// and written back into a fresh DecimalV2Val.
DecimalV2Val MathFunctions::negative_decimal(FunctionContext* ctx, const DecimalV2Val& val) {
    if (val.is_null) {
        return val;
    }
    DecimalV2Value negated = -DecimalV2Value::from_decimal_val(val);
    DecimalV2Val out;
    negated.to_decimal_val(&out);
    return out;
}
// Generates MathFunctions::least(ctx, num_args, args) for a UDF value TYPE
// whose payload is compared directly through its `.val` member.
// The generated function returns TYPE::null() as soon as any argument is NULL;
// otherwise it returns a new TYPE holding the smallest `.val`. On equal values
// the earliest argument is kept, because the comparison is strict.
// args[0] is read unconditionally, so num_args is presumably always >= 1 —
// TODO confirm against the caller.
#define LEAST_FN(TYPE) \
TYPE MathFunctions::least(FunctionContext* ctx, int num_args, const TYPE* args) { \
if (args[0].is_null) return TYPE::null(); \
int result_idx = 0; \
for (int i = 1; i < num_args; ++i) { \
if (args[i].is_null) return TYPE::null(); \
if (args[i].val < args[result_idx].val) result_idx = i; \
} \
return TYPE(args[result_idx].val); \
}
// List of the types for which the `.val`-based least() overload is generated.
#define LEAST_FNS() \
LEAST_FN(TinyIntVal); \
LEAST_FN(SmallIntVal); \
LEAST_FN(IntVal); \
LEAST_FN(BigIntVal); \
LEAST_FN(LargeIntVal); \
LEAST_FN(FloatVal); \
LEAST_FN(DoubleVal);
// Emit the overloads.
LEAST_FNS();
// Generates MathFunctions::least(ctx, num_args, args) for UDF value types that
// are compared through a separate DORIS_TYPE rather than a `.val` member.
//   TYPE_NAME  - suffix used to form DORIS_TYPE::from_<TYPE_NAME> and
//                <value>.to_<TYPE_NAME>
//   TYPE       - UDF value type of the arguments and the result
//   DORIS_TYPE - type the arguments are converted to and compared with operator<
// The generated function returns TYPE::null() as soon as any argument is NULL;
// otherwise it converts every argument, keeps the smallest, and converts it back.
#define LEAST_NONNUMERIC_FN(TYPE_NAME, TYPE, DORIS_TYPE) \
TYPE MathFunctions::least(FunctionContext* ctx, int num_args, const TYPE* args) { \
if (args[0].is_null) return TYPE::null(); \
DORIS_TYPE result_val = DORIS_TYPE::from_##TYPE_NAME(args[0]); \
for (int i = 1; i < num_args; ++i) { \
if (args[i].is_null) return TYPE::null(); \
DORIS_TYPE val = DORIS_TYPE::from_##TYPE_NAME(args[i]); \
if (val < result_val) result_val = val; \
} \
TYPE result; \
result_val.to_##TYPE_NAME(&result); \
return result; \
}
// List of the types for which the conversion-based least() overload is generated.
#define LEAST_NONNUMERIC_FNS() \
LEAST_NONNUMERIC_FN(string_val, StringVal, StringRef); \
LEAST_NONNUMERIC_FN(datetime_val, DateTimeVal, DateTimeValue); \
LEAST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value);
// Emit the overloads.
LEAST_NONNUMERIC_FNS();
// Generates MathFunctions::greatest(ctx, num_args, args) for a UDF value TYPE
// whose payload is compared directly through its `.val` member.
// The generated function returns TYPE::null() as soon as any argument is NULL;
// otherwise it returns a new TYPE holding the largest `.val`. On equal values
// the earliest argument is kept, because the comparison is strict.
// args[0] is read unconditionally, so num_args is presumably always >= 1 —
// TODO confirm against the caller.
#define GREATEST_FN(TYPE) \
TYPE MathFunctions::greatest(FunctionContext* ctx, int num_args, const TYPE* args) { \
if (args[0].is_null) return TYPE::null(); \
int result_idx = 0; \
for (int i = 1; i < num_args; ++i) { \
if (args[i].is_null) return TYPE::null(); \
if (args[i].val > args[result_idx].val) result_idx = i; \
} \
return TYPE(args[result_idx].val); \
}
// List of the types for which the `.val`-based greatest() overload is generated.
#define GREATEST_FNS() \
GREATEST_FN(TinyIntVal); \
GREATEST_FN(SmallIntVal); \
GREATEST_FN(IntVal); \
GREATEST_FN(BigIntVal); \
GREATEST_FN(LargeIntVal); \
GREATEST_FN(FloatVal); \
GREATEST_FN(DoubleVal);
// Emit the overloads.
GREATEST_FNS();
// Generates MathFunctions::greatest(ctx, num_args, args) for UDF value types
// that are compared through a separate DORIS_TYPE rather than a `.val` member.
//   TYPE_NAME  - suffix used to form DORIS_TYPE::from_<TYPE_NAME> and
//                <value>.to_<TYPE_NAME>
//   TYPE       - UDF value type of the arguments and the result
//   DORIS_TYPE - type the arguments are converted to and compared with operator>
// The generated function returns TYPE::null() as soon as any argument is NULL;
// otherwise it converts every argument, keeps the largest, and converts it back.
#define GREATEST_NONNUMERIC_FN(TYPE_NAME, TYPE, DORIS_TYPE) \
TYPE MathFunctions::greatest(FunctionContext* ctx, int num_args, const TYPE* args) { \
if (args[0].is_null) return TYPE::null(); \
DORIS_TYPE result_val = DORIS_TYPE::from_##TYPE_NAME(args[0]); \
for (int i = 1; i < num_args; ++i) { \
if (args[i].is_null) return TYPE::null(); \
DORIS_TYPE val = DORIS_TYPE::from_##TYPE_NAME(args[i]); \
if (val > result_val) result_val = val; \
} \
TYPE result; \
result_val.to_##TYPE_NAME(&result); \
return result; \
}
// List of the types for which the conversion-based greatest() overload is generated.
#define GREATEST_NONNUMERIC_FNS() \
GREATEST_NONNUMERIC_FN(string_val, StringVal, StringRef); \
GREATEST_NONNUMERIC_FN(datetime_val, DateTimeVal, DateTimeValue); \
GREATEST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value);
// Emit the overloads.
GREATEST_NONNUMERIC_FNS();
} // namespace doris