534 lines
19 KiB
C++
534 lines
19 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#ifndef DORIS_BE_SRC_QUERY_RUNTIME_DECIMAL_VALUE_H
|
|
#define DORIS_BE_SRC_QUERY_RUNTIME_DECIMAL_VALUE_H
|
|
|
|
#include <cctype>
|
|
#include <climits>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <string>
|
|
|
|
#include "common/logging.h"
|
|
#include "gutil/strings/numbers.h"
|
|
#include "udf/udf.h"
|
|
#include "util/hash_util.hpp"
|
|
#include "util/mysql_global.h"
|
|
|
|
namespace doris {
|
|
|
|
// Number of decimal digits packed into one "big digit" (one int32_t buffer element).
static const int32_t DIG_PER_DEC1 = 9;
// Capacity of the digit buffer, counted in "big digits" (int32_t elements).
static const int32_t DECIMAL_BUFF_LENGTH = 9;
// Upper bound on the number of decimal digits the buffer can physically hold.
static const int32_t DECIMAL_MAX_POSSIBLE_PRECISION = DECIMAL_BUFF_LENGTH * DIG_PER_DEC1;

// Maximum guaranteed precision in decimal digits. The decimal point always
// sits on a big-digit boundary, so up to (DIG_PER_DEC1 - 1) digit positions may
// be unusable at each of two boundaries; those are subtracted from the
// physical capacity.
static const int32_t DECIMAL_MAX_PRECISION =
        DECIMAL_MAX_POSSIBLE_PRECISION - (DIG_PER_DEC1 - 1) * 2;
static const int32_t DECIMAL_MAX_SCALE = 30;
// NOT_FIXED_DEC is normally provided by mysql_com.h; define it only if absent.
#ifndef NOT_FIXED_DEC
static const int32_t NOT_FIXED_DEC = 31;
#endif
// Maximum length of the string representation: all possible decimal digits
// + 1 position for the sign + 1 position for the decimal point (no terminator).
static const int32_t DECIMAL_MAX_STR_LENGTH = (DECIMAL_MAX_POSSIBLE_PRECISION + 2);

static const int32_t DIG_MASK = 100000000;  // 10^8
static const int32_t DIG_BASE = 1000000000; // 10^9, the radix of one big digit
static const int32_t DIG_MAX = DIG_BASE - 1; // largest value of one big digit

// powers10[i] == 10^i for i in [0, DIG_PER_DEC1].
static const int32_t powers10[DIG_PER_DEC1 + 1] = {
        1,      10,      100,      1000,      10000,
        100000, 1000000, 10000000, 100000000, 1000000000};
// frac_max[i] is a big digit whose leading (i + 1) decimal digits are 9 and
// whose remaining digits are 0.
static const int32_t frac_max[DIG_PER_DEC1 - 1] = {
        900000000, 990000000, 999000000, 999900000,
        999990000, 999999000, 999999900, 999999990};
|
|
|
|
// Error codes reported by decimal operations. The individual codes are
// distinct bits so that they can be combined into masks.
// TODO(lingbin): add to mysql result if we support "show warning" in our mysql protocol?
enum DecimalError {
    E_DEC_OK = 0,
    E_DEC_TRUNCATED = 1,
    E_DEC_OVERFLOW = 2,
    E_DEC_DIV_ZERO = 4,
    E_DEC_BAD_NUM = 8,
    E_DEC_OOM = 16,

    // Mask covering every error bit above (== 31).
    E_DEC_ERROR = E_DEC_TRUNCATED | E_DEC_OVERFLOW | E_DEC_DIV_ZERO | E_DEC_BAD_NUM | E_DEC_OOM,
    // Mask covering every error bit except E_DEC_TRUNCATED (== 30).
    E_DEC_FATAL_ERROR = E_DEC_OVERFLOW | E_DEC_DIV_ZERO | E_DEC_BAD_NUM | E_DEC_OOM
};
|
|
|
|
// Rounding strategies accepted by DecimalValue::round().
enum DecimalRoundMode {
    HALF_UP = 1,
    HALF_EVEN = 2,
    CEILING = 3,
    FLOOR = 4,
    TRUNCATE = 5
};
|
|
|
|
// Forward declaration; the definition appears later in this header.
// T is expected to be an integer type (int8_t, int16_t, ...).
template <typename T>
inline T round_up(T length);
|
|
|
|
// Internally decimal numbers are stored base 10^9 (see DIG_BASE)
|
|
// So one variable of type big_digit_type is limited:
|
|
// 0 < decimal_digit <= DIG_MAX < DIG_BASE
|
|
class DecimalValue {
|
|
public:
|
|
friend DecimalValue operator+(const DecimalValue& v1, const DecimalValue& v2);
|
|
friend DecimalValue operator-(const DecimalValue& v1, const DecimalValue& v2);
|
|
friend DecimalValue operator*(const DecimalValue& v1, const DecimalValue& v2);
|
|
friend DecimalValue operator/(const DecimalValue& v1, const DecimalValue& v2);
|
|
friend int32_t do_add(const DecimalValue& value1, const DecimalValue& value2, DecimalValue* to);
|
|
friend int32_t do_sub(const DecimalValue& value1, const DecimalValue& value2, DecimalValue* to);
|
|
friend int do_mul(const DecimalValue& value1, const DecimalValue& value2, DecimalValue* to);
|
|
friend int do_div_mod(const DecimalValue& value1, const DecimalValue& value2, DecimalValue* to,
|
|
DecimalValue* mod);
|
|
friend std::istream& operator>>(std::istream& ism, DecimalValue& decimal_value);
|
|
|
|
friend DecimalValue operator-(const DecimalValue& v);
|
|
|
|
DecimalValue() : _buffer_length(DECIMAL_BUFF_LENGTH) { set_to_zero(); }
|
|
|
|
DecimalValue(const std::string& decimal_str) : _buffer_length(DECIMAL_BUFF_LENGTH) {
|
|
parse_from_str(decimal_str.c_str(), decimal_str.size());
|
|
}
|
|
|
|
// Construct from olap engine
|
|
// Note: the base is 10^9 for parameter frac_value, which means the max length of fraction part
|
|
// is 9, and the parameter frac_value need to be divided by 10^9.
|
|
DecimalValue(int64_t int_value, int64_t frac_value) : _buffer_length(DECIMAL_BUFF_LENGTH) {
|
|
set_to_zero();
|
|
if (int_value < 0 || frac_value < 0) {
|
|
_sign = true;
|
|
} else {
|
|
_sign = false;
|
|
}
|
|
|
|
int32_t big_digit_length = copy_int_to_decimal_int(std::abs(int_value), _buffer);
|
|
_int_length = big_digit_length * DIG_PER_DEC1;
|
|
_frac_length = copy_int_to_decimal_frac(std::abs(frac_value), _buffer + big_digit_length);
|
|
}
|
|
|
|
DecimalValue(int64_t int_value) : _buffer_length(DECIMAL_BUFF_LENGTH) {
|
|
set_to_zero();
|
|
_sign = int_value < 0 ? true : false;
|
|
|
|
int32_t big_digit_length = copy_int_to_decimal_int(std::abs(int_value), _buffer);
|
|
_int_length = big_digit_length * DIG_PER_DEC1;
|
|
_frac_length = 0;
|
|
}
|
|
|
|
DecimalValue& assign_from_float(const float float_value) {
|
|
// buffer is short, sign and '\0' is the 2.
|
|
char buffer[MAX_FLOAT_STR_LENGTH + 2];
|
|
buffer[0] = '\0';
|
|
int length = FloatToBuffer(float_value, MAX_FLOAT_STR_LENGTH, buffer);
|
|
DCHECK(length >= 0) << "gcvt float failed, float value=" << float_value;
|
|
parse_from_str(buffer, length);
|
|
return *this;
|
|
}
|
|
|
|
DecimalValue& assign_from_double(const double double_value) {
|
|
char buffer[MAX_DOUBLE_STR_LENGTH + 2];
|
|
buffer[0] = '\0';
|
|
int length = DoubleToBuffer(double_value, MAX_DOUBLE_STR_LENGTH, buffer);
|
|
DCHECK(length >= 0) << "gcvt double failed, double value=" << double_value;
|
|
parse_from_str(buffer, length);
|
|
return *this;
|
|
}
|
|
|
|
// These cast functions are needed in "functions.cc", which is generated by python script.
|
|
// e.g. "ComputeFunctions::Cast_DecimalValue_double()"
|
|
// Discard the scale part
|
|
// ATTN: invoker must make sure no OVERFLOW
|
|
operator int64_t() const {
|
|
const int32_t* buff = _buffer;
|
|
int64_t result = 0;
|
|
|
|
int32_t int_length = _int_length;
|
|
for (int32_t i = 0; int_length > 0; ++i) {
|
|
result = (result * DIG_BASE) + *(buff + i);
|
|
int_length -= DIG_PER_DEC1;
|
|
}
|
|
|
|
// negative
|
|
if (_sign) {
|
|
result = -result;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// These cast functions are needed in "functions.cc", which is generated by python script.
|
|
// e.g. "ComputeFunctions::Cast_DecimalValue_double()"
|
|
// Discard the scale part
|
|
// ATTN: invoker must make sure no OVERFLOW
|
|
operator __int128() const {
|
|
const int32_t* buff = _buffer;
|
|
__int128 result = 0;
|
|
|
|
int32_t int_length = _int_length;
|
|
for (int32_t i = 0; int_length > 0; ++i) {
|
|
result = (result * DIG_BASE) + *(buff + i);
|
|
int_length -= DIG_PER_DEC1;
|
|
}
|
|
|
|
// negative
|
|
if (_sign) {
|
|
result = -result;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
operator bool() const { return !is_zero(); }
|
|
|
|
operator int8_t() const { return static_cast<char>(operator int64_t()); }
|
|
|
|
operator int16_t() const { return static_cast<int16_t>(operator int64_t()); }
|
|
|
|
operator int32_t() const { return static_cast<int32_t>(operator int64_t()); }
|
|
|
|
operator size_t() const { return static_cast<size_t>(operator int64_t()); }
|
|
|
|
operator float() const { return (float)operator double(); }
|
|
|
|
operator double() const {
|
|
std::string str_buff = to_string();
|
|
double result = std::strtod(str_buff.c_str(), nullptr);
|
|
return result;
|
|
}
|
|
|
|
DecimalValue& operator+=(const DecimalValue& other);
|
|
|
|
// To be Compatible with OLAP
|
|
// ATTN: NO-OVERFLOW should be guaranteed.
|
|
int64_t int_value() const { return operator int64_t(); }
|
|
|
|
// To be Compatible with OLAP
|
|
// NOTE: return a negative value if decimal is negative.
|
|
// ATTN: the max length of fraction part in OLAP is 9, so the 'big digits' except the first one
|
|
// will be truncated.
|
|
int32_t frac_value() const {
|
|
const int32_t intg = round_up(_int_length);
|
|
const int32_t frac = round_up(_frac_length);
|
|
const int32_t* frac_begin = _buffer + intg;
|
|
int32_t frac_val = (frac != 0) ? *frac_begin : 0;
|
|
frac_val = (_sign == true) ? -frac_val : frac_val;
|
|
return frac_val;
|
|
}
|
|
|
|
bool equal(const DecimalValue& other) const { return (*this - other).is_zero(); }
|
|
|
|
bool bigger(const DecimalValue& other) const { return (other - *this)._sign; }
|
|
|
|
bool smaller(const DecimalValue& other) const { return (*this - other)._sign; }
|
|
|
|
bool operator==(const DecimalValue& other) const { return equal(other); }
|
|
|
|
bool operator!=(const DecimalValue& other) const { return !equal(other); }
|
|
|
|
bool operator<=(const DecimalValue& other) const { return !bigger(other); }
|
|
|
|
bool operator>=(const DecimalValue& other) const { return !smaller(other); }
|
|
|
|
bool operator<(const DecimalValue& other) const { return smaller(other); }
|
|
|
|
bool operator>(const DecimalValue& other) const { return bigger(other); }
|
|
|
|
// change to maximum value for given precision and scale
|
|
// precision/scale - see decimal_bin_size() below
|
|
// to - decimal where where the result will be stored
|
|
// to->buf and to->len must be set.
|
|
void to_max_decimal(int precision, int frac);
|
|
void to_min_decimal(int precision, int frac) {
|
|
to_max_decimal(precision, frac);
|
|
_sign = -1;
|
|
}
|
|
|
|
// The maximum of fraction part is "scale".
|
|
// If the length of fraction part is less than "scale", '0' will be filled.
|
|
std::string to_string(int scale) const;
|
|
// Output actual "scale", remove ending zeroes.
|
|
std::string to_string() const;
|
|
|
|
// Convert string to decimal
|
|
// @param from - value to convert. Doesn't have to be \0 terminated!
|
|
// will stop at the fist non-digit char(nor '.' 'e' 'E'),
|
|
// or reaches the length
|
|
// @param length - maximum length
|
|
// @return error number.
|
|
//
|
|
// E_DEC_OK/E_DEC_TRUNCATED/E_DEC_OVERFLOW/E_DEC_BAD_NUM/E_DEC_OOM
|
|
// In case of E_DEC_FATAL_ERROR *to is set to decimal zero
|
|
// (to make error handling easier)
|
|
//
|
|
// e.g. "1.2" ".2" "1.2e-3" "1.2e3"
|
|
int parse_from_str(const char* decimal_str, int32_t length);
|
|
|
|
std::string get_debug_info() const {
|
|
std::stringstream ss;
|
|
ss << "_int_length: " << _int_length << "; "
|
|
<< "_frac_length: " << _frac_length << "; "
|
|
<< "_sign: " << _sign << "; "
|
|
<< "_buffer_length: " << _buffer_length << "; ";
|
|
ss << "_buffer: [";
|
|
for (int i = 0; i < DIG_PER_DEC1; ++i) {
|
|
ss << _buffer[i] << ", ";
|
|
}
|
|
ss << "]; ";
|
|
return ss.str();
|
|
}
|
|
|
|
static DecimalValue get_min_decimal() {
|
|
DecimalValue value;
|
|
value._sign = true;
|
|
value._int_length = DECIMAL_MAX_POSSIBLE_PRECISION;
|
|
value._frac_length = 0;
|
|
for (int i = 0; i < DIG_PER_DEC1; ++i) {
|
|
value._buffer[i] = DIG_BASE - 1;
|
|
}
|
|
return value;
|
|
}
|
|
|
|
static DecimalValue get_max_decimal() {
|
|
DecimalValue value;
|
|
value._sign = false;
|
|
value._int_length = DECIMAL_MAX_POSSIBLE_PRECISION;
|
|
value._frac_length = 0;
|
|
for (int i = 0; i < DIG_PER_DEC1; ++i) {
|
|
value._buffer[i] = DIG_BASE - 1;
|
|
}
|
|
return value;
|
|
}
|
|
|
|
static DecimalValue from_decimal_val(const doris_udf::DecimalVal& val) {
|
|
DecimalValue result;
|
|
result._int_length = val.int_len;
|
|
result._frac_length = val.frac_len;
|
|
result._sign = val.sign;
|
|
|
|
result._buffer_length = DECIMAL_BUFF_LENGTH;
|
|
memcpy(result._buffer, val.buffer, sizeof(int32_t) * DECIMAL_BUFF_LENGTH);
|
|
return result;
|
|
}
|
|
|
|
void to_decimal_val(doris_udf::DecimalVal* value) const {
|
|
value->int_len = _int_length;
|
|
value->frac_len = _frac_length;
|
|
value->sign = _sign;
|
|
memcpy(value->buffer, _buffer, sizeof(int32_t) * DECIMAL_BUFF_LENGTH);
|
|
}
|
|
|
|
// set DecimalValue to zero
|
|
void set_to_zero() {
|
|
_buffer_length = DECIMAL_BUFF_LENGTH;
|
|
memset(_buffer, 0, sizeof(int32_t) * DECIMAL_BUFF_LENGTH);
|
|
_int_length = 1;
|
|
_frac_length = 0;
|
|
_sign = false;
|
|
}
|
|
|
|
void to_abs_value() { _sign = false; }
|
|
|
|
uint32_t hash_uint(uint32_t value, uint32_t seed) const {
|
|
return HashUtil::hash(&value, sizeof(value), seed);
|
|
}
|
|
|
|
uint32_t hash(uint32_t seed) const {
|
|
uint32_t int_len = round_up(_int_length);
|
|
uint32_t frac_len = round_up(_frac_length);
|
|
int idx = 0;
|
|
while (idx < int_len && _buffer[idx] == 0) {
|
|
idx++;
|
|
}
|
|
while (idx < int_len) {
|
|
// Hash
|
|
seed = hash_uint(_buffer[idx++], seed);
|
|
}
|
|
idx = int_len + frac_len;
|
|
while (idx > int_len && _buffer[idx - 1] == 0) {
|
|
idx--;
|
|
}
|
|
while (idx > int_len) {
|
|
// Hash
|
|
seed = hash_uint(_buffer[--idx], seed);
|
|
}
|
|
// Hash sign
|
|
return hash_uint(_sign, seed);
|
|
}
|
|
|
|
int32_t precision() const { return _int_length + _frac_length; }
|
|
|
|
int32_t scale() const { return _frac_length; }
|
|
|
|
int round(DecimalValue* to, int scale, DecimalRoundMode mode);
|
|
|
|
private:
|
|
friend class MultiDistinctDecimalState;
|
|
|
|
bool is_zero() const {
|
|
const int32_t* buff = _buffer;
|
|
const int32_t* end = buff + round_up(_int_length) + round_up(_frac_length);
|
|
while (buff < end) {
|
|
if (*buff++) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// TODO(lingbin): complete this function
|
|
int shift(int32_t shift) { return 0; }
|
|
|
|
// Invoker make sure buff has enough space.
|
|
// return the number of "big digits".
|
|
int copy_int_to_decimal_int(int64_t int_value, int32_t* buff);
|
|
|
|
// ATTN: the max length of fraction part is 9 for now, so we can directly assign parameter
|
|
// frac_value to buff member.
|
|
int copy_int_to_decimal_frac(int64_t frac_value, int32_t* buff);
|
|
|
|
const int32_t* get_first_no_zero_index(int32_t* int_digit_num) const;
|
|
|
|
// _int_length is the number of *decimal* digits (NOT number of big_digit_type's !)
|
|
// before the point
|
|
// _frac_length is the number of decimal digits after the point
|
|
// _buffer_length is the length of buf (length of allocated space) in big_digit_type's,
|
|
// not in bytes
|
|
// _sign false means positive, true means negative
|
|
// _buffer is an array of big_digit_type's
|
|
// TODO(zc): use int64_t to aligned to 8
|
|
int32_t _int_length : 8;
|
|
int32_t _frac_length : 8;
|
|
int32_t _buffer_length : 8;
|
|
bool _sign;
|
|
int32_t _buffer[DECIMAL_BUFF_LENGTH];
|
|
};
|
|
|
|
DecimalValue operator+(const DecimalValue& v1, const DecimalValue& v2);
|
|
DecimalValue operator-(const DecimalValue& v1, const DecimalValue& v2);
|
|
DecimalValue operator*(const DecimalValue& v1, const DecimalValue& v2);
|
|
DecimalValue operator/(const DecimalValue& v1, const DecimalValue& v2);
|
|
DecimalValue operator%(const DecimalValue& v1, const DecimalValue& v2);
|
|
|
|
DecimalValue operator-(const DecimalValue& v);
|
|
|
|
std::ostream& operator<<(std::ostream& os, DecimalValue const& decimal_value);
|
|
std::istream& operator>>(std::istream& ism, DecimalValue& decimal_value);
|
|
|
|
// TODO(lingbin) discard the fraction part?
|
|
int64_t operator&(const DecimalValue& v1, const DecimalValue& v2);
|
|
int64_t operator|(const DecimalValue& v1, const DecimalValue& v2);
|
|
int64_t operator^(const DecimalValue& v1, const DecimalValue& v2);
|
|
int64_t operator~(const DecimalValue& v1);
|
|
|
|
// help to get the number of decimal_digit_t's digits
|
|
// e.g. for 1234567891.222 . intg=10, ROUND_UP(10) = 2.
|
|
// It means in decimal_digit_t type buff,
|
|
// it takes '2' bytes to store integer part
|
|
template <typename T>
|
|
inline T round_up(T length) {
|
|
return (T)((length + DIG_PER_DEC1 - 1) / DIG_PER_DEC1);
|
|
}
|
|
|
|
inline int DecimalValue::copy_int_to_decimal_int(int64_t int_value, int32_t* buff) {
|
|
int64_t dividend = int_value;
|
|
int32_t temp_buff[DECIMAL_BUFF_LENGTH];
|
|
int32_t index = 0; // index in temp_buffer
|
|
|
|
if (int_value == 0) {
|
|
_int_length = 0;
|
|
return 0;
|
|
}
|
|
|
|
int64_t quotient = 0;
|
|
do {
|
|
temp_buff[index++] = dividend % DIG_BASE;
|
|
quotient = dividend / DIG_BASE;
|
|
dividend = quotient;
|
|
} while (quotient != 0);
|
|
|
|
for (int32_t i = 0; i < index; ++i) {
|
|
buff[i] = temp_buff[index - i - 1];
|
|
}
|
|
return index;
|
|
}
|
|
|
|
inline int32_t DecimalValue::copy_int_to_decimal_frac(int64_t frac_value, int32_t* buff) {
|
|
if (frac_value == 0) {
|
|
return 0;
|
|
}
|
|
int32_t abs_frac_value = std::abs(frac_value);
|
|
if (std::abs(frac_value > DIG_BASE)) {
|
|
*buff = DIG_MAX;
|
|
return DIG_PER_DEC1;
|
|
}
|
|
|
|
*buff = abs_frac_value;
|
|
// Count digit length: (DIG_PER_DEC1 - the number of ending zeroes)
|
|
int32_t frac_len = DIG_PER_DEC1;
|
|
int32_t quotient = 0;
|
|
while ((quotient = frac_value % 10) == 0) {
|
|
frac_value /= 10;
|
|
--frac_len;
|
|
}
|
|
|
|
return frac_len;
|
|
}
|
|
|
|
inline const int32_t* DecimalValue::get_first_no_zero_index(int32_t* int_digit_num) const {
|
|
int32_t temp_intg = _int_length;
|
|
const int32_t* buff = _buffer;
|
|
int32_t first_big_digit_num = (temp_intg - 1) % DIG_PER_DEC1 + 1;
|
|
|
|
while (temp_intg > 0 && *buff == 0) {
|
|
temp_intg -= first_big_digit_num;
|
|
first_big_digit_num = DIG_PER_DEC1;
|
|
++buff;
|
|
}
|
|
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Warray-bounds"
|
|
// When the value of a "big digit" is "000099999", its 'intg' may be 5/6/7/8/9,
|
|
// we get accurate 'intg' here and the first no zero index of buff
|
|
if (temp_intg > 0) {
|
|
first_big_digit_num = (temp_intg - 1) % DIG_PER_DEC1 + 1;
|
|
for (; *buff < powers10[first_big_digit_num - 1]; --first_big_digit_num) {
|
|
--temp_intg;
|
|
}
|
|
} else {
|
|
temp_intg = 0;
|
|
}
|
|
#pragma GCC diagnostic pop
|
|
*int_digit_num = temp_intg;
|
|
return buff;
|
|
}
|
|
|
|
std::size_t hash_value(DecimalValue const& value);
|
|
|
|
} // end namespace doris
|
|
|
|
namespace std {
|
|
template <>
|
|
struct hash<doris::DecimalValue> {
|
|
size_t operator()(const doris::DecimalValue& v) const { return doris::hash_value(v); }
|
|
};
|
|
} // namespace std
|
|
|
|
#endif // DORIS_BE_SRC_QUERY_RUNTIME_DECIMAL_VALUE_H
|