[fix](decimal) Fix long string casting to decimalv2 (#35121)
This commit is contained in:
@ -594,123 +594,74 @@ T StringParser::string_to_decimal(const char* __restrict s, int len, int type_pr
|
||||
bool found_exponent = false;
|
||||
int8_t exponent = 0;
|
||||
T value = 0;
|
||||
if constexpr (TYPE_DECIMALV2 == P) {
|
||||
// decimalv2 do not care type_scale and type_precision,just keep the origin logic
|
||||
for (int i = 0; i < len; ++i) {
|
||||
const char& c = s[i];
|
||||
if (LIKELY('0' <= c && c <= '9')) {
|
||||
found_value = true;
|
||||
// Ignore digits once the type's precision limit is reached. This avoids
|
||||
// overflowing the underlying storage while handling a string like
|
||||
// 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
|
||||
// an exponent will be made later.
|
||||
if (LIKELY(type_precision > precision)) {
|
||||
value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
|
||||
} else {
|
||||
*result = StringParser::PARSE_OVERFLOW;
|
||||
value = is_negative
|
||||
? vectorized::min_decimal_value<DecimalType>(type_precision)
|
||||
: vectorized::max_decimal_value<DecimalType>(type_precision);
|
||||
return value;
|
||||
}
|
||||
DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
|
||||
bool has_round = false;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
const char& c = s[i];
|
||||
if (LIKELY('0' <= c && c <= '9')) {
|
||||
found_value = true;
|
||||
// Ignore digits once the type's precision limit is reached. This avoids
|
||||
// overflowing the underlying storage while handling a string like
|
||||
// 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
|
||||
// an exponent will be made later.
|
||||
if (LIKELY(type_precision > precision) && !has_round) {
|
||||
value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
|
||||
++precision;
|
||||
scale += found_dot;
|
||||
} else if (c == '.' && LIKELY(!found_dot)) {
|
||||
found_dot = 1;
|
||||
} else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
|
||||
found_exponent = true;
|
||||
exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
|
||||
if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
|
||||
if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
|
||||
*result = StringParser::PARSE_UNDERFLOW;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
if (value == 0) {
|
||||
*result = StringParser::PARSE_FAILURE;
|
||||
return 0;
|
||||
}
|
||||
*result = StringParser::PARSE_SUCCESS;
|
||||
value *= get_scale_multiplier<T>(type_scale - scale);
|
||||
|
||||
return is_negative ? T(-value) : T(value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// decimalv3
|
||||
bool has_round = false;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
const char& c = s[i];
|
||||
if (LIKELY('0' <= c && c <= '9')) {
|
||||
found_value = true;
|
||||
// Ignore digits once the type's precision limit is reached. This avoids
|
||||
// overflowing the underlying storage while handling a string like
|
||||
// 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
|
||||
// an exponent will be made later.
|
||||
if (LIKELY(type_precision > precision) && !has_round) {
|
||||
value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
|
||||
++precision;
|
||||
scale += found_dot;
|
||||
cur_digit = precision - scale;
|
||||
} else if (!found_dot && max_digit < (precision - scale)) {
|
||||
*result = StringParser::PARSE_OVERFLOW;
|
||||
value = is_negative
|
||||
? vectorized::min_decimal_value<DecimalType>(type_precision)
|
||||
cur_digit = precision - scale;
|
||||
} else if (!found_dot && max_digit < (precision - scale)) {
|
||||
*result = StringParser::PARSE_OVERFLOW;
|
||||
value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
|
||||
: vectorized::max_decimal_value<DecimalType>(type_precision);
|
||||
return value;
|
||||
} else if (found_dot && scale >= type_scale && !has_round) {
|
||||
// make rounding cases
|
||||
if (c > '4') {
|
||||
value += 1;
|
||||
}
|
||||
has_round = true;
|
||||
continue;
|
||||
} else if (!found_dot) {
|
||||
++cur_digit;
|
||||
return value;
|
||||
} else if (found_dot && scale >= type_scale && !has_round) {
|
||||
// make rounding cases
|
||||
if (c > '4') {
|
||||
value += 1;
|
||||
}
|
||||
DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
|
||||
} else if (c == '.' && LIKELY(!found_dot)) {
|
||||
found_dot = 1;
|
||||
} else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
|
||||
found_exponent = true;
|
||||
exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
|
||||
if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
|
||||
if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
|
||||
*result = StringParser::PARSE_UNDERFLOW;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
if (value == 0) {
|
||||
*result = StringParser::PARSE_FAILURE;
|
||||
return 0;
|
||||
}
|
||||
// here to handle
|
||||
*result = StringParser::PARSE_SUCCESS;
|
||||
if (type_scale >= scale) {
|
||||
value *= get_scale_multiplier<T>(type_scale - scale);
|
||||
// here meet non-valid character, should return the value, keep going to meet
|
||||
// the E/e character because we make right user-given type_precision
|
||||
// not max number type_precision
|
||||
if (!is_numeric_ascii(c)) {
|
||||
if (cur_digit > type_precision) {
|
||||
*result = StringParser::PARSE_OVERFLOW;
|
||||
value = is_negative ? vectorized::min_decimal_value<DecimalType>(
|
||||
type_precision)
|
||||
: vectorized::max_decimal_value<DecimalType>(
|
||||
type_precision);
|
||||
return value;
|
||||
}
|
||||
return is_negative ? T(-value) : T(value);
|
||||
}
|
||||
}
|
||||
|
||||
return is_negative ? T(-value) : T(value);
|
||||
has_round = true;
|
||||
continue;
|
||||
} else if (!found_dot) {
|
||||
++cur_digit;
|
||||
}
|
||||
DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
|
||||
} else if (c == '.' && LIKELY(!found_dot)) {
|
||||
found_dot = 1;
|
||||
} else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
|
||||
found_exponent = true;
|
||||
exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
|
||||
if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
|
||||
if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
|
||||
*result = StringParser::PARSE_UNDERFLOW;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
if (value == 0) {
|
||||
*result = StringParser::PARSE_FAILURE;
|
||||
return 0;
|
||||
}
|
||||
// here to handle
|
||||
*result = StringParser::PARSE_SUCCESS;
|
||||
if (type_scale >= scale) {
|
||||
value *= get_scale_multiplier<T>(type_scale - scale);
|
||||
// here meet non-valid character, should return the value, keep going to meet
|
||||
// the E/e character because we make right user-given type_precision
|
||||
// not max number type_precision
|
||||
if (!is_numeric_ascii(c)) {
|
||||
if (cur_digit > type_precision) {
|
||||
*result = StringParser::PARSE_OVERFLOW;
|
||||
value = is_negative
|
||||
? vectorized::min_decimal_value<DecimalType>(type_precision)
|
||||
: vectorized::max_decimal_value<DecimalType>(
|
||||
type_precision);
|
||||
return value;
|
||||
}
|
||||
return is_negative ? T(-value) : T(value);
|
||||
}
|
||||
}
|
||||
|
||||
return is_negative ? T(-value) : T(value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -103,7 +103,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
|
||||
"12345678901234567.012345677", "12345678901234567.012345677",
|
||||
"999999999999999999.999999999"},
|
||||
{"12345678901234567.012345678", "123456789012345678.012345670",
|
||||
"12345678901234567.012345678", "", ""}),
|
||||
"12345678901234567.012345678", "12345678901234567.012345678", ""}),
|
||||
// decimal32 ==> decimal32(9,2)
|
||||
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
|
||||
// (7,2) (6,3) (7,3) (8,1)
|
||||
|
||||
@ -74,21 +74,22 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
|
||||
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris be better"},
|
||||
{"doris be better"}),
|
||||
// decimal ==> decimalv2(decimal<128>(27,9))
|
||||
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL,
|
||||
{
|
||||
// (17, 9)(first 0 will ignore)
|
||||
"012345678901234567.012345678",
|
||||
// (18, 8) (automatically fill 0 for scala)
|
||||
"123456789012345678.01234567",
|
||||
// (17, 10) (rounding last to make it fit)
|
||||
"12345678901234567.0123456779",
|
||||
// (17, 11) (rounding last to make it fit)
|
||||
"12345678901234567.01234567791",
|
||||
// (19, 8) (wrong)
|
||||
"1234567890123456789.01234567",
|
||||
},
|
||||
{"12345678901234567.012345678", "123456789012345678.012345670",
|
||||
"12345678901234567.012345678", "", ""}),
|
||||
FieldType_RandStr(
|
||||
FieldType::OLAP_FIELD_TYPE_DECIMAL,
|
||||
{
|
||||
// (17, 9)(first 0 will ignore)
|
||||
"012345678901234567.012345678",
|
||||
// (18, 8) (automatically fill 0 for scala)
|
||||
"123456789012345678.01234567",
|
||||
// (17, 10) (rounding last to make it fit)
|
||||
"12345678901234567.0123456779",
|
||||
// (17, 11) (rounding last to make it fit)
|
||||
"12345678901234567.01234567791",
|
||||
// (19, 8) (wrong)
|
||||
"1234567890123456789.01234567",
|
||||
},
|
||||
{"12345678901234567.012345678", "123456789012345678.012345670",
|
||||
"12345678901234567.012345678", "12345678901234567.012345678", ""}),
|
||||
// decimal32 ==> decimal32(9,2) (7,2) (6,3) (7,3) (8,1)
|
||||
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
|
||||
{"1234567.12", "123456.123", "1234567.123", "12345679.1"},
|
||||
|
||||
@ -74,21 +74,22 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
|
||||
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris be better"},
|
||||
{"doris be better"}),
|
||||
// decimal ==> decimalv2(decimal<128>(27,9))
|
||||
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL,
|
||||
{
|
||||
// (17, 9)(first 0 will ignore)
|
||||
"012345678901234567.012345678",
|
||||
// (18, 8) (automatically fill 0 for scala)
|
||||
"123456789012345678.01234567",
|
||||
// (17, 10) (rounding last to make it fit)
|
||||
"12345678901234567.0123456779",
|
||||
// (17, 11) (rounding last to make it fit)
|
||||
"12345678901234567.01234567791",
|
||||
// (19, 8) (wrong)
|
||||
"1234567890123456789.01234567",
|
||||
},
|
||||
{"12345678901234567.012345678", "123456789012345678.012345670",
|
||||
"12345678901234567.012345678", "", ""}),
|
||||
FieldType_RandStr(
|
||||
FieldType::OLAP_FIELD_TYPE_DECIMAL,
|
||||
{
|
||||
// (17, 9)(first 0 will ignore)
|
||||
"012345678901234567.012345678",
|
||||
// (18, 8) (automatically fill 0 for scala)
|
||||
"123456789012345678.01234567",
|
||||
// (17, 10) (rounding last to make it fit)
|
||||
"12345678901234567.0123456779",
|
||||
// (17, 11) (rounding last to make it fit)
|
||||
"12345678901234567.01234567791",
|
||||
// (19, 8) (wrong)
|
||||
"1234567890123456789.01234567",
|
||||
},
|
||||
{"12345678901234567.012345678", "123456789012345678.012345670",
|
||||
"12345678901234567.012345678", "12345678901234567.012345678", ""}),
|
||||
// decimal32 ==> decimal32(9,2) (7,2) (6,3) (7,3) (8,1)
|
||||
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
|
||||
{"1234567.12", "123456.123", "1234567.123", "12345679.1"},
|
||||
@ -429,13 +430,13 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
|
||||
"[\\1234567890123456789.01234567\\]"},
|
||||
{"[4.000000000, 5.500000000, 6.670000000]",
|
||||
"[12345678901234567.012345678, 123456789012345678.012345670, "
|
||||
"12345678901234567.012345678, null, null]",
|
||||
"12345678901234567.012345678, 12345678901234567.012345678, null]",
|
||||
"[null, null, null, null, null]", "[null]"},
|
||||
{"[4.000000000, 5.500000000, 6.670000000]",
|
||||
"[12345678901234567.012345678, 123456789012345678.012345670, "
|
||||
"12345678901234567.012345678, null, null]",
|
||||
"12345678901234567.012345678, 12345678901234567.012345678, null]",
|
||||
"[12345678901234567.012345678, 123456789012345678.012345670, "
|
||||
"12345678901234567.012345678, null, null]",
|
||||
"12345678901234567.012345678, 12345678901234567.012345678, null]",
|
||||
"[null]"}),
|
||||
};
|
||||
// array type
|
||||
|
||||
@ -15,11 +15,3 @@
|
||||
11.99990
|
||||
837.43444
|
||||
|
||||
-- !decimalv2_insert --
|
||||
999999999999999999.999999999 1.000000000
|
||||
-999999999999999999.999999999 2.000000000
|
||||
999999999999999999.999999999 3.000000000
|
||||
-999999999999999999.999999999 4.000000000
|
||||
999999999999999999.999999999 5.000000000
|
||||
-999999999999999999.999999999 6.000000000
|
||||
|
||||
|
||||
@ -27,10 +27,10 @@
|
||||
999999999999999999.999999999
|
||||
|
||||
-- !multi_overflow2 --
|
||||
999999999999999999.999999999 999999999999999999.999999999000000000
|
||||
999999999999999999.999999999 999999999999999999.999999999
|
||||
|
||||
-- !multi_overflow3 --
|
||||
999999999999999999.999999999 999999999999999999.999999999000000000
|
||||
999999999999999999.999999999 999999999999999999.999999999
|
||||
|
||||
-- !multi_overflow4 --
|
||||
999999999999999999.999999999 1.000000000 999999999999999999.999999999
|
||||
@ -39,10 +39,10 @@
|
||||
99999999999999999.999999999 0.100000000 999999999999999999.999999990
|
||||
|
||||
-- !div_overflow2 --
|
||||
999999999999999999.999999990
|
||||
999999999999999999.99999999
|
||||
|
||||
-- !div_overflow3 --
|
||||
99999999999999999.999999999 0.1 999999999999999999.9999999900000
|
||||
99999999999999999.999999999 0.1 999999999999999999.999999990
|
||||
|
||||
-- !div_overflow4 --
|
||||
999999999999999999.999999990
|
||||
@ -59,3 +59,6 @@
|
||||
-- !mod4 --
|
||||
0.099999999
|
||||
|
||||
-- !sql --
|
||||
2023-12-18T00:00 95357.10
|
||||
|
||||
|
||||
@ -84,42 +84,6 @@ suite("test_decimalv2_load", "nonConcurrent") {
|
||||
select * from ${tableName2} order by 1;
|
||||
"""
|
||||
|
||||
sql """
|
||||
drop table if exists test_decimalv2_insert;
|
||||
"""
|
||||
sql """
|
||||
CREATE TABLE `test_decimalv2_insert` (
|
||||
`k1` decimalv2(27, 9) null,
|
||||
`k2` decimalv2(27, 9) null
|
||||
)
|
||||
DISTRIBUTED BY HASH(`k1`) BUCKETS 10
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
"""
|
||||
sql "set enable_insert_strict=true;"
|
||||
// overflow, max is inserted
|
||||
sql """
|
||||
insert into test_decimalv2_insert values("999999999999999999999999999999",1);
|
||||
"""
|
||||
// underflow, min is inserted
|
||||
sql """
|
||||
insert into test_decimalv2_insert values("-999999999999999999999999999999",2);
|
||||
"""
|
||||
sql """
|
||||
insert into test_decimalv2_insert values("999999999999999999.9999999991",3);
|
||||
"""
|
||||
sql """
|
||||
insert into test_decimalv2_insert values("-999999999999999999.9999999991",4);
|
||||
"""
|
||||
sql """
|
||||
insert into test_decimalv2_insert values("999999999999999999.9999999995",5);
|
||||
"""
|
||||
sql """
|
||||
insert into test_decimalv2_insert values("-999999999999999999.9999999995",6);
|
||||
"""
|
||||
qt_decimalv2_insert "select * from test_decimalv2_insert order by 2; "
|
||||
|
||||
sql """
|
||||
admin set frontend config("enable_decimal_conversion" = "true");
|
||||
"""
|
||||
|
||||
@ -269,6 +269,24 @@ suite("test_decimalv2_overflow2") {
|
||||
"""
|
||||
|
||||
|
||||
sql """ drop TABLE if exists test_table """
|
||||
sql """ CREATE TABLE `test_table` (
|
||||
`day_date` datetime NULL COMMENT '',
|
||||
`growth_money` decimalv2(18, 2) NULL COMMENT ''
|
||||
) ENGINE=OLAP
|
||||
UNIQUE KEY(`day_date`)
|
||||
COMMENT ''
|
||||
DISTRIBUTED BY HASH(`day_date`) BUCKETS 4
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"is_being_synced" = "false",
|
||||
"storage_format" = "V2",
|
||||
"disable_auto_compaction" = "false",
|
||||
"enable_single_replica_compaction" = "false"
|
||||
); """
|
||||
sql """ insert into test_table values ('2023-12-18', '95357.100000000000000000000000000000000000')"""
|
||||
qt_sql """ select * from test_table """
|
||||
sql """ drop TABLE if exists test_table """
|
||||
// TODO
|
||||
// decimalv2 +-*/ integer
|
||||
// integer +-*/ decimalv2
|
||||
|
||||
Reference in New Issue
Block a user