[fix](decimal) Fix long string casting to decimalv2 (#35121)

This commit is contained in:
Gabriel
2024-05-22 14:24:22 +08:00
committed by yiguolei
parent 84f7bfffe2
commit c23384ff07
8 changed files with 124 additions and 194 deletions

View File

@ -594,123 +594,74 @@ T StringParser::string_to_decimal(const char* __restrict s, int len, int type_pr
bool found_exponent = false;
int8_t exponent = 0;
T value = 0;
if constexpr (TYPE_DECIMALV2 == P) {
// decimalv2 does not care about type_scale and type_precision; just keep the original logic
for (int i = 0; i < len; ++i) {
const char& c = s[i];
if (LIKELY('0' <= c && c <= '9')) {
found_value = true;
// Ignore digits once the type's precision limit is reached. This avoids
// overflowing the underlying storage while handling a string like
// 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
// an exponent will be made later.
if (LIKELY(type_precision > precision)) {
value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
} else {
*result = StringParser::PARSE_OVERFLOW;
value = is_negative
? vectorized::min_decimal_value<DecimalType>(type_precision)
: vectorized::max_decimal_value<DecimalType>(type_precision);
return value;
}
DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
bool has_round = false;
for (int i = 0; i < len; ++i) {
const char& c = s[i];
if (LIKELY('0' <= c && c <= '9')) {
found_value = true;
// Ignore digits once the type's precision limit is reached. This avoids
// overflowing the underlying storage while handling a string like
// 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
// an exponent will be made later.
if (LIKELY(type_precision > precision) && !has_round) {
value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
++precision;
scale += found_dot;
} else if (c == '.' && LIKELY(!found_dot)) {
found_dot = 1;
} else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
found_exponent = true;
exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
*result = StringParser::PARSE_UNDERFLOW;
}
return 0;
}
break;
} else {
if (value == 0) {
*result = StringParser::PARSE_FAILURE;
return 0;
}
*result = StringParser::PARSE_SUCCESS;
value *= get_scale_multiplier<T>(type_scale - scale);
return is_negative ? T(-value) : T(value);
}
}
} else {
// decimalv3
bool has_round = false;
for (int i = 0; i < len; ++i) {
const char& c = s[i];
if (LIKELY('0' <= c && c <= '9')) {
found_value = true;
// Ignore digits once the type's precision limit is reached. This avoids
// overflowing the underlying storage while handling a string like
// 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
// an exponent will be made later.
if (LIKELY(type_precision > precision) && !has_round) {
value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
++precision;
scale += found_dot;
cur_digit = precision - scale;
} else if (!found_dot && max_digit < (precision - scale)) {
*result = StringParser::PARSE_OVERFLOW;
value = is_negative
? vectorized::min_decimal_value<DecimalType>(type_precision)
cur_digit = precision - scale;
} else if (!found_dot && max_digit < (precision - scale)) {
*result = StringParser::PARSE_OVERFLOW;
value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
: vectorized::max_decimal_value<DecimalType>(type_precision);
return value;
} else if (found_dot && scale >= type_scale && !has_round) {
// make rounding cases
if (c > '4') {
value += 1;
}
has_round = true;
continue;
} else if (!found_dot) {
++cur_digit;
return value;
} else if (found_dot && scale >= type_scale && !has_round) {
// make rounding cases
if (c > '4') {
value += 1;
}
DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
} else if (c == '.' && LIKELY(!found_dot)) {
found_dot = 1;
} else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
found_exponent = true;
exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
*result = StringParser::PARSE_UNDERFLOW;
}
return 0;
}
break;
} else {
if (value == 0) {
*result = StringParser::PARSE_FAILURE;
return 0;
}
// Unexpected character after a non-zero value: stop parsing and return a result from here.
*result = StringParser::PARSE_SUCCESS;
if (type_scale >= scale) {
value *= get_scale_multiplier<T>(type_scale - scale);
// here meet non-valid character, should return the value, keep going to meet
// the E/e character because we make right user-given type_precision
// not max number type_precision
if (!is_numeric_ascii(c)) {
if (cur_digit > type_precision) {
*result = StringParser::PARSE_OVERFLOW;
value = is_negative ? vectorized::min_decimal_value<DecimalType>(
type_precision)
: vectorized::max_decimal_value<DecimalType>(
type_precision);
return value;
}
return is_negative ? T(-value) : T(value);
}
}
return is_negative ? T(-value) : T(value);
has_round = true;
continue;
} else if (!found_dot) {
++cur_digit;
}
DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
} else if (c == '.' && LIKELY(!found_dot)) {
found_dot = 1;
} else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
found_exponent = true;
exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
*result = StringParser::PARSE_UNDERFLOW;
}
return 0;
}
break;
} else {
if (value == 0) {
*result = StringParser::PARSE_FAILURE;
return 0;
}
// Unexpected character after a non-zero value: stop parsing and return a result from here.
*result = StringParser::PARSE_SUCCESS;
if (type_scale >= scale) {
value *= get_scale_multiplier<T>(type_scale - scale);
// here meet non-valid character, should return the value, keep going to meet
// the E/e character because we make right user-given type_precision
// not max number type_precision
if (!is_numeric_ascii(c)) {
if (cur_digit > type_precision) {
*result = StringParser::PARSE_OVERFLOW;
value = is_negative
? vectorized::min_decimal_value<DecimalType>(type_precision)
: vectorized::max_decimal_value<DecimalType>(
type_precision);
return value;
}
return is_negative ? T(-value) : T(value);
}
}
return is_negative ? T(-value) : T(value);
}
}

View File

@ -103,7 +103,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
"12345678901234567.012345677", "12345678901234567.012345677",
"999999999999999999.999999999"},
{"12345678901234567.012345678", "123456789012345678.012345670",
"12345678901234567.012345678", "", ""}),
"12345678901234567.012345678", "12345678901234567.012345678", ""}),
// decimal32 ==> decimal32(9,2)
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
// (7,2) (6,3) (7,3) (8,1)

View File

@ -74,21 +74,22 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris be better"},
{"doris be better"}),
// decimal ==> decimalv2(decimal<128>(27,9))
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL,
{
// (17, 9) (the leading 0 is ignored)
"012345678901234567.012345678",
// (18, 8) (the scale is automatically padded with 0)
"123456789012345678.01234567",
// (17, 10) (rounding last to make it fit)
"12345678901234567.0123456779",
// (17, 11) (rounding last to make it fit)
"12345678901234567.01234567791",
// (19, 8) (wrong)
"1234567890123456789.01234567",
},
{"12345678901234567.012345678", "123456789012345678.012345670",
"12345678901234567.012345678", "", ""}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DECIMAL,
{
// (17, 9) (the leading 0 is ignored)
"012345678901234567.012345678",
// (18, 8) (the scale is automatically padded with 0)
"123456789012345678.01234567",
// (17, 10) (rounding last to make it fit)
"12345678901234567.0123456779",
// (17, 11) (rounding last to make it fit)
"12345678901234567.01234567791",
// (19, 8) (wrong)
"1234567890123456789.01234567",
},
{"12345678901234567.012345678", "123456789012345678.012345670",
"12345678901234567.012345678", "12345678901234567.012345678", ""}),
// decimal32 ==> decimal32(9,2) (7,2) (6,3) (7,3) (8,1)
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
{"1234567.12", "123456.123", "1234567.123", "12345679.1"},

View File

@ -74,21 +74,22 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris be better"},
{"doris be better"}),
// decimal ==> decimalv2(decimal<128>(27,9))
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL,
{
// (17, 9) (the leading 0 is ignored)
"012345678901234567.012345678",
// (18, 8) (the scale is automatically padded with 0)
"123456789012345678.01234567",
// (17, 10) (rounding last to make it fit)
"12345678901234567.0123456779",
// (17, 11) (rounding last to make it fit)
"12345678901234567.01234567791",
// (19, 8) (wrong)
"1234567890123456789.01234567",
},
{"12345678901234567.012345678", "123456789012345678.012345670",
"12345678901234567.012345678", "", ""}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DECIMAL,
{
// (17, 9) (the leading 0 is ignored)
"012345678901234567.012345678",
// (18, 8) (the scale is automatically padded with 0)
"123456789012345678.01234567",
// (17, 10) (rounding last to make it fit)
"12345678901234567.0123456779",
// (17, 11) (rounding last to make it fit)
"12345678901234567.01234567791",
// (19, 8) (wrong)
"1234567890123456789.01234567",
},
{"12345678901234567.012345678", "123456789012345678.012345670",
"12345678901234567.012345678", "12345678901234567.012345678", ""}),
// decimal32 ==> decimal32(9,2) (7,2) (6,3) (7,3) (8,1)
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
{"1234567.12", "123456.123", "1234567.123", "12345679.1"},
@ -429,13 +430,13 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
"[\\1234567890123456789.01234567\\]"},
{"[4.000000000, 5.500000000, 6.670000000]",
"[12345678901234567.012345678, 123456789012345678.012345670, "
"12345678901234567.012345678, null, null]",
"12345678901234567.012345678, 12345678901234567.012345678, null]",
"[null, null, null, null, null]", "[null]"},
{"[4.000000000, 5.500000000, 6.670000000]",
"[12345678901234567.012345678, 123456789012345678.012345670, "
"12345678901234567.012345678, null, null]",
"12345678901234567.012345678, 12345678901234567.012345678, null]",
"[12345678901234567.012345678, 123456789012345678.012345670, "
"12345678901234567.012345678, null, null]",
"12345678901234567.012345678, 12345678901234567.012345678, null]",
"[null]"}),
};
// array type

View File

@ -15,11 +15,3 @@
11.99990
837.43444
-- !decimalv2_insert --
999999999999999999.999999999 1.000000000
-999999999999999999.999999999 2.000000000
999999999999999999.999999999 3.000000000
-999999999999999999.999999999 4.000000000
999999999999999999.999999999 5.000000000
-999999999999999999.999999999 6.000000000

View File

@ -27,10 +27,10 @@
999999999999999999.999999999
-- !multi_overflow2 --
999999999999999999.999999999 999999999999999999.999999999000000000
999999999999999999.999999999 999999999999999999.999999999
-- !multi_overflow3 --
999999999999999999.999999999 999999999999999999.999999999000000000
999999999999999999.999999999 999999999999999999.999999999
-- !multi_overflow4 --
999999999999999999.999999999 1.000000000 999999999999999999.999999999
@ -39,10 +39,10 @@
99999999999999999.999999999 0.100000000 999999999999999999.999999990
-- !div_overflow2 --
999999999999999999.999999990
999999999999999999.99999999
-- !div_overflow3 --
99999999999999999.999999999 0.1 999999999999999999.9999999900000
99999999999999999.999999999 0.1 999999999999999999.999999990
-- !div_overflow4 --
999999999999999999.999999990
@ -59,3 +59,6 @@
-- !mod4 --
0.099999999
-- !sql --
2023-12-18T00:00 95357.10

View File

@ -84,42 +84,6 @@ suite("test_decimalv2_load", "nonConcurrent") {
select * from ${tableName2} order by 1;
"""
sql """
drop table if exists test_decimalv2_insert;
"""
sql """
CREATE TABLE `test_decimalv2_insert` (
`k1` decimalv2(27, 9) null,
`k2` decimalv2(27, 9) null
)
DISTRIBUTED BY HASH(`k1`) BUCKETS 10
PROPERTIES (
"replication_num" = "1"
);
"""
sql "set enable_insert_strict=true;"
// overflow, max is inserted
sql """
insert into test_decimalv2_insert values("999999999999999999999999999999",1);
"""
// underflow, min is inserted
sql """
insert into test_decimalv2_insert values("-999999999999999999999999999999",2);
"""
sql """
insert into test_decimalv2_insert values("999999999999999999.9999999991",3);
"""
sql """
insert into test_decimalv2_insert values("-999999999999999999.9999999991",4);
"""
sql """
insert into test_decimalv2_insert values("999999999999999999.9999999995",5);
"""
sql """
insert into test_decimalv2_insert values("-999999999999999999.9999999995",6);
"""
qt_decimalv2_insert "select * from test_decimalv2_insert order by 2; "
sql """
admin set frontend config("enable_decimal_conversion" = "true");
"""

View File

@ -269,6 +269,24 @@ suite("test_decimalv2_overflow2") {
"""
sql """ drop TABLE if exists test_table """
sql """ CREATE TABLE `test_table` (
`day_date` datetime NULL COMMENT '',
`growth_money` decimalv2(18, 2) NULL COMMENT ''
) ENGINE=OLAP
UNIQUE KEY(`day_date`)
COMMENT ''
DISTRIBUTED BY HASH(`day_date`) BUCKETS 4
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
); """
sql """ insert into test_table values ('2023-12-18', '95357.100000000000000000000000000000000000')"""
qt_sql """ select * from test_table """
sql """ drop TABLE if exists test_table """
// TODO
// decimalv2 +-*/ integer
// integer +-*/ decimalv2