[function](cast) Make string casting to integers more like MySQL's behavior (#38847) (#41541)

https://github.com/apache/doris/pull/38847

## Proposed changes

There are two issues here.

First, the results of casting are inconsistent between FE and BE.

```
FE
mysql [(none)]>select cast('3.000' as int);
+----------------------+
| cast('3.000' as INT) |
+----------------------+
| 3                    |
+----------------------+

mysql [(none)]>set debug_skip_fold_constant = true;

BE
mysql [(none)]>select cast('3.000' as int);
+----------------------+
| cast('3.000' as INT) |
+----------------------+
| NULL                 |
+----------------------+
```

The second issue is that casting on BE converts '3.0' to null.

Here, the casting logic for FE and BE has been unified.

<!--Describe your changes.-->

## Proposed changes

Issue Number: close #xxx

<!--Describe your changes.-->

---------

Co-authored-by: Xinyi Zou <zouxinyi02@gmail.com>
This commit is contained in:
@ -98,6 +98,13 @@ int64_t ParseUtil::parse_mem_spec(const std::string& mem_spec_str, int64_t paren
|
||||
if (result != StringParser::PARSE_SUCCESS) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto limit_val_double =
|
||||
StringParser::string_to_float<double>(mem_spec_str.data(), number_str_len, &result);
|
||||
if (result == StringParser::PARSE_SUCCESS && limit_val_double != limit_val) {
|
||||
return -1; // mem_spec_str is double.
|
||||
}
|
||||
|
||||
bytes = limit_val;
|
||||
}
|
||||
|
||||
|
||||
@ -243,6 +243,20 @@ private:
|
||||
return true;
|
||||
}
|
||||
|
||||
// For strings like "3.0", "3.123", and "3.", can parse them as 3.
|
||||
static inline bool is_float_suffix(const char* __restrict s, int len) {
|
||||
return (s[0] == '.' && is_all_digit(s + 1, len - 1));
|
||||
}
|
||||
|
||||
static inline bool is_all_digit(const char* __restrict s, int len) {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns the position of the first non-whitespace character in s.
|
||||
static inline int skip_leading_whitespace(const char* __restrict s, int len) {
|
||||
int i = 0;
|
||||
@ -306,7 +320,8 @@ T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseR
|
||||
}
|
||||
val = val * 10 + digit;
|
||||
} else {
|
||||
if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
|
||||
if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
|
||||
!is_float_suffix(s + i, len - i))))) {
|
||||
// Reject the string because either the first char was not a digit,
|
||||
// or the remaining chars are neither all whitespace nor a float suffix (e.g. ".000")
|
||||
*result = PARSE_FAILURE;
|
||||
@ -448,7 +463,8 @@ T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, Par
|
||||
T digit = s[i] - '0';
|
||||
val = val * 10 + digit;
|
||||
} else {
|
||||
if ((UNLIKELY(!is_all_whitespace(s + i, len - i)))) {
|
||||
if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
|
||||
!is_float_suffix(s + i, len - i)))) {
|
||||
*result = PARSE_FAILURE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -95,7 +95,7 @@ TEST(TestParseMemSpec, Bad) {
|
||||
for (const auto& value : bad_values) {
|
||||
bool is_percent = false;
|
||||
int64_t bytes = ParseUtil::parse_mem_spec(value, -1, MemInfo::_s_physical_mem, &is_percent);
|
||||
EXPECT_EQ(-1, bytes);
|
||||
EXPECT_EQ(-1, bytes) << ", value: " << value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -98,3 +98,9 @@
|
||||
-- !test --
|
||||
a
|
||||
|
||||
-- !cast_string_to_int --
|
||||
3 3 0 0 3 \N 3
|
||||
|
||||
-- !cast_string_to_int --
|
||||
3 3 0 0 3 \N 3
|
||||
|
||||
|
||||
@ -1,18 +1,21 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql1 --
|
||||
\N \N \N \N \N \N \N \N \N \N \N \N
|
||||
3 3 3 3 3 \N \N \N \N \N \N \N
|
||||
|
||||
-- !sql2 --
|
||||
\N \N \N \N \N \N \N \N \N \N \N \N
|
||||
3 3 3 3 3 \N \N \N \N \N \N \N
|
||||
3 3 3 3 3 \N \N \N \N \N \N \N
|
||||
|
||||
-- !sql3 --
|
||||
\N \N \N \N \N \N \N \N \N \N \N \N
|
||||
3 3 3 3 3 \N \N \N \N \N \N \N
|
||||
|
||||
-- !sql4 --
|
||||
\N \N \N \N \N 3.12 3.12 3.1 2024-04-02 2024-04-02 2024-04-02T17:00 2024-04-02T17:00
|
||||
3 3 3 3 3 3.12 3.12 3.1 2024-04-02 2024-04-02 2024-04-02T17:00 2024-04-02T17:00
|
||||
|
||||
-- !sql5 --
|
||||
\N \N \N \N \N \N \N 99999999.9 \N \N \N \N
|
||||
3 3 3 3 3 3.12 3.12 3.1 2024-04-02 2024-04-02 2024-04-02T17:00 2024-04-02T17:00
|
||||
|
||||
-- !sql6 --
|
||||
\N \N \N \N \N \N \N 99999999.9 \N \N \N \N
|
||||
|
||||
@ -30,11 +30,11 @@ true
|
||||
\N
|
||||
|
||||
-- !sql_to_small --
|
||||
\N
|
||||
1212
|
||||
|
||||
-- !sql_to_int --
|
||||
\N
|
||||
1212
|
||||
|
||||
-- !sql_to_big --
|
||||
\N
|
||||
1212
|
||||
|
||||
|
||||
@ -58,8 +58,8 @@
|
||||
14 [null]
|
||||
17 [1]
|
||||
17 [1]
|
||||
18 [1, 2, null]
|
||||
18 [1, 2, null]
|
||||
18 [1, 2, 1]
|
||||
18 [1, 2, 1]
|
||||
|
||||
-- !sql_3 --
|
||||
19 1 {"c":1}
|
||||
@ -146,8 +146,8 @@
|
||||
14 [null]
|
||||
17 [1]
|
||||
17 [1]
|
||||
18 [1, 2, null]
|
||||
18 [1, 2, null]
|
||||
18 [1, 2, 1]
|
||||
18 [1, 2, 1]
|
||||
|
||||
-- !sql_33 --
|
||||
19 1 {"c":1}
|
||||
@ -206,7 +206,7 @@
|
||||
-- !sql_2 --
|
||||
14 [null]
|
||||
17 [1]
|
||||
18 [1, 2, null]
|
||||
18 [1, 2, 1]
|
||||
|
||||
-- !sql_3 --
|
||||
19 1 {"c":1}
|
||||
@ -259,7 +259,7 @@
|
||||
-- !sql_22 --
|
||||
14 [null]
|
||||
17 [1]
|
||||
18 [1, 2, null]
|
||||
18 [1, 2, 1]
|
||||
|
||||
-- !sql_33 --
|
||||
19 1 {"c":1}
|
||||
@ -310,7 +310,7 @@
|
||||
-- !sql_2 --
|
||||
14 [null]
|
||||
17 [1]
|
||||
18 [1, 2, null]
|
||||
18 [1, 2, 1]
|
||||
|
||||
-- !sql_3 --
|
||||
19 1 {"c":1}
|
||||
@ -363,7 +363,7 @@
|
||||
-- !sql_22 --
|
||||
14 [null]
|
||||
17 [1]
|
||||
18 [1, 2, null]
|
||||
18 [1, 2, 1]
|
||||
|
||||
-- !sql_33 --
|
||||
19 1 {"c":1}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql1 --
|
||||
1 [1, 2, null]
|
||||
1 [1, 2, 1]
|
||||
1 [1]
|
||||
1 [1]
|
||||
1 [null]
|
||||
|
||||
@ -381,5 +381,11 @@ suite("test_string_basic") {
|
||||
}
|
||||
assertEquals(table_too_long, "fail")
|
||||
sql "drop table if exists varchar_table_too_long;"
|
||||
}
|
||||
|
||||
// calculations on the BE.
|
||||
sql """ set debug_skip_fold_constant = true;"""
|
||||
qt_cast_string_to_int""" select cast('3.123' as int),cast('3.000' as int) , cast('0000.0000' as int) , cast('0000' as int), cast('3.123' as int), cast('3.000 ' as int), cast('3.' as int)"""
|
||||
// calculations on the FE.
|
||||
sql """ set debug_skip_fold_constant = false;"""
|
||||
qt_cast_string_to_int""" select cast('3.123' as int),cast('3.000' as int) , cast('0000.0000' as int) , cast('0000' as int), cast('3.123' as int), cast('3.000 ' as int), cast('3.' as int)"""
|
||||
}
|
||||
@ -62,7 +62,7 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
}
|
||||
sql "sync"
|
||||
qt_sql1 "select * from ${tableName}"
|
||||
qt_sql1 "select * from ${tableName} order by k0"
|
||||
sql "sync"
|
||||
sql "truncate table ${tableName}"
|
||||
sql "sync"
|
||||
@ -83,9 +83,9 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
log.info("Stream load result: ${result}".toString())
|
||||
def json = parseJson(result)
|
||||
assertEquals("fail", json.Status.toLowerCase())
|
||||
assertEquals("success", json.Status.toLowerCase())
|
||||
assertEquals(1, json.NumberTotalRows)
|
||||
assertEquals(1, json.NumberFilteredRows)
|
||||
assertEquals(0, json.NumberFilteredRows)
|
||||
}
|
||||
}
|
||||
|
||||
@ -112,7 +112,7 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
}
|
||||
sql "sync"
|
||||
qt_sql2 "select * from ${tableName}"
|
||||
qt_sql2 "select * from ${tableName} order by k0"
|
||||
sql "sync"
|
||||
sql "truncate table ${tableName}"
|
||||
sql "sync"
|
||||
@ -134,9 +134,9 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
log.info("Stream load result: ${result}".toString())
|
||||
def json = parseJson(result)
|
||||
assertEquals("fail", json.Status.toLowerCase())
|
||||
assertEquals("success", json.Status.toLowerCase())
|
||||
assertEquals(1, json.NumberTotalRows)
|
||||
assertEquals(1, json.NumberFilteredRows)
|
||||
assertEquals(0, json.NumberFilteredRows)
|
||||
}
|
||||
}
|
||||
|
||||
@ -162,7 +162,7 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
}
|
||||
sql "sync"
|
||||
qt_sql3 "select * from ${tableName}"
|
||||
qt_sql3 "select * from ${tableName} order by k0"
|
||||
sql "sync"
|
||||
sql "truncate table ${tableName}"
|
||||
sql "sync"
|
||||
@ -210,7 +210,7 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
}
|
||||
sql "sync"
|
||||
qt_sql4 "select * from ${tableName}"
|
||||
qt_sql4 "select * from ${tableName} order by k0"
|
||||
sql "sync"
|
||||
sql "truncate table ${tableName}"
|
||||
sql "sync"
|
||||
@ -231,9 +231,9 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
log.info("Stream load result: ${result}".toString())
|
||||
def json = parseJson(result)
|
||||
assertEquals("fail", json.Status.toLowerCase())
|
||||
assertEquals("success", json.Status.toLowerCase())
|
||||
assertEquals(1, json.NumberTotalRows)
|
||||
assertEquals(1, json.NumberFilteredRows)
|
||||
assertEquals(0, json.NumberFilteredRows)
|
||||
}
|
||||
}
|
||||
|
||||
@ -259,7 +259,7 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
}
|
||||
sql "sync"
|
||||
qt_sql5 "select * from ${tableName}"
|
||||
qt_sql5 "select * from ${tableName} order by k0"
|
||||
sql "sync"
|
||||
sql "truncate table ${tableName}"
|
||||
sql "sync"
|
||||
@ -311,7 +311,7 @@ suite("test_stream_load_cast", "p0") {
|
||||
}
|
||||
}
|
||||
sql "sync"
|
||||
qt_sql6 "select * from ${tableName}"
|
||||
qt_sql6 "select * from ${tableName} order by k0"
|
||||
sql "sync"
|
||||
sql "truncate table ${tableName}"
|
||||
sql "sync"
|
||||
|
||||
Reference in New Issue
Block a user