branch-2.1: [fix](json) fix parsing double in jsonb #46977 (#47065)

Cherry-picked from #46977

Co-authored-by: Sun Chenyang <sunchenyang@selectdb.com>
This commit is contained in:
github-actions[bot]
2025-01-18 00:25:13 +08:00
committed by GitHub
parent 43bfca9ba5
commit 9dffe5992e
4 changed files with 118 additions and 4 deletions

View File

@ -136,7 +136,7 @@ public:
break;
}
case simdjson::ondemand::json_type::number: {
write_number(doc.get_number());
write_number(doc.get_number(), doc.raw_json_token());
break;
}
}
@ -172,7 +172,7 @@ public:
break;
}
case simdjson::ondemand::json_type::number: {
write_number(value.get_number());
write_number(value.get_number(), value.raw_json_token());
break;
}
case simdjson::ondemand::json_type::object: {
@ -290,9 +290,23 @@ public:
}
}
void write_number(simdjson::ondemand::number num) {
void write_number(simdjson::ondemand::number num, std::string_view raw_string) {
if (num.is_double()) {
if (writer_.writeDouble(num.get_double()) == 0) {
double number = num.get_double();
// When a double exceeds the precision that can be represented by a double type in simdjson, it gets converted to 0.
// The correct approach, should be to truncate the double value instead.
if (number == 0) {
StringParser::ParseResult result;
number = StringParser::string_to_float<double>(raw_string.data(), raw_string.size(),
&result);
if (result != StringParser::PARSE_SUCCESS) {
err_ = JsonbErrType::E_INVALID_NUMBER;
LOG(WARNING) << "invalid number, raw string is: " << raw_string;
return;
}
}
if (writer_.writeDouble(number) == 0) {
err_ = JsonbErrType::E_OUTPUT_FAIL;
LOG(WARNING) << "writeDouble failed";
return;

View File

@ -0,0 +1,2 @@
2 {"rebookProfit":3.729672759600005773616970827788463793694972991943359375}
3 3.729672759600005773616970827788463793694972991943359375
Can't render this file because it contains an unexpected character in line 1 and column 4.

View File

@ -0,0 +1,11 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql_select_src --
3.72967275960001
\N
-- !sql_select_dst --
1 3.72967275960001
1 {"rebookProfit":3.72967275960001}
2 {"rebookProfit":3.72967275960001}
3 3.72967275960001

View File

@ -0,0 +1,87 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import org.codehaus.groovy.runtime.IOGroovyMethods
suite("test_json_load_double", "p0") {
def srcTable = "stringTable"
def dstTable = "jsonTable"
def dataFile = "test_json_double.csv"
sql """ DROP TABLE IF EXISTS ${srcTable} """
sql """ DROP TABLE IF EXISTS ${dstTable} """
sql """
CREATE TABLE IF NOT EXISTS ${srcTable} (
id INT not null,
v STRING not null
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES("replication_num" = "1");
"""
sql """
CREATE TABLE IF NOT EXISTS ${dstTable} (
id INT not null,
j JSON not null
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES("replication_num" = "1");
"""
sql """
insert into ${srcTable} values(1,'{"rebookProfit":3.729672759600005773616970827788463793694972991943359375}');
"""
sql """
insert into ${srcTable} values(1,'3.729672759600005773616970827788463793694972991943359375');
"""
sql """ insert into ${dstTable} select * from ${srcTable} """
// load the json data from csv file
streamLoad {
table dstTable
file dataFile // import csv file
time 10000 // limit inflight 10s
set 'strict_mode', 'true'
// if declared a check callback, the default check condition will ignore.
// So you must check all condition
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(2, json.NumberTotalRows)
assertEquals(2, json.NumberLoadedRows)
assertTrue(json.LoadBytes > 0)
log.info("url: " + json.ErrorURL)
}
}
qt_sql_select_src """ select jsonb_extract(v, '\$.rebookProfit') from ${srcTable} """
qt_sql_select_dst """ select * from ${dstTable} """
}