[pick](json-serde)pick jsonb string deserialize with spec char (#38711)
## Proposed changes backport: https://github.com/apache/doris/pull/37176 Issue Number: close #xxx <!--Describe your changes.-->
This commit is contained in:
@ -73,19 +73,59 @@ public:
|
||||
auto result = check_column_const_set_readability(column, row_num);
|
||||
ColumnPtr ptr = result.first;
|
||||
row_num = result.second;
|
||||
|
||||
if (_nesting_level > 1) {
|
||||
bw.write('"');
|
||||
}
|
||||
|
||||
const auto& value = assert_cast<const ColumnType&>(*ptr).get_data_at(row_num);
|
||||
bw.write(value.data, value.size);
|
||||
|
||||
if (_nesting_level > 1) {
|
||||
bw.write('"');
|
||||
}
|
||||
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
|
||||
if (options.escape_char != 0) {
|
||||
// we should make deal with some special characters in json str if we have escape_char
|
||||
StringRef str_ref = value;
|
||||
write_with_escaped_char_to_json(str_ref, bw);
|
||||
} else {
|
||||
bw.write(value.data, value.size);
|
||||
}
|
||||
} else {
|
||||
bw.write(value.data, value.size);
|
||||
}
|
||||
if (_nesting_level > 1) {
|
||||
bw.write('"');
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
inline void write_with_escaped_char_to_json(StringRef value, BufferWritable& bw) const {
|
||||
for (char it : value) {
|
||||
switch (it) {
|
||||
case '\b':
|
||||
bw.write("\\b", 2);
|
||||
break;
|
||||
case '\f':
|
||||
bw.write("\\f", 2);
|
||||
break;
|
||||
case '\n':
|
||||
bw.write("\\n", 2);
|
||||
break;
|
||||
case '\r':
|
||||
bw.write("\\r", 2);
|
||||
break;
|
||||
case '\t':
|
||||
bw.write("\\t", 2);
|
||||
break;
|
||||
case '\\':
|
||||
bw.write("\\\\", 2);
|
||||
break;
|
||||
case '"':
|
||||
bw.write("\\\"", 2);
|
||||
break;
|
||||
default:
|
||||
bw.write(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
|
||||
BufferWritable& bw, FormatOptions& options) const override {
|
||||
SERIALIZE_COLUMN_TO_JSON();
|
||||
|
||||
@ -785,6 +785,7 @@ struct ConvertImplGenericToJsonb {
|
||||
|
||||
auto tmp_col = ColumnString::create();
|
||||
vectorized::DataTypeSerDe::FormatOptions options;
|
||||
options.escape_char = '\\';
|
||||
for (size_t i = 0; i < input_rows_count; i++) {
|
||||
// convert to string
|
||||
tmp_col->clear();
|
||||
|
||||
4
regression-test/data/jsonb_p0/test_jsonb_cast.csv
Normal file
4
regression-test/data/jsonb_p0/test_jsonb_cast.csv
Normal file
@ -0,0 +1,4 @@
|
||||
1 \N
|
||||
2 ['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', '{"x" : 1}']
|
||||
3 ['foo\'bar', 'foo"bar', 'foo\\'bar', 'foo\'\'bar']
|
||||
4 ['\/some\/cool\/url', '/some/cool/url', 'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']
|
||||
|
Can't render this file because it contains an unexpected character in line 2 and column 17.
|
27
regression-test/data/jsonb_p0/test_jsonb_cast.out
Normal file
27
regression-test/data/jsonb_p0/test_jsonb_cast.out
Normal file
@ -0,0 +1,27 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !select_1 --
|
||||
1 \N
|
||||
2 ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "{"x" : 1}"]
|
||||
3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
|
||||
4 ["/some/cool/url", "/some/cool/url", "a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
|
||||
|
||||
-- !select_2 --
|
||||
1 \N
|
||||
2 ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "{"x" : 1}"]
|
||||
3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
|
||||
4 ["/some/cool/url", "/some/cool/url", "a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
|
||||
27 ["{"k1":"v1", "k2": 200}"]
|
||||
28 ["{"a.b.c":{"k1.a1":"v31", "k2": 300},"a":"niu"}"]
|
||||
29 ["\n\r", "\n\r"]
|
||||
30 ["f\r\n", "f\r\n""]
|
||||
|
||||
-- !select_json --
|
||||
1 \N
|
||||
2 ["{'x' : '{\\"y\\" : 1}', 't' : '{\\"y\\" : 2}'}","{\\"x\\" : 1}"]
|
||||
3 ["foo'bar', 'foo\\"bar', 'foo\\\\'bar', 'foo''bar"]
|
||||
4 ["/some/cool/url","/some/cool/url","a\\\\_\\\\c\\\\l\\\\i\\\\c\\\\k\\\\h\\\\o\\\\u\\\\s\\\\e"]
|
||||
27 ["{\\"k1\\":\\"v1\\", \\"k2\\": 200}"]
|
||||
28 ["{\\"a.b.c\\":{\\"k1.a1\\":\\"v31\\", \\"k2\\": 300},\\"a\\":\\"niu\\"}"]
|
||||
29 ["\\f\\n\\r","\\f\\n\\r"]
|
||||
30 ["f\\b\\r\\n","f\\b\\r\\n\\""]
|
||||
|
||||
79
regression-test/suites/jsonb_p0/test_jsonb_cast.groovy
Normal file
79
regression-test/suites/jsonb_p0/test_jsonb_cast.groovy
Normal file
@ -0,0 +1,79 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
import org.codehaus.groovy.runtime.IOGroovyMethods
|
||||
|
||||
suite("test_jsonb_cast", "p0") {
|
||||
|
||||
// define a sql table with array<text> which has some Escape Character and should also to cast to json
|
||||
def testTable = "tbl_test_array_text_cast_jsonb"
|
||||
def dataFile = "test_jsonb_cast.csv"
|
||||
|
||||
sql """ set experimental_enable_nereids_planner = true """
|
||||
sql """ set enable_fallback_to_original_planner = true """
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${testTable}"
|
||||
|
||||
sql """
|
||||
CREATE TABLE IF NOT EXISTS ${testTable} (
|
||||
id INT,
|
||||
a ARRAY<TEXT>,
|
||||
)
|
||||
DUPLICATE KEY(id)
|
||||
DISTRIBUTED BY HASH(id) BUCKETS 3
|
||||
PROPERTIES("replication_num" = "1");
|
||||
"""
|
||||
|
||||
// load the jsonb data from csv file
|
||||
streamLoad {
|
||||
table testTable
|
||||
|
||||
file dataFile // import csv file
|
||||
time 10000 // limit inflight 10s
|
||||
set 'strict_mode', 'true'
|
||||
|
||||
// if declared a check callback, the default check condition will ignore.
|
||||
// So you must check all condition
|
||||
check { result, exception, startTime, endTime ->
|
||||
if (exception != null) {
|
||||
throw exception
|
||||
}
|
||||
log.info("Stream load result: ${result}".toString())
|
||||
def json = parseJson(result)
|
||||
assertEquals(4, json.NumberTotalRows)
|
||||
assertEquals(4, json.NumberLoadedRows)
|
||||
assertTrue(json.LoadBytes > 0)
|
||||
}
|
||||
}
|
||||
|
||||
sql """ sync; """
|
||||
|
||||
// check result
|
||||
qt_select_1 "SELECT * FROM ${testTable} ORDER BY id"
|
||||
|
||||
|
||||
// insert into valid json rows
|
||||
sql """INSERT INTO ${testTable} VALUES(27, ['{"k1":"v1", "k2": 200}'])"""
|
||||
sql """INSERT INTO ${testTable} VALUES(28, ['{"a.b.c":{"k1.a1":"v31", "k2": 300},"a":"niu"}'])"""
|
||||
sql """INSERT INTO ${testTable} VALUES(29, ['\f\n\r', "\f\n\r"])"""
|
||||
sql """INSERT INTO ${testTable} VALUES(30, ["\\f\\b\\r\\n", '\\f\\b\\r\\n"'])"""
|
||||
|
||||
// check result
|
||||
qt_select_2 "SELECT * FROM ${testTable} ORDER BY id"
|
||||
// check cast as json
|
||||
qt_select_json "SELECT id, cast(a as JSON) FROM ${testTable} ORDER BY id"
|
||||
}
|
||||
Reference in New Issue
Block a user