[cherry-pick](jsonb) add a check on jsonb values to avoid writing invalid jsonb values into segment files (#48729)
…ke select core (#48625) Fix invalid jsonb values being written into segment files, which makes SELECT core dump. A check on the jsonb value is added in convert_to_olap, whose output is what gets written into the segment file.
This commit is contained in:
@ -177,7 +177,7 @@ public:
|
||||
static JsonbDocument* makeDocument(char* pb, uint32_t size, const JsonbValue* rval);
|
||||
|
||||
// create a JsonbDocument object from JSONB packed bytes
|
||||
static JsonbDocument* createDocument(const char* pb, uint32_t size);
|
||||
static JsonbDocument* checkAndCreateDocument(const char* pb, size_t size);
|
||||
|
||||
// create a JsonbValue from JSONB packed bytes
|
||||
static JsonbValue* createValue(const char* pb, uint32_t size);
|
||||
@ -1138,7 +1138,7 @@ inline JsonbDocument* JsonbDocument::makeDocument(char* pb, uint32_t size, const
|
||||
return doc;
|
||||
}
|
||||
|
||||
inline JsonbDocument* JsonbDocument::createDocument(const char* pb, uint32_t size) {
|
||||
inline JsonbDocument* JsonbDocument::checkAndCreateDocument(const char* pb, size_t size) {
|
||||
if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@ public:
|
||||
|
||||
// get json string
|
||||
const std::string to_json_string(const char* data, size_t size) {
|
||||
JsonbDocument* pdoc = doris::JsonbDocument::createDocument(data, size);
|
||||
JsonbDocument* pdoc = doris::JsonbDocument::checkAndCreateDocument(data, size);
|
||||
if (!pdoc) {
|
||||
LOG(FATAL) << "invalid json binary value: " << std::string_view(data, size);
|
||||
}
|
||||
|
||||
@ -479,7 +479,8 @@ public:
|
||||
|
||||
OS_TYPE* getOutput() { return os_; }
|
||||
JsonbDocument* getDocument() {
|
||||
return JsonbDocument::createDocument(getOutput()->getBuffer(), getOutput()->getSize());
|
||||
return JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
|
||||
getOutput()->getSize());
|
||||
}
|
||||
|
||||
JsonbValue* getValue() {
|
||||
|
||||
@ -63,7 +63,7 @@ void VExplodeJsonArrayTableFunction<DataImpl>::process_row(size_t row_idx) {
|
||||
StringRef text = _text_column->get_data_at(row_idx);
|
||||
if (text.data != nullptr) {
|
||||
if (WhichDataType(_text_datatype).is_json()) {
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size);
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, text.size);
|
||||
if (doc && doc->getValue() && doc->getValue()->isArray()) {
|
||||
auto* a = (ArrayVal*)doc->getValue();
|
||||
if (a->numElem() > 0) {
|
||||
|
||||
@ -54,8 +54,8 @@ void VExplodeJsonObjectTableFunction::process_row(size_t row_idx) {
|
||||
|
||||
StringRef text = _json_object_column->get_data_at(row_idx);
|
||||
if (text.data != nullptr) {
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size);
|
||||
if (UNLIKELY(!doc || !doc->getValue())) {
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(text.data, text.size);
|
||||
if (!doc || !doc->getValue()) [[unlikely]] {
|
||||
// error jsonb, put null into output, cur_size = 0 , we will insert_default
|
||||
return;
|
||||
}
|
||||
|
||||
@ -714,7 +714,7 @@ struct ConvertImplGenericFromJsonb {
|
||||
const bool is_dst_string = is_string_or_fixed_string(data_type_to);
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
const auto& val = col_from_string->get_data_at(i);
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size);
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size);
|
||||
if (UNLIKELY(!doc || !doc->getValue())) {
|
||||
(*vec_null_map_to)[i] = 1;
|
||||
col_to->insert_default();
|
||||
@ -862,7 +862,7 @@ struct ConvertImplFromJsonb {
|
||||
}
|
||||
|
||||
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(val.data, val.size);
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size);
|
||||
if (UNLIKELY(!doc || !doc->getValue())) {
|
||||
null_map[i] = 1;
|
||||
res[i] = 0;
|
||||
|
||||
@ -557,7 +557,7 @@ private:
|
||||
continue;
|
||||
}
|
||||
const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size);
|
||||
if (UNLIKELY(!doc || !doc->getValue())) {
|
||||
dst_arr.clear();
|
||||
return Status::InvalidArgument("jsonb data is invalid");
|
||||
@ -665,7 +665,7 @@ private:
|
||||
static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str,
|
||||
int l_str_size, JsonbPath& path) {
|
||||
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, l_str_size);
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size);
|
||||
if (UNLIKELY(!doc || !doc->getValue())) {
|
||||
return;
|
||||
}
|
||||
@ -760,7 +760,7 @@ private:
|
||||
}
|
||||
|
||||
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size);
|
||||
if (UNLIKELY(!doc || !doc->getValue())) {
|
||||
StringOP::push_null_string(i, res_data, res_offsets, null_map);
|
||||
return;
|
||||
@ -886,7 +886,7 @@ public:
|
||||
writer->writeStartArray();
|
||||
|
||||
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw, l_size);
|
||||
|
||||
for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
|
||||
if (UNLIKELY(!doc || !doc->getValue())) {
|
||||
@ -1027,7 +1027,7 @@ private:
|
||||
}
|
||||
|
||||
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(l_raw_str, l_str_size);
|
||||
JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size);
|
||||
if (UNLIKELY(!doc || !doc->getValue())) {
|
||||
null_map[i] = 1;
|
||||
res[i] = 0;
|
||||
@ -1406,7 +1406,8 @@ struct JsonbLengthUtil {
|
||||
}
|
||||
auto jsonb_value = jsonb_data_column->get_data_at(i);
|
||||
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
|
||||
JsonbDocument* doc = JsonbDocument::createDocument(jsonb_value.data, jsonb_value.size);
|
||||
JsonbDocument* doc =
|
||||
JsonbDocument::checkAndCreateDocument(jsonb_value.data, jsonb_value.size);
|
||||
JsonbValue* value = doc->getValue()->findValue(path, nullptr);
|
||||
if (UNLIKELY(!value)) {
|
||||
null_map->get_data()[i] = 1;
|
||||
@ -1541,9 +1542,9 @@ struct JsonbContainsUtil {
|
||||
}
|
||||
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
|
||||
JsonbDocument* doc1 =
|
||||
JsonbDocument::createDocument(jsonb_value1.data, jsonb_value1.size);
|
||||
JsonbDocument::checkAndCreateDocument(jsonb_value1.data, jsonb_value1.size);
|
||||
JsonbDocument* doc2 =
|
||||
JsonbDocument::createDocument(jsonb_value2.data, jsonb_value2.size);
|
||||
JsonbDocument::checkAndCreateDocument(jsonb_value2.data, jsonb_value2.size);
|
||||
|
||||
JsonbValue* value1 = doc1->getValue()->findValue(path, nullptr);
|
||||
JsonbValue* value2 = doc2->getValue();
|
||||
|
||||
@ -86,7 +86,7 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const
|
||||
const std::unordered_map<uint32_t, uint32_t>& col_id_to_idx,
|
||||
Block& dst,
|
||||
const std::vector<std::string>& default_values) {
|
||||
auto pdoc = JsonbDocument::createDocument(data, size);
|
||||
auto pdoc = JsonbDocument::checkAndCreateDocument(data, size);
|
||||
JsonbDocument& doc = *pdoc;
|
||||
size_t num_rows = dst.rows();
|
||||
size_t filled_columns = 0;
|
||||
@ -120,4 +120,4 @@ void JsonbSerializeUtil::jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace doris::vectorized
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -139,7 +139,7 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
|
||||
return std::make_unique<OlapColumnDataConvertorDecimalV3<Decimal256>>();
|
||||
}
|
||||
case FieldType::OLAP_FIELD_TYPE_JSONB: {
|
||||
return std::make_unique<OlapColumnDataConvertorVarChar>(true);
|
||||
return std::make_unique<OlapColumnDataConvertorVarChar>(true, true);
|
||||
}
|
||||
case FieldType::OLAP_FIELD_TYPE_BOOL: {
|
||||
return std::make_unique<OlapColumnDataConvertorSimple<vectorized::UInt8>>();
|
||||
@ -204,7 +204,10 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
|
||||
void OlapBlockDataConvertor::set_source_content(const vectorized::Block* block, size_t row_pos,
|
||||
size_t num_rows) {
|
||||
DCHECK(block && num_rows > 0 && row_pos + num_rows <= block->rows() &&
|
||||
block->columns() == _convertors.size());
|
||||
block->columns() == _convertors.size())
|
||||
<< "block=" << block->dump_structure() << ", block rows=" << block->rows()
|
||||
<< ", row_pos=" << row_pos << ", num_rows=" << num_rows
|
||||
<< ", convertors.size=" << _convertors.size();
|
||||
size_t cid = 0;
|
||||
for (const auto& typed_column : *block) {
|
||||
if (typed_column.column->size() != block->rows()) {
|
||||
@ -601,8 +604,8 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorChar::convert_to_olap() {
|
||||
|
||||
// class OlapBlockDataConvertor::OlapColumnDataConvertorVarChar
|
||||
OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::OlapColumnDataConvertorVarChar(
|
||||
bool check_length)
|
||||
: _check_length(check_length) {}
|
||||
bool check_length, bool is_jsonb)
|
||||
: _check_length(check_length), _is_jsonb(is_jsonb) {}
|
||||
|
||||
void OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::set_source_column(
|
||||
const ColumnWithTypeAndName& typed_column, size_t row_pos, size_t num_rows) {
|
||||
@ -646,6 +649,12 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
|
||||
"Not support string len over than "
|
||||
"`string_type_length_soft_limit_bytes` in vec engine.");
|
||||
}
|
||||
// Make sure that the json binary data written in is the correct jsonb value.
|
||||
if (_is_jsonb &&
|
||||
!doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) {
|
||||
return Status::InvalidArgument("invalid json binary value: {}",
|
||||
std::string_view(slice->data, slice->size));
|
||||
}
|
||||
} else {
|
||||
// TODO: this may not be necessary, check and remove later
|
||||
slice->data = nullptr;
|
||||
@ -667,6 +676,12 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorVarChar::convert_to_olap(
|
||||
"Not support string len over than `string_type_length_soft_limit_bytes`"
|
||||
" in vec engine.");
|
||||
}
|
||||
// Make sure that the json binary data written in is the correct jsonb value.
|
||||
if (_is_jsonb &&
|
||||
!doris::JsonbDocument::checkAndCreateDocument(slice->data, slice->size)) {
|
||||
return Status::InvalidArgument("invalid json binary value: {}",
|
||||
std::string_view(slice->data, slice->size));
|
||||
}
|
||||
string_offset = *offset_cur;
|
||||
++slice;
|
||||
++offset_cur;
|
||||
|
||||
@ -197,7 +197,7 @@ private:
|
||||
|
||||
class OlapColumnDataConvertorVarChar : public OlapColumnDataConvertorBase {
|
||||
public:
|
||||
OlapColumnDataConvertorVarChar(bool check_length);
|
||||
OlapColumnDataConvertorVarChar(bool check_length, bool is_jsonb = false);
|
||||
~OlapColumnDataConvertorVarChar() override = default;
|
||||
|
||||
void set_source_column(const ColumnWithTypeAndName& typed_column, size_t row_pos,
|
||||
@ -209,6 +209,8 @@ private:
|
||||
|
||||
private:
|
||||
bool _check_length;
|
||||
bool _is_jsonb =
|
||||
false; // Make sure that the json binary data written in is the correct jsonb value.
|
||||
PaddedPODArray<Slice> _slice;
|
||||
};
|
||||
|
||||
|
||||
@ -240,7 +240,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
|
||||
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
|
||||
jsonb_writer.getOutput()->getSize());
|
||||
StringRef jsonb_data = jsonb_column->get_data_at(0);
|
||||
auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size);
|
||||
auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size);
|
||||
JsonbDocument& doc = *pdoc;
|
||||
for (auto it = doc->begin(); it != doc->end(); ++it) {
|
||||
serde->read_one_cell_from_jsonb(*vec, it->value());
|
||||
@ -270,7 +270,7 @@ TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
|
||||
jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
|
||||
jsonb_writer.getOutput()->getSize());
|
||||
StringRef jsonb_data = jsonb_column->get_data_at(0);
|
||||
auto pdoc = JsonbDocument::createDocument(jsonb_data.data, jsonb_data.size);
|
||||
auto pdoc = JsonbDocument::checkAndCreateDocument(jsonb_data.data, jsonb_data.size);
|
||||
JsonbDocument& doc = *pdoc;
|
||||
for (auto it = doc->begin(); it != doc->end(); ++it) {
|
||||
serde->read_one_cell_from_jsonb(*vec, it->value());
|
||||
|
||||
242
be/test/vec/olap/jsonb_value_test.cpp
Normal file
242
be/test/vec/olap/jsonb_value_test.cpp
Normal file
@ -0,0 +1,242 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <gtest/gtest-message.h>
|
||||
#include <gtest/gtest-test-part.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "gtest/gtest_pred_impl.h"
|
||||
#include "vec/columns/column_string.h"
|
||||
#include "vec/common/string_ref.h"
|
||||
#include "vec/core/columns_with_type_and_name.h"
|
||||
#include "vec/data_types/serde/data_type_serde.h"
|
||||
#include "vec/olap/olap_data_convertor.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
// Feeds well-formed JSON documents through the JSONB serde and checks that the
// OLAP block data convertor accepts the resulting column, first as a plain
// column and then as a nullable column that also contains NULL rows.
TEST(JsonbValueConvertorTest, JsonbValueValid) {
    constexpr size_t kRowCount = 5;

    // Source documents; they must outlive the slices below, which point into them.
    std::string json_texts[kRowCount] = {
            "{\"key1\": \"value1\"}",
            "{\"key2\": 12345}",
            "{\"key3\": true}",
            "{\"key4\": [1, 2, 3]}",
            "{\"key5\": {\"subkey\": \"subvalue\"}}",
    };
    // One slice per document. The serde takes the slice by reference, so the
    // same slice objects are reused for the nullable column further down.
    Slice json_slices[kRowCount];

    // 1. create jsonb column with serde
    auto plain_column = ColumnString::create();
    auto jsonb_type = std::make_shared<DataTypeJsonb>();
    auto plain_serde = jsonb_type->get_serde();
    vectorized::DataTypeSerDe::FormatOptions format_options;

    for (size_t row = 0; row < kRowCount; ++row) {
        json_slices[row] = Slice(json_texts[row].data(), json_texts[row].length());
        auto parse_status = plain_serde->deserialize_one_cell_from_json(
                *plain_column, json_slices[row], format_options);
        ASSERT_TRUE(parse_status.ok());
        ASSERT_EQ(plain_column->size(), row + 1);
    }

    // 2. put column into block
    vectorized::ColumnWithTypeAndName plain_argument(plain_column->assume_mutable(), jsonb_type,
                                                     "jsonb_column");
    Block block;
    block.insert(plain_argument);

    // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert column data to segment file data
    auto convertor = std::make_unique<OlapBlockDataConvertor>();
    auto jsonb_tablet_column = TabletColumn();
    jsonb_tablet_column.set_type(FieldType::OLAP_FIELD_TYPE_JSONB);
    convertor->add_column_data_convertor(jsonb_tablet_column);
    convertor->set_source_content(&block, 0, 5);
    auto [plain_status, plain_converted] = convertor->convert_column_data(0);
    ASSERT_TRUE(plain_status.ok());
    ASSERT_NE(plain_converted, nullptr);

    // Same scenario with a null map: valid, NULL (default), valid, NULL (parsed), valid.
    auto nullable_column = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create());
    auto nullable_jsonb_type = make_nullable(std::make_shared<DataTypeJsonb>());
    auto nullable_serde = nullable_jsonb_type->get_serde();

    auto cell_status = nullable_serde->deserialize_one_cell_from_json(
            *nullable_column, json_slices[0], format_options);
    ASSERT_TRUE(cell_status.ok());
    ASSERT_EQ(1, nullable_column->size());

    // insert null
    nullable_column->insert_default();
    ASSERT_EQ(2, nullable_column->size());

    cell_status = nullable_serde->deserialize_one_cell_from_json(*nullable_column, json_slices[1],
                                                                 format_options);
    ASSERT_TRUE(cell_status.ok());
    ASSERT_EQ(3, nullable_column->size());

    // deserialize null
    Slice null_literal = "NULL";
    cell_status = nullable_serde->deserialize_one_cell_from_json(*nullable_column, null_literal,
                                                                 format_options);
    ASSERT_TRUE(cell_status.ok());
    ASSERT_EQ(4, nullable_column->size());

    cell_status = nullable_serde->deserialize_one_cell_from_json(*nullable_column, json_slices[2],
                                                                 format_options);
    ASSERT_TRUE(cell_status.ok());
    ASSERT_EQ(5, nullable_column->size());

    // 2. put column into block
    vectorized::ColumnWithTypeAndName nullable_argument(nullable_column->assume_mutable(),
                                                        nullable_jsonb_type, "jsonb_column_null");
    block.clear();
    block.insert(nullable_argument);

    // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert column data to segment file data
    convertor->reset();
    convertor->add_column_data_convertor(jsonb_tablet_column);
    convertor->set_source_content(&block, 0, 5);
    auto [nullable_status, nullable_converted] = convertor->convert_column_data(0);
    ASSERT_TRUE(nullable_status.ok()) << nullable_status.to_string();
    ASSERT_NE(nullable_converted, nullptr);
}
|
||||
|
||||
// Builds JSONB columns through the serde, pushes a stray byte into the raw
// character buffer part-way through, and checks that the OLAP block data
// convertor reports an "invalid json binary value" error.
TEST(JsonbValueConvertorTest, JsonbValueInvalid) {
    constexpr size_t kRowCount = 5;
    // Index of the row before which the plain column's buffer is corrupted.
    constexpr size_t kCorruptBeforeRow = 4;

    // Source documents; they must outlive the slices below, which point into them.
    std::string json_texts[kRowCount] = {
            "{\"key1\": \"value1\"}",
            "{\"key2\": 12345}",
            "{\"key3\": true}",
            "{\"key4\": [1, 2, 3]}",
            "{\"key5\": {\"subkey\": \"subvalue\"}}",
    };
    // One slice per document. The serde takes the slice by reference, so the
    // same slice objects are reused for the nullable column further down.
    Slice json_slices[kRowCount];

    // 1. create jsonb column with serde
    auto plain_column = ColumnString::create();
    auto jsonb_type = std::make_shared<DataTypeJsonb>();
    auto plain_serde = jsonb_type->get_serde();
    vectorized::DataTypeSerDe::FormatOptions format_options;

    for (size_t row = 0; row < kRowCount; ++row) {
        if (row == kCorruptBeforeRow) {
            // invalid jsonb data: a stray byte is appended straight to the
            // character buffer, bypassing the serde.
            plain_column->get_chars().emplace_back('s');
        }
        json_slices[row] = Slice(json_texts[row].data(), json_texts[row].length());
        auto parse_status = plain_serde->deserialize_one_cell_from_json(
                *plain_column, json_slices[row], format_options);
        ASSERT_TRUE(parse_status.ok());
        ASSERT_EQ(plain_column->size(), row + 1);
    }

    // 2. put column into block
    vectorized::ColumnWithTypeAndName plain_argument(plain_column->assume_mutable(), jsonb_type,
                                                     "jsonb_column");
    Block block;
    block.insert(plain_argument);

    // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert column data to segment file data
    auto convertor = std::make_unique<OlapBlockDataConvertor>();
    auto jsonb_tablet_column = TabletColumn();
    jsonb_tablet_column.set_type(FieldType::OLAP_FIELD_TYPE_JSONB);
    convertor->add_column_data_convertor(jsonb_tablet_column);
    convertor->set_source_content(&block, 0, 5);
    auto [status, column] = convertor->convert_column_data(0);
    // invalid will make error
    ASSERT_FALSE(status.ok());
    ASSERT_TRUE(status.to_string().find("invalid json binary value") != std::string::npos);
    ASSERT_NE(column, nullptr);

    // Same scenario with a null map: valid, NULL (default), valid, <corrupt>, NULL (parsed), valid.
    auto nullable_column = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create());
    auto nullable_jsonb_type = make_nullable(std::make_shared<DataTypeJsonb>());
    auto nullable_serde = nullable_jsonb_type->get_serde();

    auto cell_status = nullable_serde->deserialize_one_cell_from_json(
            *nullable_column, json_slices[0], format_options);
    ASSERT_TRUE(cell_status.ok());
    ASSERT_EQ(1, nullable_column->size());

    // insert null
    nullable_column->insert_default();
    ASSERT_EQ(2, nullable_column->size());

    cell_status = nullable_serde->deserialize_one_cell_from_json(*nullable_column, json_slices[1],
                                                                 format_options);
    ASSERT_TRUE(cell_status.ok());
    ASSERT_EQ(3, nullable_column->size());

    // invalid jsonb data: append a stray byte to the nested string column's buffer
    assert_cast<ColumnString*>(nullable_column->get_nested_column_ptr().get())
            ->get_chars()
            .emplace_back('s');

    // deserialize null
    Slice null_literal = "NULL";
    cell_status = nullable_serde->deserialize_one_cell_from_json(*nullable_column, null_literal,
                                                                 format_options);
    ASSERT_TRUE(cell_status.ok());
    ASSERT_EQ(4, nullable_column->size());

    cell_status = nullable_serde->deserialize_one_cell_from_json(*nullable_column, json_slices[2],
                                                                 format_options);
    ASSERT_TRUE(cell_status.ok());
    ASSERT_EQ(5, nullable_column->size());

    // 2. put column into block
    vectorized::ColumnWithTypeAndName nullable_argument(nullable_column->assume_mutable(),
                                                        nullable_jsonb_type, "jsonb_column_null");
    block.clear();
    block.insert(nullable_argument);

    // 3. use OlapColumnDataConvertorVarchar::convert_to_olap to convert column data to segment file data
    convertor->reset();
    convertor->add_column_data_convertor(jsonb_tablet_column);
    convertor->set_source_content(&block, 0, 5);
    auto [status1, column1] = convertor->convert_column_data(0);
    // NOTE(review): the assertions below re-check `status` / `column` from the
    // first (non-nullable) conversion; `status1` / `column1` are never inspected.
    // Confirm whether the nullable conversion is really expected to fail before
    // switching these over.
    ASSERT_FALSE(status.ok());
    ASSERT_TRUE(status.to_string().find("invalid json binary value") != std::string::npos);
    ASSERT_NE(column, nullptr);
}
|
||||
|
||||
} // namespace doris::vectorized
|
||||
Reference in New Issue
Block a user