From 1ae039e4aef8e1848921ecfdfc2613a04513b883 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Thu, 19 Jun 2025 11:06:05 +0800 Subject: [PATCH] [test](jsonb) Add test case for Jsonb format (#51876) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? This PR introduces a UT to prevent unintended changes to the Jsonb format. The Jsonb format is a stable data contract and should not be changed lightly, as it may break backward compatibility or affect persisted data. Changes 1. Added a dedicated test case that verifies the binary output of serialized Jsonb data. 2. The test includes hardcoded reference strings and binary blobs as golden values to validate serialization output. 3. This ensures any modification to the format will be caught early during development or code review. Notes • Developers should not remove or modify this test without a thorough review and a strong justification. • If the serialization format must change, please update the test accordingly and ensure compatibility is preserved. Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- be/test/util/jsonb_serialize_test.cpp | 242 ++++++++++++++++++ .../test_data/jsonb_serialize_test_data.bin | Bin 0 -> 320 bytes 2 files changed, 242 insertions(+) create mode 100644 be/test/util/jsonb_serialize_test.cpp create mode 100644 be/test/util/test_data/jsonb_serialize_test_data.bin diff --git a/be/test/util/jsonb_serialize_test.cpp b/be/test/util/jsonb_serialize_test.cpp new file mode 100644 index 0000000000..9ede7d4658 --- /dev/null +++ b/be/test/util/jsonb_serialize_test.cpp @@ -0,0 +1,242 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include + +#include "testutil/test_util.h" +#include "util/jsonb_parser_simd.h" +#include "util/jsonb_writer.h" + +// This test verifies the stability of Jsonb serialization format. +// DO NOT modify or remove this test! +// It serves as a guard against changes that may unintentionally break +// backward compatibility or introduce inconsistencies in serialized data. +// +// If the JSON format must be changed, please carefully review the impact +// on compatibility, and update this test accordingly with proper justification. +// NOTE: +// 1. Content of file `be/test/util/test_data/jsonb_serialize_test_data.bin` should NOT be modified. +// 2. Content of string `JSON_DATA_STRING` should NOT be modified. +// 3. If you need add more types in Jsonb format, you should write a new test case instead of modifying this one. + +namespace doris { +static inline const std::string JSON_DATA_STRING = R"({ + "string_example": "Hello, world!", + "int_number": 42, + "float_number": 3.14159, + "boolean_true": true, + "boolean_false": false, + "null_value": null, + "array_example": [ + "apple", + 123, + true, + null, + { + "nested_key": "nested_value" + } + ], + "object_example": { + "name": "Alice", + "age": 30, + "is_member": false, + "preferences": { + "colors": ["red", "green", "blue"], + "notifications": { + "email": true, + "sms": false + } + } + } +} +)"; + +class JsonbSerializeTest : public testing::Test { +public: + void SetUp() override { + // Setup code if needed + } + + void TearDown() override { + // Cleanup code if needed + } + + std::string load_serialized_jsonb_data() { + std::string dir_path = GetCurrentRunningDir(); + std::string serialized_data_path = dir_path + data_path; + std::ifstream ifs(serialized_data_path, std::ios::binary); + if (!ifs.is_open()) { + throw std::runtime_error("Failed to open file: " + serialized_data_path); + } + std::string data((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); + ifs.close(); + return data; + } + + JsonbValue* check_object_and_get_value(JsonbValue* jsonb_value, const std::string& key) { + EXPECT_TRUE(jsonb_value->isObject()) << "JsonbValue is not an object"; + auto* object_val = static_cast(jsonb_value); + auto* value = object_val->find(key.c_str(), key.length()); + EXPECT_TRUE(value != nullptr) << "Value not found for key: " + key; + return value; + } + + void check_jsonb_value_type(JsonbValue* jsonb_value, JsonbType expected_type) { + ASSERT_TRUE(jsonb_value != nullptr) << "JsonbValue is null"; + ASSERT_TRUE(jsonb_value->type() == expected_type) + << "JsonbValue type does not match expected type: " + << static_cast(jsonb_value->type()) << " vs " + << static_cast(expected_type); + } + + template + void check_jsonb_value(JsonbValue* jsonb_value, const T& value) { + if constexpr (std::is_same_v) { + check_jsonb_value_type(jsonb_value, JsonbType::T_String); + auto* str_val = static_cast(jsonb_value); + ASSERT_EQ(std::string_view(str_val->getBlob(), str_val->length()), value) + << "String value does not match expected value"; + } else if constexpr (std::is_same_v) { + ASSERT_TRUE(jsonb_value->isTrue() || jsonb_value->isFalse()) + << "JsonbValue is not a boolean"; + ASSERT_EQ(jsonb_value->isTrue(), value) + << "Boolean value does not match expected value"; + } else if constexpr (std::is_integral_v) { + ASSERT_TRUE(jsonb_value->isInt()) << "JsonbValue is not an integer"; + auto* int_val = static_cast(jsonb_value); + ASSERT_EQ(int_val->val(), value) << "Integer value does not match expected value"; + } else if constexpr (std::is_floating_point_v) { + check_jsonb_value_type(jsonb_value, JsonbType::T_Double); + auto* double_val = static_cast(jsonb_value); + ASSERT_DOUBLE_EQ(double_val->val(), value) + << "Double value does not match expected value"; + } else { + FAIL() << "Unsupported type for JsonbValue check"; + } + } + + void check_jsonb_null(JsonbValue* jsonb_value) { + ASSERT_TRUE(jsonb_value != nullptr) << "JsonbValue is null"; + ASSERT_TRUE(jsonb_value->isNull()) << "JsonbValue is not null"; + } + +protected: + inline static std::string data_path = "/util/test_data/jsonb_serialize_test_data.bin"; +}; + +TEST_F(JsonbSerializeTest, serialize) { + JsonbParser parser; + ASSERT_TRUE(parser.parse(JSON_DATA_STRING)) << "Failed to parse JSON data"; + + auto& writer = parser.getWriter(); + auto* output = writer.getOutput(); + + ASSERT_TRUE(output != nullptr) << "Output stream is null"; + + auto serialized_data = load_serialized_jsonb_data(); + ASSERT_EQ(serialized_data.size(), output->getSize()) + << "Serialized data size does not match expected size"; + ASSERT_EQ(memcmp(output->getBuffer(), serialized_data.data(), serialized_data.size()), 0) + << "Serialized data does not match expected data"; + + auto* jsonb_doc = JsonbDocument::createDocument(output->getBuffer(), output->getSize()); + ASSERT_TRUE(jsonb_doc != nullptr) << "Failed to create JsonbDocument"; + + auto* jsonb_value = jsonb_doc->getValue(); + + auto* string_value = check_object_and_get_value(jsonb_value, "string_example"); + check_jsonb_value(string_value, std::string("Hello, world!")); + + auto* int_value = check_object_and_get_value(jsonb_value, "int_number"); + check_jsonb_value(int_value, 42); + + auto* float_value = check_object_and_get_value(jsonb_value, "float_number"); + check_jsonb_value(float_value, 3.14159); + + auto* boolean_true_value = check_object_and_get_value(jsonb_value, "boolean_true"); + check_jsonb_value(boolean_true_value, true); + + auto* boolean_false_value = check_object_and_get_value(jsonb_value, "boolean_false"); + check_jsonb_value(boolean_false_value, false); + + auto* null_value = check_object_and_get_value(jsonb_value, "null_value"); + check_jsonb_null(null_value); + + auto* array_value = check_object_and_get_value(jsonb_value, "array_example"); + ASSERT_TRUE(array_value->isArray()) << "JsonbValue is not an array"; + auto* array_val = static_cast(array_value); + ASSERT_EQ(array_val->numElem(), 5) << "Array size does not match expected size"; + + for (auto it = array_val->begin(); it != array_val->end(); ++it) { + auto* elem = &(*it); + if (elem->isString()) { + check_jsonb_value(elem, std::string("apple")); + } else if (elem->isInt()) { + check_jsonb_value(elem, 123); + } else if (elem->isTrue()) { + check_jsonb_value(elem, true); + } else if (elem->isNull()) { + check_jsonb_null(elem); + } else if (elem->isObject()) { + auto* nested_value = check_object_and_get_value(elem, "nested_key"); + check_jsonb_value(nested_value, std::string("nested_value")); + } + } + + auto* object_value = check_object_and_get_value(jsonb_value, "object_example"); + ASSERT_TRUE(object_value->isObject()) << "JsonbValue is not an object"; + + auto* nested_value = check_object_and_get_value(object_value, "name"); + check_jsonb_value(nested_value, std::string("Alice")); + + nested_value = check_object_and_get_value(object_value, "age"); + check_jsonb_value(nested_value, 30); + + nested_value = check_object_and_get_value(object_value, "is_member"); + check_jsonb_value(nested_value, false); + + nested_value = check_object_and_get_value(object_value, "preferences"); + auto* nested_obj_value = check_object_and_get_value(nested_value, "colors"); + + ASSERT_TRUE(nested_obj_value->isArray()) << "Preferences colors is not an array"; + auto* colors_array = static_cast(nested_obj_value); + ASSERT_EQ(colors_array->numElem(), 3) << "Colors array size does not match expected size"; + + std::vector expected_colors = {"red", "green", "blue"}; + int index = 0; + for (auto it = colors_array->begin(); it != colors_array->end(); ++it, ++index) { + auto* color_elem = &(*it); + ASSERT_TRUE(color_elem->isString()) << "Color element is not a string"; + auto* color_val = static_cast(color_elem); + ASSERT_EQ(std::string_view(color_val->getBlob(), color_val->length()), + expected_colors[index]) + << "Color value does not match expected value"; + } + + nested_value = check_object_and_get_value(nested_value, "notifications"); + ASSERT_TRUE(nested_value->isObject()) << "Notifications is not an object"; + + auto* email_value = check_object_and_get_value(nested_value, "email"); + check_jsonb_value(email_value, true); + auto* sms_value = check_object_and_get_value(nested_value, "sms"); + check_jsonb_value(sms_value, false); +} + +} // namespace doris diff --git a/be/test/util/test_data/jsonb_serialize_test_data.bin b/be/test/util/test_data/jsonb_serialize_test_data.bin new file mode 100644 index 0000000000000000000000000000000000000000..857a0294b6f9aea56fac286f06fef407b3995e05 GIT binary patch literal 320 zcmZSNvSMUl;43aE%FIiTPpwGIEyzjb;ALQ7@JP+c$=6XR&o9bJQRK?ZD~Zo5%}q)z zV%FkG%gIlKaoO|Qq(A&rvU`BA2GcYi)9tR5p14B|yX)14Ceo1CpW^!Ul XW`15V7sx5Bskw=nIgHH3xy4KX7J*#L literal 0 HcmV?d00001