Branch-2.1 [Fix](Variant) fix variant serialize to string (#47121) (#47147)

cherry-pick from #47121
This commit is contained in:
lihangyu
2025-01-18 09:12:39 +08:00
committed by GitHub
parent 9dffe5992e
commit 4f9562650d
10 changed files with 71 additions and 22 deletions

View File

@ -1270,20 +1270,46 @@ Status ColumnObject::serialize_one_row_to_json_format(int row, rapidjson::String
root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
}
Arena mem_pool;
bool serialize_root = true; // Assume all subcolumns are null by default
for (const auto& subcolumn : subcolumns) {
if (subcolumn->data.is_root) {
continue; // Skip the root column
}
// If any non-root subcolumn is NOT null, set serialize_root to false and exit early
if (!assert_cast<const ColumnNullable&>(*subcolumn->data.get_finalized_column_ptr())
.is_null_at(row)) {
serialize_root = false;
break;
}
}
#ifndef NDEBUG
VLOG_DEBUG << "dump structure " << JsonFunctions::print_json_value(*doc_structure);
#endif
if (serialize_root && !is_null_root()) {
// only serialize the root when all other subcolumns are null
RETURN_IF_ERROR(
subcolumns.get_root()->data.get_least_common_type_serde()->write_one_cell_to_json(
subcolumns.get_root()->data.get_finalized_column(), root,
doc_structure->GetAllocator(), mem_pool, row));
output->Clear();
compact_null_values(root, doc_structure->GetAllocator());
rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
root.Accept(writer);
return Status::OK();
}
// handle subcolumns exclude root node
for (const auto& subcolumn : subcolumns) {
if (subcolumn->data.is_root) {
continue;
}
RETURN_IF_ERROR(find_and_set_leave_value(
subcolumn->data.get_finalized_column_ptr(), subcolumn->path,
subcolumn->data.get_least_common_type_serde(),
subcolumn->data.get_least_common_type(),
subcolumn->data.least_common_type.get_base_type_id(), root,
doc_structure->GetAllocator(), mem_pool, row));
if (subcolumn->path.empty() && !root.IsObject()) {
// root was modified, only handle root node
break;
}
}
compact_null_values(root, doc_structure->GetAllocator());
if (root.IsNull() && is_null != nullptr) {

File diff suppressed because one or more lines are too long

View File

@ -31,7 +31,7 @@
-- !sql7 --
1 {"a":1,"b":{"c":[{"a":1}]}} 59
1022 {"a":1,"b":{"f":17034,"g":1.111}} 12
1029 \N 12
1029 {"a":1,"b":{"c":1}} 12
1999 {"a":1,"b":{"c":1}} 11
-- !sql8 --
@ -48,7 +48,7 @@
11 [123] 11
12 [123.2] 12
1022 {"a":1,"b":{"f":17034,"g":1.111}} 12
1029 \N 12
1029 {"a":1,"b":{"c":1}} 12
1999 {"a":1,"b":{"c":1}} 11
19921 {"a":1,"d":10} 11

View File

@ -225,8 +225,8 @@
{"c":[{"a":1}]} [{"a":1}]
{"c":[{"a":1}]} [{"a":1}]
{"c":1} 1
{} \N
{} \N
null \N
null \N
-- !sql_11 --
1 {"x":[1]}
@ -329,8 +329,8 @@
{"c":[{"a":1}]} [{"a":1}]
{"c":[{"a":1}]} [{"a":1}]
{"c":1} 1
{} \N
{} \N
null \N
null \N
-- !sql_11 --
1 {"x":[1]}

View File

@ -79,14 +79,14 @@ suite("regression_test_variant_logdata", "nonConcurrent,p2"){
sql "truncate table ${table_name}"
// 0.95 default ratio
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""")
qt_sql_33 """ select json_extract(v,"\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;"""
qt_sql_33_1 """select cast(v['json']['parseFailed'] as string) from logdata where cast(v['json']['parseFailed'] as string) is not null and k = 162 limit 1;"""
sql "truncate table ${table_name}"
// always sparse column
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""")
qt_sql_34 """ select json_extract(v, "\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;"""
sql "truncate table ${table_name}"
@ -95,4 +95,4 @@ suite("regression_test_variant_logdata", "nonConcurrent,p2"){
// TODO add a test case where certain columns are materialized in some files while others are not materialized (sparse)
// unique table
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
}
}

View File

@ -25,7 +25,7 @@ suite("regression_test_variant_agg"){
)
AGGREGATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 4
properties("replication_num" = "1", "disable_auto_compaction" = "false");
properties("replication_num" = "1", "disable_auto_compaction" = "true");
"""
sql """insert into var_agg values (1, '[1]', 1),(1, '{"a" : 1}', 1);"""
sql """insert into var_agg values (2, '[2]', 2),(1, '{"a" : [[[1]]]}', 2);"""
@ -42,10 +42,10 @@ suite("regression_test_variant_agg"){
qt_sql1 "select k, cast(v['a'] as array<int>) from var_agg where size(cast(v['a'] as array<int>)) > 0 order by k, cast(v['a'] as string) asc"
qt_sql2 "select k, cast(v as int), cast(v['b'] as string) from var_agg where length(cast(v['b'] as string)) > 4 order by k, cast(v as string), cast(v['b'] as string) "
qt_sql3 "select k, v from var_agg order by k, cast(v as string) limit 5"
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;"
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k,cast(v as string) desc limit 10000;"
qt_sql5 "select v['b'] from var_agg where cast(v['b'] as int) > 0;"
qt_sql6 "select cast(v['b'] as string) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
qt_sql7 "select * from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
qt_sql6 "select cast(v['b'] as string) from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k, cast(v['b'] as string) "
qt_sql7 "select * from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k, cast(v['b'] as string) "
qt_sql8 "select * from var_agg order by 1, cast(2 as string), 3"
sql "alter table var_agg drop column s"
sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str

View File

@ -53,7 +53,7 @@ suite("regression_test_variant_delete_and_update", "variant_type"){
)
UNIQUE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 4
properties("replication_num" = "1", "enable_unique_key_merge_on_write" = "true", "store_row_column" = "true");
properties("replication_num" = "1", "enable_unique_key_merge_on_write" = "true");
"""
sql "insert into var_delete_update_mow select k, cast(v as string), cast(v as string) from var_delete_update"
sql "delete from ${table_name} where k = 1"

View File

@ -257,6 +257,6 @@ suite("regression_test_variant_desc", "nonConcurrent"){
sql "desc large_tablets"
} finally {
// reset flags
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
}
}

View File

@ -97,9 +97,9 @@ suite("regression_test_variant", "p0"){
qt_sql1 "select k, cast(v['a'] as array<int>) from ${table_name} where size(cast(v['a'] as array<int>)) > 0 order by k, cast(v['a'] as string) asc"
qt_sql2 "select k, cast(v as int), cast(v['b'] as string) from ${table_name} where length(cast(v['b'] as string)) > 4 order by k, cast(v as string), cast(v['b'] as string) "
qt_sql3 "select k, v from ${table_name} order by k, cast(v as string) limit 5"
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from ${table_name} where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;"
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from ${table_name} where cast(v['b'] as string) != 'null' and cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;"
qt_sql5 "select v['b'] from ${table_name} where cast(v['b'] as int) > 0;"
qt_sql6 "select cast(v['b'] as string) from ${table_name} where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
qt_sql6 "select cast(v['b'] as string) from ${table_name} where cast(v['b'] as string) != 'null' and cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
// verify table_name
}
sql "insert into simple_variant_DUPLICATE select k, cast(v as string) from simple_variant_UNIQUE;"

View File

@ -166,6 +166,6 @@ suite("load_p2", "variant_type,p2"){
qt_sql("select count() from github_events")
} finally {
// reset flags
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
// set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
}
}