cherry-pick from #47121
This commit is contained in:
@ -1270,20 +1270,46 @@ Status ColumnObject::serialize_one_row_to_json_format(int row, rapidjson::String
|
||||
root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
|
||||
}
|
||||
Arena mem_pool;
|
||||
|
||||
bool serialize_root = true; // Assume all subcolumns are null by default
|
||||
for (const auto& subcolumn : subcolumns) {
|
||||
if (subcolumn->data.is_root) {
|
||||
continue; // Skip the root column
|
||||
}
|
||||
|
||||
// If any non-root subcolumn is NOT null, set serialize_root to false and exit early
|
||||
if (!assert_cast<const ColumnNullable&>(*subcolumn->data.get_finalized_column_ptr())
|
||||
.is_null_at(row)) {
|
||||
serialize_root = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
VLOG_DEBUG << "dump structure " << JsonFunctions::print_json_value(*doc_structure);
|
||||
#endif
|
||||
if (serialize_root && !is_null_root()) {
|
||||
// only serialize root when all other subcolumns are null
|
||||
RETURN_IF_ERROR(
|
||||
subcolumns.get_root()->data.get_least_common_type_serde()->write_one_cell_to_json(
|
||||
subcolumns.get_root()->data.get_finalized_column(), root,
|
||||
doc_structure->GetAllocator(), mem_pool, row));
|
||||
output->Clear();
|
||||
compact_null_values(root, doc_structure->GetAllocator());
|
||||
rapidjson::Writer<rapidjson::StringBuffer> writer(*output);
|
||||
root.Accept(writer);
|
||||
return Status::OK();
|
||||
}
|
||||
// handle subcolumns, excluding the root node
|
||||
for (const auto& subcolumn : subcolumns) {
|
||||
if (subcolumn->data.is_root) {
|
||||
continue;
|
||||
}
|
||||
RETURN_IF_ERROR(find_and_set_leave_value(
|
||||
subcolumn->data.get_finalized_column_ptr(), subcolumn->path,
|
||||
subcolumn->data.get_least_common_type_serde(),
|
||||
subcolumn->data.get_least_common_type(),
|
||||
subcolumn->data.least_common_type.get_base_type_id(), root,
|
||||
doc_structure->GetAllocator(), mem_pool, row));
|
||||
if (subcolumn->path.empty() && !root.IsObject()) {
|
||||
// root was modified, only handle root node
|
||||
break;
|
||||
}
|
||||
}
|
||||
compact_null_values(root, doc_structure->GetAllocator());
|
||||
if (root.IsNull() && is_null != nullptr) {
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -31,7 +31,7 @@
|
||||
-- !sql7 --
|
||||
1 {"a":1,"b":{"c":[{"a":1}]}} 59
|
||||
1022 {"a":1,"b":{"f":17034,"g":1.111}} 12
|
||||
1029 \N 12
|
||||
1029 {"a":1,"b":{"c":1}} 12
|
||||
1999 {"a":1,"b":{"c":1}} 11
|
||||
|
||||
-- !sql8 --
|
||||
@ -48,7 +48,7 @@
|
||||
11 [123] 11
|
||||
12 [123.2] 12
|
||||
1022 {"a":1,"b":{"f":17034,"g":1.111}} 12
|
||||
1029 \N 12
|
||||
1029 {"a":1,"b":{"c":1}} 12
|
||||
1999 {"a":1,"b":{"c":1}} 11
|
||||
19921 {"a":1,"d":10} 11
|
||||
|
||||
|
||||
@ -225,8 +225,8 @@
|
||||
{"c":[{"a":1}]} [{"a":1}]
|
||||
{"c":[{"a":1}]} [{"a":1}]
|
||||
{"c":1} 1
|
||||
{} \N
|
||||
{} \N
|
||||
null \N
|
||||
null \N
|
||||
|
||||
-- !sql_11 --
|
||||
1 {"x":[1]}
|
||||
@ -329,8 +329,8 @@
|
||||
{"c":[{"a":1}]} [{"a":1}]
|
||||
{"c":[{"a":1}]} [{"a":1}]
|
||||
{"c":1} 1
|
||||
{} \N
|
||||
{} \N
|
||||
null \N
|
||||
null \N
|
||||
|
||||
-- !sql_11 --
|
||||
1 {"x":[1]}
|
||||
|
||||
@ -79,14 +79,14 @@ suite("regression_test_variant_logdata", "nonConcurrent,p2"){
|
||||
sql "truncate table ${table_name}"
|
||||
|
||||
// 0.95 default ratio
|
||||
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
|
||||
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
|
||||
load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""")
|
||||
qt_sql_33 """ select json_extract(v,"\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;"""
|
||||
qt_sql_33_1 """select cast(v['json']['parseFailed'] as string) from logdata where cast(v['json']['parseFailed'] as string) is not null and k = 162 limit 1;"""
|
||||
sql "truncate table ${table_name}"
|
||||
|
||||
// always sparse column
|
||||
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
|
||||
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
|
||||
load_json_data.call(table_name, """${getS3Url() + '/load/logdata.json'}""")
|
||||
qt_sql_34 """ select json_extract(v, "\$.json.parseFailed") from logdata where json_extract(v,"\$.json.parseFailed") != 'null' order by k limit 1;"""
|
||||
sql "truncate table ${table_name}"
|
||||
@ -95,4 +95,4 @@ suite("regression_test_variant_logdata", "nonConcurrent,p2"){
|
||||
// TODO add test case where certain columns are materialized in some files while others are not materialized (sparse)
|
||||
// unique table
|
||||
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,7 +25,7 @@ suite("regression_test_variant_agg"){
|
||||
)
|
||||
AGGREGATE KEY(`k`)
|
||||
DISTRIBUTED BY HASH(k) BUCKETS 4
|
||||
properties("replication_num" = "1", "disable_auto_compaction" = "false");
|
||||
properties("replication_num" = "1", "disable_auto_compaction" = "true");
|
||||
"""
|
||||
sql """insert into var_agg values (1, '[1]', 1),(1, '{"a" : 1}', 1);"""
|
||||
sql """insert into var_agg values (2, '[2]', 2),(1, '{"a" : [[[1]]]}', 2);"""
|
||||
@ -42,10 +42,10 @@ suite("regression_test_variant_agg"){
|
||||
qt_sql1 "select k, cast(v['a'] as array<int>) from var_agg where size(cast(v['a'] as array<int>)) > 0 order by k, cast(v['a'] as string) asc"
|
||||
qt_sql2 "select k, cast(v as int), cast(v['b'] as string) from var_agg where length(cast(v['b'] as string)) > 4 order by k, cast(v as string), cast(v['b'] as string) "
|
||||
qt_sql3 "select k, v from var_agg order by k, cast(v as string) limit 5"
|
||||
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;"
|
||||
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k,cast(v as string) desc limit 10000;"
|
||||
qt_sql5 "select v['b'] from var_agg where cast(v['b'] as int) > 0;"
|
||||
qt_sql6 "select cast(v['b'] as string) from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
|
||||
qt_sql7 "select * from var_agg where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
|
||||
qt_sql6 "select cast(v['b'] as string) from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k, cast(v['b'] as string) "
|
||||
qt_sql7 "select * from var_agg where cast(v['b'] as string) is not null and length(v['b']) >4 order by k, cast(v['b'] as string) "
|
||||
qt_sql8 "select * from var_agg order by 1, cast(2 as string), 3"
|
||||
sql "alter table var_agg drop column s"
|
||||
sql """insert into var_agg select 5, '{"a" : 1234, "xxxx" : "fffff", "point" : 42000}' as json_str
|
||||
|
||||
@ -53,7 +53,7 @@ suite("regression_test_variant_delete_and_update", "variant_type"){
|
||||
)
|
||||
UNIQUE KEY(`k`)
|
||||
DISTRIBUTED BY HASH(k) BUCKETS 4
|
||||
properties("replication_num" = "1", "enable_unique_key_merge_on_write" = "true", "store_row_column" = "true");
|
||||
properties("replication_num" = "1", "enable_unique_key_merge_on_write" = "true");
|
||||
"""
|
||||
sql "insert into var_delete_update_mow select k, cast(v as string), cast(v as string) from var_delete_update"
|
||||
sql "delete from ${table_name} where k = 1"
|
||||
|
||||
@ -257,6 +257,6 @@ suite("regression_test_variant_desc", "nonConcurrent"){
|
||||
sql "desc large_tablets"
|
||||
} finally {
|
||||
// reset flags
|
||||
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
|
||||
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1")
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,9 +97,9 @@ suite("regression_test_variant", "p0"){
|
||||
qt_sql1 "select k, cast(v['a'] as array<int>) from ${table_name} where size(cast(v['a'] as array<int>)) > 0 order by k, cast(v['a'] as string) asc"
|
||||
qt_sql2 "select k, cast(v as int), cast(v['b'] as string) from ${table_name} where length(cast(v['b'] as string)) > 4 order by k, cast(v as string), cast(v['b'] as string) "
|
||||
qt_sql3 "select k, v from ${table_name} order by k, cast(v as string) limit 5"
|
||||
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from ${table_name} where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;"
|
||||
qt_sql4 "select v['b'], v['b']['c'], cast(v as int) from ${table_name} where cast(v['b'] as string) != 'null' and cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k,cast(v as string) desc limit 10000;"
|
||||
qt_sql5 "select v['b'] from ${table_name} where cast(v['b'] as int) > 0;"
|
||||
qt_sql6 "select cast(v['b'] as string) from ${table_name} where cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
|
||||
qt_sql6 "select cast(v['b'] as string) from ${table_name} where cast(v['b'] as string) != 'null' and cast(v['b'] as string) is not null and cast(v['b'] as string) != '{}' order by k, cast(v['b'] as string) "
|
||||
// verify table_name
|
||||
}
|
||||
sql "insert into simple_variant_DUPLICATE select k, cast(v as string) from simple_variant_UNIQUE;"
|
||||
|
||||
@ -166,6 +166,6 @@ suite("load_p2", "variant_type,p2"){
|
||||
qt_sql("select count() from github_events")
|
||||
} finally {
|
||||
// reset flags
|
||||
set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
|
||||
// set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "0.95")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user