From 9490d5e9a26497779cd02ed80bb56020f7ffbaa0 Mon Sep 17 00:00:00 2001
From: lihangyu <15605149486@163.com>
Date: Tue, 2 Jan 2024 20:28:59 +0800
Subject: [PATCH] [Debug](Variant) sanitize variant in `write_column_to_mysql` (#29380)

---
 be/src/vec/columns/column_object.cpp          | 45 +++++++++++++++++++
 be/src/vec/columns/column_object.h            |  7 +++
 .../serde/data_type_object_serde.cpp          |  2 +
 3 files changed, 54 insertions(+)

diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index fb2bf8c907..8a21bc76d2 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -27,6 +27,8 @@
 #include
 #include
 
+#include
+#include
 #include
 #include
 #include
@@ -1473,4 +1475,47 @@ void ColumnObject::for_each_imutable_subcolumn(ImutableColumnCallback callback)
     }
 }
 
+// Renders a human-readable description of this column for diagnostics: the
+// family name, the row count and, for each finalized subcolumn, its column
+// structure, type name and path. Subcolumns that are not finalized are skipped.
+std::string ColumnObject::debug_string() const {
+    std::stringstream res;
+    res << get_family_name() << "(num_row = " << num_rows;
+    for (const auto& entry : subcolumns) {
+        if (entry->data.is_finalized()) {
+            // NOTE(review): reads part 0 directly; assumes a finalized subcolumn
+            // always holds at least one part -- confirm against Subcolumn.
+            res << "[column:" << entry->data.data[0]->dump_structure()
+                << ",type:" << entry->data.data_types[0]->get_name()
+                << ",path:" << entry->path.get_path() << "],";
+        }
+    }
+    res << ")";
+    return res.str();
+}
+
+// Runs check_consistency() under RETURN_IF_CATCH_EXCEPTION, then checks that
+// every finalized subcolumn's stored column has the same family name as a
+// column created from its least common type. Returns InternalError for the
+// first mismatch found and OK when none is found.
+Status ColumnObject::sanitize() const {
+    RETURN_IF_CATCH_EXCEPTION(check_consistency());
+    for (const auto& subcolumn : subcolumns) {
+        if (subcolumn->data.is_finalized()) {
+            auto column = subcolumn->data.get_least_common_type()->create_column();
+            std::string original = subcolumn->data.get_finalized_column().get_family_name();
+            std::string expected = column->get_family_name();
+            if (original != expected) {
+                // Three arguments need three placeholders; the last one carries
+                // the debug_string() dump into the error message.
+                return Status::InternalError("Incompatible type between {} and {}, debug_info: {}",
+                                             original, expected, debug_string());
+            }
+        }
+    }
+
+    VLOG_DEBUG << "sanitized " << debug_string();
+    return Status::OK();
+}
+
 } // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index 385411cb48..86f3bd19ce 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -476,5 +476,12 @@ public:
     void strip_outer_array();
 
     bool empty() const;
+
+    // Check if all columns and types are aligned; returns a non-OK Status
+    // when a finalized subcolumn's column does not match its type.
+    Status sanitize() const;
+
+    // Human-readable dump of this column, intended for logs and error messages.
+    std::string debug_string() const;
 };
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp
index 94ee6a44da..5b227951e4 100644
--- a/be/src/vec/data_types/serde/data_type_object_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp
@@ -19,6 +19,7 @@
 
 #include
 
+#include "common/status.h"
 #include "vec/columns/column_object.h"
 #include "vec/common/assert_cast.h"
 #include "vec/common/schema_util.h"
@@ -34,6 +35,7 @@ Status DataTypeObjectSerDe::write_column_to_mysql(const IColumn& column,
     if (!variant.is_finalized()) {
         const_cast<ColumnObject&>(variant).finalize();
     }
+    RETURN_IF_ERROR(variant.sanitize());
     if (variant.is_scalar_variant()) {
         // Serialize scalar types, like int, string, array, faster path
         const auto& root = variant.get_subcolumn({});