From 0df32c8e3e9da051ea4cbab55fdd299e34fc8428 Mon Sep 17 00:00:00 2001 From: Tiewei Fang <43782773+BePPPower@users.noreply.github.com> Date: Sat, 7 Oct 2023 22:50:44 +0800 Subject: [PATCH] [Fix](Outfile) Use data_type_serde to export data to csv file format (#24721) Modify the outfile logic, use the data type serde framework. --- .../serde/data_type_array_serde.cpp | 19 +- .../data_types/serde/data_type_array_serde.h | 9 +- .../data_types/serde/data_type_bitmap_serde.h | 16 +- .../serde/data_type_date64_serde.cpp | 29 +-- .../data_types/serde/data_type_date64_serde.h | 18 +- .../serde/data_type_datetimev2_serde.cpp | 17 +- .../serde/data_type_datetimev2_serde.h | 9 +- .../serde/data_type_datev2_serde.cpp | 16 +- .../data_types/serde/data_type_datev2_serde.h | 9 +- .../serde/data_type_decimal_serde.cpp | 17 +- .../serde/data_type_decimal_serde.h | 9 +- .../serde/data_type_fixedlengthobject_serde.h | 16 +- .../data_types/serde/data_type_hll_serde.cpp | 27 ++- .../data_types/serde/data_type_hll_serde.h | 9 +- .../serde/data_type_jsonb_serde.cpp | 16 +- .../data_types/serde/data_type_jsonb_serde.h | 9 +- .../data_types/serde/data_type_map_serde.cpp | 20 +- .../data_types/serde/data_type_map_serde.h | 9 +- .../serde/data_type_nullable_serde.cpp | 36 +++- .../serde/data_type_nullable_serde.h | 13 +- .../serde/data_type_number_serde.cpp | 19 +- .../data_types/serde/data_type_number_serde.h | 9 +- .../data_types/serde/data_type_object_serde.h | 16 +- .../serde/data_type_quantilestate_serde.h | 16 +- be/src/vec/data_types/serde/data_type_serde.h | 36 ++-- .../serde/data_type_string_serde.cpp | 16 +- .../data_types/serde/data_type_string_serde.h | 9 +- .../serde/data_type_struct_serde.cpp | 29 +++ .../data_types/serde/data_type_struct_serde.h | 15 +- be/src/vec/runtime/vcsv_transformer.cpp | 166 ++------------ be/src/vec/runtime/vcsv_transformer.h | 5 +- be/src/vec/runtime/vfile_format_transformer.h | 1 + .../serde/data_type_serde_csv_test.cpp | 13 +- .../serde/data_type_serde_text_test.cpp | 204 +++++++++++------- 34 files changed, 449 insertions(+), 428 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 21b9b14c21..2b76797eb6 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -31,15 +31,16 @@ namespace doris { namespace vectorized { class Arena; -void DataTypeArraySerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeArraySerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeArraySerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeArraySerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -57,8 +58,10 @@ void DataTypeArraySerDe::serialize_one_cell_to_json(const IColumn& column, int r // add ' ' to keep same with origin format with array options.field_delim = options.collection_delim; options.field_delim += " "; - nested_serde->serialize_column_to_json(nested_column, offset, next_offset, bw, options); + RETURN_IF_ERROR(nested_serde->serialize_column_to_json(nested_column, offset, next_offset, bw, + options, nesting_level + 1)); bw.write("]", 1); + return Status::OK(); } Status DataTypeArraySerDe::deserialize_column_from_json_vector(IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index e93b1414dc..531187e762 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -38,11 +38,12 @@ class DataTypeArraySerDe : public DataTypeSerDe { public: DataTypeArraySerDe(const DataTypeSerDeSPtr& _nested_serde) : nested_serde(_nested_serde) {} - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h b/be/src/vec/data_types/serde/data_type_bitmap_serde.h index c53a52caf0..7364a22326 100644 --- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h +++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h @@ -33,16 +33,16 @@ class Arena; class DataTypeBitMapSerDe : public DataTypeSerDe { public: - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "write_column_to_pb with type " + column.get_name()); + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, + int nesting_level = 1) const override { + return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name()); } - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "write_column_to_pb with type " + column.get_name()); + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override { + return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name()); } Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.cpp b/be/src/vec/data_types/serde/data_type_date64_serde.cpp index 8c538710c9..35d057d13e 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_date64_serde.cpp @@ -27,15 +27,16 @@ namespace doris { namespace vectorized { -void DataTypeDate64SerDe::serialize_column_to_json(const IColumn& column, int start_idx, - int end_idx, BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeDate64SerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeDate64SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeDate64SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -59,6 +60,7 @@ void DataTypeDate64SerDe::serialize_one_cell_to_json(const IColumn& column, int char* pos = value.to_string(buf); bw.write(buf, pos - buf - 1); } + return Status::OK(); } Status DataTypeDate64SerDe::deserialize_column_from_json_vector(IColumn& column, @@ -92,15 +94,13 @@ Status DataTypeDate64SerDe::deserialize_one_cell_from_json(IColumn& column, Slic return Status::OK(); } -void DataTypeDateTimeSerDe::serialize_column_to_json(const IColumn& column, int start_idx, - int end_idx, BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() -} +Status DataTypeDateTimeSerDe::serialize_column_to_json( + const IColumn& column, int start_idx, int end_idx, BufferWritable& bw, + FormatOptions& options, int nesting_level) const {SERIALIZE_COLUMN_TO_JSON()} -void DataTypeDateTimeSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeDateTimeSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -129,6 +129,7 @@ void DataTypeDateTimeSerDe::serialize_one_cell_to_json(const IColumn& column, in char* pos = value.to_string(buf); bw.write(buf, pos - buf - 1); } + return Status::OK(); } Status DataTypeDateTimeSerDe::deserialize_column_from_json_vector(IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_date64_serde.h b/be/src/vec/data_types/serde/data_type_date64_serde.h index 560d1cad0c..4374684d7b 100644 --- a/be/src/vec/data_types/serde/data_type_date64_serde.h +++ b/be/src/vec/data_types/serde/data_type_date64_serde.h @@ -42,10 +42,11 @@ namespace vectorized { class Arena; class DataTypeDate64SerDe : public DataTypeNumberSerDe { - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level = 1) const override; @@ -75,11 +76,12 @@ private: }; class DataTypeDateTimeSerDe : public DataTypeDate64SerDe { - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index 3e7b1b7bab..73d60c2fdb 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -27,15 +27,17 @@ namespace doris { namespace vectorized { -void DataTypeDateTimeV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx, - int end_idx, BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeDateTimeV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -55,6 +57,7 @@ void DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column, char* pos = val.to_string(buf); bw.write(buf, pos - buf - 1); } + return Status::OK(); } Status DataTypeDateTimeV2SerDe::deserialize_column_from_json_vector(IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index 8cc26a550d..c5695c074d 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -45,11 +45,12 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe { public: DataTypeDateTimeV2SerDe(int scale) : scale(scale) {}; - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index 663801ffbf..274ddc619a 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -27,15 +27,16 @@ namespace doris { namespace vectorized { -void DataTypeDateV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx, - int end_idx, BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeDateV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeDateV2SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeDateV2SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -47,6 +48,7 @@ void DataTypeDateV2SerDe::serialize_one_cell_to_json(const IColumn& column, int char* pos = val.to_string(buf); // DateTime to_string the end is /0 bw.write(buf, pos - buf - 1); + return Status::OK(); } Status DataTypeDateV2SerDe::deserialize_column_from_json_vector(IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index 3610ebb56f..e235f8416e 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -42,10 +42,11 @@ namespace vectorized { class Arena; class DataTypeDateV2SerDe : public DataTypeNumberSerDe { - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index 1a7732aad4..e6628d8c5b 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -32,16 +32,18 @@ namespace doris { namespace vectorized { template -void DataTypeDecimalSerDe::serialize_column_to_json(const IColumn& column, int start_idx, - int end_idx, BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeDecimalSerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } template -void DataTypeDecimalSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeDecimalSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -55,6 +57,7 @@ void DataTypeDecimalSerDe::serialize_one_cell_to_json(const IColumn& column, auto length = col.get_element(row_num).to_string(buf, scale, scale_multiplier); bw.write(buf, length); } + return Status::OK(); } template diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 697f05569a..5085d40361 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -69,11 +69,12 @@ public: precision(precision_), scale_multiplier(decimal_scale_multiplier(scale)) {} - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h index 6546281904..82c8f456b9 100644 --- a/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h +++ b/be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h @@ -36,16 +36,16 @@ class Arena; class DataTypeFixedLengthObjectSerDe : public DataTypeSerDe { public: - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "serialize_one_cell_to_text with type " + column.get_name()); + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, + int nesting_level = 1) const override { + return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name()); } - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "serialize_column_to_text with type " + column.get_name()); + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override { + return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name()); } Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp b/be/src/vec/data_types/serde/data_type_hll_serde.cpp index d7057e77d9..eb0cb30919 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp @@ -31,20 +31,34 @@ #include "vec/columns/column_const.h" #include "vec/common/arena.h" #include "vec/common/assert_cast.h" +#include "vec/data_types/serde/data_type_nullable_serde.h" namespace doris { namespace vectorized { class IColumn; -void DataTypeHLLSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeHLLSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeHLLSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeHLLSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { + if (!options._output_object_data) { + /** + * For null values in ordinary types, we use \N to represent them; + * for null values in nested types, we use null to represent them, just like the json format. + */ + if (nesting_level >= 2) { + bw.write(DataTypeNullableSerDe::NULL_IN_CSV_FOR_NESTED_TYPE.c_str(), 4); + } else { + bw.write(DataTypeNullableSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2); + } + return Status::OK(); + } auto col_row = check_column_const_set_readability(column, row_num); ColumnPtr ptr = col_row.first; row_num = col_row.second; @@ -52,6 +66,7 @@ void DataTypeHLLSerDe::serialize_one_cell_to_json(const IColumn& column, int row std::unique_ptr buf = std::make_unique(data.max_serialized_size()); size_t size = data.serialize((uint8*)buf.get()); bw.write(buf.get(), size); + return Status::OK(); } Status DataTypeHLLSerDe::deserialize_column_from_json_vector(IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h b/be/src/vec/data_types/serde/data_type_hll_serde.h index ec2b271ff4..c4aac03c34 100644 --- a/be/src/vec/data_types/serde/data_type_hll_serde.h +++ b/be/src/vec/data_types/serde/data_type_hll_serde.h @@ -33,10 +33,11 @@ class Arena; class DataTypeHLLSerDe : public DataTypeSerDe { public: - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level = 1) const override; diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index 678b071e0a..e1d8cb40a5 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -55,15 +55,16 @@ Status DataTypeJsonbSerDe::write_column_to_mysql(const IColumn& column, return _write_column_to_mysql(column, row_buffer, row_idx, col_const); } -void DataTypeJsonbSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeJsonbSerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeJsonbSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeJsonbSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -72,6 +73,7 @@ void DataTypeJsonbSerDe::serialize_one_cell_to_json(const IColumn& column, int r if (s.size > 0) { bw.write(s.data, s.size); } + return Status::OK(); } Status DataTypeJsonbSerDe::deserialize_column_from_json_vector(IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index 8bfa8b7b6c..58b32dc462 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -42,10 +42,11 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { arrow::ArrayBuilder* array_builder, int start, int end) const override; - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp b/be/src/vec/data_types/serde/data_type_map_serde.cpp index dceec0ccac..063cfa1ee2 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp @@ -29,14 +29,15 @@ namespace doris { namespace vectorized { class Arena; -void DataTypeMapSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeMapSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -55,11 +56,14 @@ void DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int row bw.write(&options.collection_delim, 1); bw.write(" ", 1); } - key_serde->serialize_one_cell_to_json(nested_keys_column, i, bw, options); + RETURN_IF_ERROR(key_serde->serialize_one_cell_to_json(nested_keys_column, i, bw, options, + nesting_level + 1)); bw.write(&options.map_key_delim, 1); - value_serde->serialize_one_cell_to_json(nested_values_column, i, bw, options); + RETURN_IF_ERROR(value_serde->serialize_one_cell_to_json(nested_values_column, i, bw, + options, nesting_level + 1)); } bw.write("}", 1); + return Status::OK(); } Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Slice& slice, diff --git a/be/src/vec/data_types/serde/data_type_map_serde.h b/be/src/vec/data_types/serde/data_type_map_serde.h index e1cdc896c4..18649f56fb 100644 --- a/be/src/vec/data_types/serde/data_type_map_serde.h +++ b/be/src/vec/data_types/serde/data_type_map_serde.h @@ -39,10 +39,11 @@ public: DataTypeMapSerDe(const DataTypeSerDeSPtr& _key_serde, const DataTypeSerDeSPtr& _value_serde) : key_serde(_key_serde), value_serde(_value_serde) {} - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level = 1) const override; diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index e5c6e5c45d..60517d0106 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -33,32 +33,43 @@ #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" #include "vec/data_types/serde/data_type_serde.h" +#include "vec/runtime/vcsv_transformer.h" namespace doris { namespace vectorized { class Arena; -void DataTypeNullableSerDe::serialize_column_to_json(const IColumn& column, int start_idx, - int end_idx, BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeNullableSerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeNullableSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeNullableSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; const auto& col_null = assert_cast(*ptr); if (col_null.is_null_at(row_num)) { - bw.write("NULL", 4); + /** + * For null values in ordinary types, we use \N to represent them; + * for null values in nested types, we use null to represent them, just like the json format. + */ + if (nesting_level >= 2) { + bw.write(NULL_IN_CSV_FOR_NESTED_TYPE.c_str(), 4); + } else { + bw.write(NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2); + } } else { - nested_serde->serialize_one_cell_to_json(col_null.get_nested_column(), row_num, bw, - options); + RETURN_IF_ERROR(nested_serde->serialize_one_cell_to_json( + col_null.get_nested_column(), row_num, bw, options, nesting_level)); } + return Status::OK(); } Status DataTypeNullableSerDe::deserialize_column_from_json_vector(IColumn& column, @@ -80,7 +91,7 @@ void DataTypeNullableSerDe::serialize_one_cell_to_hive_text(const IColumn& colum const auto& col_null = assert_cast(*ptr); if (col_null.is_null_at(row_num)) { - bw.write("\\N", 2); + bw.write(NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2); } else { nested_serde->serialize_one_cell_to_hive_text(col_null.get_nested_column(), row_num, bw, options, nesting_level); @@ -327,5 +338,8 @@ Status DataTypeNullableSerDe::write_column_to_orc(const IColumn& column, const N return Status::OK(); } +const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE = "\\N"; +const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_NESTED_TYPE = "null"; + } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index cde3c9db35..8044681402 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -35,10 +35,11 @@ class DataTypeNullableSerDe : public DataTypeSerDe { public: DataTypeNullableSerDe(const DataTypeSerDeSPtr& _nested_serde) : nested_serde(_nested_serde) {} - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level = 1) const override; @@ -88,6 +89,10 @@ public: nested_serde->set_return_object_as_string(value); } + static const std::string NULL_IN_CSV_FOR_ORDINARY_TYPE; + + static const std::string NULL_IN_CSV_FOR_NESTED_TYPE; + private: template Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index 4bbeb248d1..a5a915d15a 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -63,7 +63,7 @@ using DORIS_NUMERIC_ARROW_BUILDER = arrow::Int64Builder, UInt128, arrow::FixedSizeBinaryBuilder, Int128, arrow::FixedSizeBinaryBuilder, Float32, arrow::FloatBuilder, Float64, arrow::DoubleBuilder, void, - void // 添加这一行来表示TypeMap的末端 + void // Add this line to represent the end of the TypeMap >; template @@ -137,16 +137,18 @@ Status DataTypeNumberSerDe::deserialize_one_cell_from_json(IColumn& column, S } template -void DataTypeNumberSerDe::serialize_column_to_json(const IColumn& column, int start_idx, - int end_idx, BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeNumberSerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } template -void DataTypeNumberSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeNumberSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; @@ -162,6 +164,7 @@ void DataTypeNumberSerDe::serialize_one_cell_to_json(const IColumn& column, i } else if constexpr (std::is_integral::value || std::numeric_limits::is_iec559) { bw.write_number(data); } + return Status::OK(); } template diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index b37c1578a8..558eabee45 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -55,10 +55,11 @@ class DataTypeNumberSerDe : public DataTypeSerDe { public: using ColumnType = ColumnVector; - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level = 1) const override; diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h index cfa8343624..242833911a 100644 --- a/be/src/vec/data_types/serde/data_type_object_serde.h +++ b/be/src/vec/data_types/serde/data_type_object_serde.h @@ -36,16 +36,16 @@ class Arena; class DataTypeObjectSerDe : public DataTypeSerDe { public: - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "serialize_one_cell_to_text with type " + column.get_name()); + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, + int nesting_level = 1) const override { + return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name()); } - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "serialize_column_to_text with type " + column.get_name()); + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override { + return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name()); } Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h index bcb099f696..b19d5ba25a 100644 --- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h +++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h @@ -39,16 +39,16 @@ namespace vectorized { class DataTypeQuantileStateSerDe : public DataTypeSerDe { public: - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "serialize_one_cell_to_text with type " + column.get_name()); + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, + int nesting_level = 1) const override { + return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name()); } - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "serialize_column_to_text with type " + column.get_name()); + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override { + return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name()); } Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 7125ad589d..a6487b1dc8 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -45,13 +45,14 @@ namespace orc { struct ColumnVectorBatch; } // namespace orc -#define SERIALIZE_COLUMN_TO_JSON() \ - for (size_t i = start_idx; i < end_idx; ++i) { \ - if (i != start_idx) { \ - bw.write(options.field_delim.data(), options.field_delim.size()); \ - } \ - serialize_one_cell_to_json(column, i, bw, options); \ - } +#define SERIALIZE_COLUMN_TO_JSON() \ + for (size_t i = start_idx; i < end_idx; ++i) { \ + if (i != start_idx) { \ + bw.write(options.field_delim.data(), options.field_delim.size()); \ + } \ + RETURN_IF_ERROR(serialize_one_cell_to_json(column, i, bw, options, nesting_level)); \ + } \ + return Status::OK(); #define DESERIALIZE_COLUMN_FROM_JSON_VECTOR() \ for (int i = 0; i < slices.size(); ++i) { \ @@ -135,6 +136,11 @@ public: char escape_char = 0; + /** + * only used for export data + */ + bool _output_object_data = true; + [[nodiscard]] char get_collection_delimiter(int nesting_level) const { CHECK(0 <= nesting_level && nesting_level <= 153); @@ -187,12 +193,14 @@ public: DataTypeSerDe(); virtual ~DataTypeSerDe(); // Text serializer and deserializer with formatOptions to handle different text format - virtual void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const = 0; + virtual Status serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const = 0; // this function serialize multi-column to one row text to avoid virtual function call in complex type nested loop - virtual void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const = 0; + virtual Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const = 0; virtual Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, @@ -219,7 +227,11 @@ public: virtual void serialize_one_cell_to_hive_text(const IColumn& column, int row_num, BufferWritable& bw, FormatOptions& options, int nesting_level = 1) const { - serialize_one_cell_to_json(column, row_num, bw, options); + Status st = serialize_one_cell_to_json(column, row_num, bw, options); + if (!st.ok()) { + throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, + "serialize_one_cell_to_json error: {}", st.to_string()); + } } // Protobuf serializer and deserializer diff --git a/be/src/vec/data_types/serde/data_type_string_serde.cpp b/be/src/vec/data_types/serde/data_type_string_serde.cpp index 77616347d9..0eba61ec5f 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_string_serde.cpp @@ -33,21 +33,23 @@ namespace doris { namespace vectorized { class Arena; -void DataTypeStringSerDe::serialize_column_to_json(const IColumn& column, int start_idx, - int end_idx, BufferWritable& bw, - FormatOptions& options) const { - SERIALIZE_COLUMN_TO_JSON() +Status DataTypeStringSerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); } -void DataTypeStringSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, - BufferWritable& bw, - FormatOptions& options) const { +Status DataTypeStringSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; row_num = result.second; const auto& value = assert_cast(*ptr).get_data_at(row_num); bw.write(value.data, value.size); + return Status::OK(); } Status DataTypeStringSerDe::deserialize_column_from_json_vector(IColumn& column, diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index eb4c5627a0..a2e5758bf8 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -33,11 +33,12 @@ class Arena; class DataTypeStringSerDe : public DataTypeSerDe { public: - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override; + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override; + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index 9f8078f4b3..28e52944da 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -18,6 +18,7 @@ #include "data_type_struct_serde.h" #include "arrow/array/builder_nested.h" +#include "common/status.h" #include "util/jsonb_document.h" #include "vec/columns/column.h" #include "vec/columns/column_const.h" @@ -39,6 +40,34 @@ std::optional DataTypeStructSerDe::try_get_position_by_name(const String return std::nullopt; } +Status DataTypeStructSerDe::serialize_column_to_json(const IColumn& column, int start_idx, + int end_idx, BufferWritable& bw, + FormatOptions& options, + int nesting_level) const { + SERIALIZE_COLUMN_TO_JSON(); +} + +Status DataTypeStructSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num, + BufferWritable& bw, FormatOptions& options, + int nesting_level) const { + auto result = check_column_const_set_readability(column, row_num); + ColumnPtr ptr = result.first; + row_num = result.second; + + const ColumnStruct& struct_column = assert_cast(*ptr); + bw.write('{'); + for (int i = 0; i < struct_column.get_columns().size(); i++) { + if (i != 0) { + bw.write(','); + bw.write(' '); + } + RETURN_IF_ERROR(elemSerDeSPtrs[i]->serialize_one_cell_to_json( + struct_column.get_column(i), row_num, bw, options, nesting_level + 1)); + } + bw.write('}'); + return Status::OK(); +} + Status DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, int nesting_level) const { diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.h b/be/src/vec/data_types/serde/data_type_struct_serde.h index af10e2e87a..ad9a0e43a4 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.h +++ b/be/src/vec/data_types/serde/data_type_struct_serde.h @@ -108,17 +108,12 @@ public: DataTypeStructSerDe(const DataTypeSerDeSPtrs& _elemSerDeSPtrs, const Strings names) : elemSerDeSPtrs(_elemSerDeSPtrs), elemNames(names) {} - void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, - FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "serialize_one_cell_to_json with type " + column.get_name()); - } + Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw, + FormatOptions& options, int nesting_level = 1) const override; - void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, - BufferWritable& bw, FormatOptions& options) const override { - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, - "serialize_column_to_json with type " + column.get_name()); - } + Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx, + BufferWritable& bw, FormatOptions& options, + int nesting_level = 1) const override; Status deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/runtime/vcsv_transformer.cpp b/be/src/vec/runtime/vcsv_transformer.cpp index 1db5440b9c..1cbdb7ea05 100644 --- a/be/src/vec/runtime/vcsv_transformer.cpp +++ b/be/src/vec/runtime/vcsv_transformer.cpp @@ -24,6 +24,7 @@ #include #include +#include "common/status.h" #include "gutil/strings/numbers.h" #include "io/fs/file_writer.h" #include "runtime/define_primitive_type.h" @@ -41,9 +42,12 @@ #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" #include "vec/common/pod_array.h" +#include "vec/common/string_buffer.hpp" #include "vec/common/string_ref.h" #include "vec/core/column_with_type_and_name.h" #include "vec/core/types.h" +#include "vec/data_types/serde/data_type_serde.h" +#include "vec/exec/format/csv/csv_reader.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" #include "vec/runtime/vdatetime_value.h" @@ -85,165 +89,33 @@ Status VCSVTransformer::close() { } Status VCSVTransformer::write(const Block& block) { - using doris::operator<<; + auto ser_col = ColumnString::create(); + ser_col->reserve(block.columns()); + VectorBufferWriter buffer_writer(*ser_col.get()); for (size_t i = 0; i < block.rows(); i++) { for (size_t col_id = 0; col_id < block.columns(); col_id++) { - auto col = block.get_by_position(col_id); - if (col.column->is_null_at(i)) { - fmt::format_to(_outstream_buffer, "{}", NULL_IN_CSV); - } else { - switch (_output_vexpr_ctxs[col_id]->root()->type().type) { - case TYPE_BOOLEAN: - case TYPE_TINYINT: - fmt::format_to( - _outstream_buffer, "{}", - (int)*reinterpret_cast(col.column->get_data_at(i).data)); - break; - case TYPE_SMALLINT: - fmt::format_to( - _outstream_buffer, "{}", - *reinterpret_cast(col.column->get_data_at(i).data)); - break; - case TYPE_INT: - fmt::format_to( - _outstream_buffer, "{}", - *reinterpret_cast(col.column->get_data_at(i).data)); - break; - case TYPE_BIGINT: - fmt::format_to( - _outstream_buffer, "{}", - *reinterpret_cast(col.column->get_data_at(i).data)); - break; - case TYPE_LARGEINT: - fmt::format_to( - _outstream_buffer, "{}", - *reinterpret_cast(col.column->get_data_at(i).data)); - break; - case TYPE_FLOAT: { - char buffer[MAX_FLOAT_STR_LENGTH + 2]; - float float_value = - *reinterpret_cast(col.column->get_data_at(i).data); - buffer[0] = '\0'; - int length = FloatToBuffer(float_value, MAX_FLOAT_STR_LENGTH, buffer); - DCHECK(length >= 0) << "gcvt float failed, float value=" << float_value; - fmt::format_to(_outstream_buffer, "{}", buffer); - break; - } - case TYPE_DOUBLE: { - // To prevent loss of precision on float and double types, - // they are converted to strings before output. - // For example: For a double value 27361919854.929001, - // the direct output of using std::stringstream is 2.73619e+10, - // and after conversion to a string, it outputs 27361919854.929001 - char buffer[MAX_DOUBLE_STR_LENGTH + 2] = "\0"; - double double_value = - *reinterpret_cast(col.column->get_data_at(i).data); - buffer[0] = '\0'; - int length = DoubleToBuffer(double_value, MAX_DOUBLE_STR_LENGTH, buffer); - DCHECK(length >= 0) << "gcvt double failed, double value=" << double_value; - fmt::format_to(_outstream_buffer, "{}", buffer); - break; - } - case TYPE_DATEV2: { - char buf[64] = "\0"; - const DateV2Value* time_val = - (const DateV2Value*)(col.column->get_data_at(i).data); - time_val->to_string(buf); - fmt::format_to(_outstream_buffer, "{}", buf); - break; - } - case TYPE_DATETIMEV2: { - char buf[64] = "\0"; - const DateV2Value* time_val = - (const DateV2Value*)(col.column->get_data_at(i) - .data); - time_val->to_string(buf, _output_vexpr_ctxs[col_id]->root()->type().scale); - fmt::format_to(_outstream_buffer, "{}", buf); - break; - } - case TYPE_DATE: - case TYPE_DATETIME: { - char buf[64] = "\0"; - const VecDateTimeValue* time_val = - (const VecDateTimeValue*)(col.column->get_data_at(i).data); - time_val->to_string(buf); - fmt::format_to(_outstream_buffer, "{}", buf); - break; - } - case TYPE_OBJECT: - case TYPE_HLL: { - if (!_output_object_data) { - fmt::format_to(_outstream_buffer, "{}", NULL_IN_CSV); - break; - } - [[fallthrough]]; - } - case TYPE_VARCHAR: - case TYPE_CHAR: - case TYPE_STRING: { - auto value = col.column->get_data_at(i); - fmt::format_to(_outstream_buffer, "{}", value); - break; - } - case TYPE_DECIMALV2: { - const DecimalV2Value decimal_val( - reinterpret_cast(col.column->get_data_at(i).data) - ->value); - fmt::format_to(_outstream_buffer, "{}", decimal_val.to_string()); - break; - } - case TYPE_DECIMAL32: { - fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i)); - break; - } - case TYPE_DECIMAL64: { - fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i)); - break; - } - case TYPE_DECIMAL128I: { - fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i)); - break; - } - case TYPE_ARRAY: { - fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i)); - break; - } - case TYPE_MAP: { - fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i)); - break; - } - case TYPE_STRUCT: { - fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i)); - break; - } - default: { - // not supported type, like BITMAP, just export null - fmt::format_to(_outstream_buffer, "{}", NULL_IN_CSV); - } - } - } - if (col_id < block.columns() - 1) { - fmt::format_to(_outstream_buffer, "{}", _column_separator); + if (col_id != 0) { + buffer_writer.write(_column_separator.data(), _column_separator.size()); } + RETURN_IF_ERROR(_serdes[col_id]->serialize_one_cell_to_json( + *(block.get_by_position(col_id).column), i, buffer_writer, _options)); } - fmt::format_to(_outstream_buffer, "{}", _line_delimiter); + buffer_writer.write(_line_delimiter.data(), _line_delimiter.size()); + buffer_writer.commit(); } - - return _flush_plain_text_outstream(); + return _flush_plain_text_outstream(*ser_col.get()); } -Status VCSVTransformer::_flush_plain_text_outstream() { - size_t pos = _outstream_buffer.size(); - if (pos == 0) { +Status VCSVTransformer::_flush_plain_text_outstream(ColumnString& ser_col) { + if (ser_col.byte_size() == 0) { return Status::OK(); } RETURN_IF_ERROR( - _file_writer->append(Slice(_outstream_buffer.data(), _outstream_buffer.size()))); + _file_writer->append(Slice(ser_col.get_chars().data(), ser_col.get_chars().size()))); // clear the stream - _outstream_buffer.clear(); - + ser_col.clear(); return Status::OK(); } @@ -259,6 +131,4 @@ std::string VCSVTransformer::_gen_csv_header_types() { types += _line_delimiter; return types; } - -const std::string VCSVTransformer::NULL_IN_CSV = "\\N"; } // namespace doris::vectorized diff --git a/be/src/vec/runtime/vcsv_transformer.h b/be/src/vec/runtime/vcsv_transformer.h index bb28db6256..2cbe0e5b74 100644 --- a/be/src/vec/runtime/vcsv_transformer.h +++ b/be/src/vec/runtime/vcsv_transformer.h @@ -26,6 +26,8 @@ #include #include +#include + #include "vfile_format_transformer.h" namespace doris { @@ -53,10 +55,9 @@ public: int64_t written_len() override; private: - Status _flush_plain_text_outstream(); + Status _flush_plain_text_outstream(ColumnString& ser_col); std::string _gen_csv_header_types(); - static const std::string NULL_IN_CSV; std::string _csv_header; std::string_view _column_separator; std::string_view _line_delimiter; diff --git a/be/src/vec/runtime/vfile_format_transformer.h b/be/src/vec/runtime/vfile_format_transformer.h index 2b5440fdfd..dc2ded5386 100644 --- a/be/src/vec/runtime/vfile_format_transformer.h +++ b/be/src/vec/runtime/vfile_format_transformer.h @@ -39,6 +39,7 @@ public: for (int i = 0; i < output_vexpr_ctxs.size(); ++i) { data_types.push_back(output_vexpr_ctxs[i]->root()->data_type()); } + _options._output_object_data = output_object_data; _serdes = vectorized::create_data_type_serdes(data_types); } diff --git a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp index e8aeb9faba..259ab9b7ff 100644 --- a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp @@ -221,11 +221,14 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) { auto ser_col = ColumnString::create(); ser_col->reserve(3); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); - serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); - serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); rtrim(min_s); rtrim(max_s); @@ -259,7 +262,9 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, default_format_option); + Status st = + serde->serialize_one_cell_to_json(*col, 0, buffer_writer, default_format_option); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(rand_wf->to_string(), rand_s_d.to_string()); diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp index 2d64e78af7..8566e40764 100644 --- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp @@ -163,7 +163,9 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { } EXPECT_EQ(st.ok(), true); // serialize - serde->serialize_one_cell_to_json(*col, i, buffer_writer, default_format_option); + st = serde->serialize_one_cell_to_json(*col, i, buffer_writer, + default_format_option); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); EXPECT_EQ(ser_col->get_data_at(ser_col->size() - 1).to_string(), std::get<2>(type_pair)[i]); @@ -220,11 +222,14 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(3); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); - serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); - serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); rtrim(min_s); rtrim(max_s); @@ -258,7 +263,9 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, default_format_option); + Status st = + serde->serialize_one_cell_to_json(*col, 0, buffer_writer, default_format_option); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(rand_wf->to_string(), rand_s_d.to_string()); @@ -276,54 +283,54 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_BOOL, {"[0, 1,-1,1]", "[true, false]", "[1,true,t]", "[1, false], [,], [1,true,t]", "[,]"}, - {"[0, 1, NULL, 1]", "[1, 0]", "[1, 1, NULL]", - "[1, NULL, NULL, 1, NULL]", "[]"}, - {"[0, 1, NULL, 1]", "[1, 0]", "[1, 1, NULL]", - "[1, NULL, NULL, 1, NULL]", "[]"}), + {"[0, 1, null, 1]", "[1, 0]", "[1, 1, null]", + "[1, null, null, 1, null]", "[]"}, + {"[0, 1, null, 1]", "[1, 0]", "[1, 1, null]", + "[1, null, null, 1, null]", "[]"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_TINYINT, {"[1111, 12, ]", "[ed, 2,]", "[],[]", "[[]]", "[,1 , 3]"}, - {"[NULL, 12, NULL]", "[NULL, 2, NULL]", "[NULL]", "[NULL]", "[]"}, - {"[NULL, 12, NULL]", "[NULL, 2, NULL]", "[NULL]", "[NULL]", "[]"}), + {"[null, 12, null]", "[null, 2, null]", "[null]", "[null]", "[]"}, + {"[null, 12, null]", "[null, 2, null]", "[null]", "[null]", "[]"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_FLOAT, {"[0.33, 0.67, 0]", "[3.40282e+38, 3.40282e+38+1]", "[\"3.40282e+38+1\"]", "[\"3.14\", 0.77]"}, - {"[0.33, 0.67, 0]", "[3.40282e+38, NULL]", "[NULL]", "[NULL, 0.77]"}, - {"[0.33, 0.67, 0]", "[3.40282e+38, NULL]", "[NULL]", "[3.14, 0.77]"}), + {"[0.33, 0.67, 0]", "[3.40282e+38, null]", "[null]", "[null, 0.77]"}, + {"[0.33, 0.67, 0]", "[3.40282e+38, null]", "[null]", "[3.14, 0.77]"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_DOUBLE, {"[3.1415926, 0.878787878, 12.44456475432]", "[2343.12345465746, 2.22507e-308, 2.22507e-308-1, \"2.22507e-308\"]"}, {"[3.1415926, 0.878787878, 12.44456475432]", - "[2343.12345465746, 2.22507e-308, NULL, NULL]"}, + "[2343.12345465746, 2.22507e-308, null, null]"}, {"[3.1415926, 0.878787878, 12.44456475432]", - "[2343.12345465746, 2.22507e-308, NULL, 2.22507e-308]"}), + "[2343.12345465746, 2.22507e-308, null, 2.22507e-308]"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_STRING, {"[\"hello\", \"world\"]", "['a', 'b', 'c']", "[\"42\",1412341,true,42.43,3.40282e+38+1,alpha:beta:gamma,Earth#42:" "Control#86:Bob#31,17:true:Abe " "Linkedin,BLUE,\"\\N\",\"\u0001\u0002\u0003,\\u0001bc\"]", - "[\"heeeee\",null,\"NULL\",\"\\N\",null,\"sssssssss\"]"}, + "[\"heeeee\",null,\"null\",\"\\N\",null,\"sssssssss\"]"}, // last : ["42",1412341,true,42.43,3.40282e+38+1,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,"\N",",\u0001bc"] {"[hello, world]", "[a, b, c]", "[42, 1412341, true, 42.43, 3.40282e+38+1, alpha:beta:gamma, " "Earth#42:Control#86:Bob#31, 17:true:Abe Linkedin, BLUE, \\N, " "\x1\x2\x3,\\u0001bc]", - "[heeeee, NULL, NULL, \\N, NULL, sssssssss]"}, + "[heeeee, null, null, \\N, null, sssssssss]"}, {"[hello, world]", "[a, b, c]", "[42, 1412341, true, 42.43, 3.40282e+38+1, alpha:beta:gamma, " "Earth#42:Control#86:Bob#31, 17:true:Abe Linkedin, BLUE, \\N, " "\x1\x2\x3,\\u0001bc]", - "[heeeee, NULL, NULL, \\N, NULL, sssssssss]"}), + "[heeeee, null, null, \\N, null, sssssssss]"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_DATE, {"[\\\"2022-07-13\\\",\"2022-07-13 12:30:00\"]", "[2022-07-13 12:30:00, \"2022-07-13\"]", "[2022-07-13 12:30:00.000, 2022-07-13]"}, - {"[NULL, NULL]", "[2022-07-13, NULL]", "[2022-07-13, 2022-07-13]"}, - {"[NULL, 2022-07-13]", "[2022-07-13, 2022-07-13]", + {"[null, null]", "[2022-07-13, null]", "[2022-07-13, 2022-07-13]"}, + {"[null, 2022-07-13]", "[2022-07-13, 2022-07-13]", "[2022-07-13, 2022-07-13]"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_DATETIME, @@ -333,11 +340,11 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { "\\N", "[null,null,null]", }, - {"[NULL, NULL]", "[2022-07-13 12:30:00, NULL, 2022-07-13 12:30:00]", "NULL", - "[NULL, NULL, NULL]"}, + {"[null, null]", "[2022-07-13 12:30:00, null, 2022-07-13 12:30:00]", "\\N", + "[null, null, null]"}, {"[2022-07-13 00:00:00, 2022-07-13 12:30:00]", - "[2022-07-13 12:30:00, 2022-07-13 00:00:00, 2022-07-13 12:30:00]", "NULL", - "[NULL, NULL, NULL]"}), + "[2022-07-13 12:30:00, 2022-07-13 00:00:00, 2022-07-13 12:30:00]", "\\N", + "[null, null, null]"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_DECIMAL, {"[4, 5.5, 6.67]", @@ -350,14 +357,14 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { "[\\1234567890123456789.01234567\\]"}, {"[4.000000000, 5.500000000, 6.670000000]", "[12345678901234567.012345678, 123456789012345678.012345670, " - "12345678901234567.012345678, NULL, NULL]", - "[NULL, NULL, NULL, NULL, NULL]", "[NULL]"}, + "12345678901234567.012345678, null, null]", + "[null, null, null, null, null]", "[null]"}, {"[4.000000000, 5.500000000, 6.670000000]", "[12345678901234567.012345678, 123456789012345678.012345670, " - "12345678901234567.012345678, NULL, NULL]", + "12345678901234567.012345678, null, null]", "[12345678901234567.012345678, 123456789012345678.012345670, " - "12345678901234567.012345678, NULL, NULL]", - "[NULL]"}), + "12345678901234567.012345678, null, null]", + "[null]"}), }; // array type for (auto type_pair : nested_field_types) { @@ -412,7 +419,9 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, i, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, i, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test : " << rand_s_d << std::endl; @@ -427,7 +436,9 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col2, i, buffer_writer, formatOptions); + status = serde->serialize_one_cell_to_json(*col2, i, buffer_writer, + formatOptions); + EXPECT_EQ(status.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test from string: " << rand_s_d << std::endl; @@ -448,7 +459,9 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde_1->serialize_one_cell_to_json(*col3, i, buffer_writer, formatOptions); + st = serde_1->serialize_one_cell_to_json(*col3, i, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(expect_str_1, rand_s_d.to_string()); @@ -468,54 +481,54 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { FieldType::OLAP_FIELD_TYPE_STRING, {"{1: \"amory is 7\", 0: \" doris be better \", -1: \"wrong,\"}", "{\"1\": \"amory is 7\", \"0\": 1}"}, - {"{1:amory is 7, 0: doris be better , NULL:wrong,}", - "{NULL:amory is 7, NULL:1}"}), + {"{1:amory is 7, 0: doris be better , null:wrong,}", + "{null:amory is 7, null:1}"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE, - {"{\" ,.amory\": 111.2343, \"\": 112., 'dggs': 13.14 , NULL: 12.2222222, " - ": NULL\\}", - "{\"\": NULL, null: 12.44}", "{{}}", "{{}", "}}", "{}, {}", "\\N", + {"{\" ,.amory\": 111.2343, \"\": 112., 'dggs': 13.14 , null: 12.2222222, " + ": null\\}", + "{\"\": null, null: 12.44}", "{{}}", "{{}", "}}", "{}, {}", "\\N", "{null:null,\"null\":null}", "{\"hello " - "world\":0.2222222,\"hello2\":null,null:1111.1,\"NULL\":null,\"null\":" + "world\":0.2222222,\"hello2\":null,null:1111.1,\"null\":null,\"null\":" "null,\"null\":0.1}"}, - {"{ ,.amory:111.2343, \"\":112, dggs:13.14, NULL:12.2222222, :NULL}", - "{\"\":NULL, NULL:12.44}", "{}", "{}", "NULL", "{}", "NULL", - "{NULL:NULL, null:NULL}", - "{hello world:0.2222222, hello2:NULL, NULL:1111.1, NULL:NULL, null:NULL, " + {"{ ,.amory:111.2343, \"\":112, dggs:13.14, null:12.2222222, :null}", + "{\"\":null, null:12.44}", "{}", "{}", "\\N", "{}", "\\N", + "{null:null, null:null}", + "{hello world:0.2222222, hello2:null, null:1111.1, null:null, null:null, " "null:0.1}"}), FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_FLOAT, FieldType::OLAP_FIELD_TYPE_DOUBLE, {"{0.33: 3.1415926,3.1415926: 22}", "{3.14, 15926: 22}", "{3.14}", "{222:3444},", "{4.12, 677: 455: 356, 67.6:67.7}", "{null:null,null:1.0,1.0:null}"}, - {"{0.33:3.1415926, 3.1415925:22}", "{NULL:22}", "{}", "NULL", - "{NULL:NULL, 67.6:67.7}", "{NULL:NULL, NULL:1, 1:NULL}"}), + {"{0.33:3.1415926, 3.1415925:22}", "{null:22}", "{}", "\\N", + "{null:null, 67.6:67.7}", "{null:null, null:1, 1:null}"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_DATE, FieldType::OLAP_FIELD_TYPE_DATETIME, {"{2022-07-13: 2022-07-13 12:30:00, 2022-07-13 12:30:00: 2022-07-13 " - "12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, NULL: NULL, " + "12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, null: null, " "2022-07-13:'2022-07-13 12:30:00'}", // escaped char ':' "{2022-07-13 12\\:30\\:00: 2022-07-13, 2022-07-13 12\\:30\\:00.000: " "2022-07-13 12:30:00.000, 2022-07-13:\'2022-07-13 12:30:00\'}", "\\N"}, - {"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:NULL, 2022-07-13:NULL, " - "NULL:NULL, 2022-07-13:NULL}", + {"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:null, 2022-07-13:null, " + "null:null, 2022-07-13:null}", "{2022-07-13:2022-07-13 00:00:00, 2022-07-13:2022-07-13 12:30:00, " - "2022-07-13:NULL}", - "NULL"}), + "2022-07-13:null}", + "\\N"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_DATETIME, FieldType::OLAP_FIELD_TYPE_DECIMAL, - {"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, NULL: NULL}", + {"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, null: null}", "{\"2022-07-13 12:30:00\": \"12.45675432\"}", "{2022-07-13 12\\:30\\:00:12.45675432, 2022-07-13#12:30:00: 12.45675432}", "{2022-07-13 12\\:30\\:00.0000:12.45675432, null:12.34}"}, {"{2022-07-13 12:00:00:30.000000000, 2022-07-13 00:00:00:12.456754320, " - "NULL:NULL}", - "{NULL:NULL}", + "null:null}", + "{null:null}", "{2022-07-13 12:30:00:12.456754320, 2022-07-13 12:00:00:30.000000000}", - "{2022-07-13 12:30:00:12.456754320, NULL:12.340000000}"}), + "{2022-07-13 12:30:00:12.456754320, null:12.340000000}"}), }; for (auto type_pair : nested_field_types) { @@ -551,7 +564,8 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(expect_str, rand_s_d.to_string()); @@ -565,8 +579,9 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col2, col2->size() - 1, buffer_writer, - formatOptions); + stat = serde->serialize_one_cell_to_json(*col2, col2->size() - 1, buffer_writer, + formatOptions); + EXPECT_EQ(stat.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test from string: " << rand_s_d.to_string() << std::endl; @@ -581,26 +596,26 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_DATE, FieldType::OLAP_FIELD_TYPE_DATETIME, {"{2022-07-13: 2022-07-13 12:30:00, 2022-07-13 12:30:00: 2022-07-13 " - "12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, NULL: NULL, " + "12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, null: null, " "2022-07-13:'2022-07-13 12:30:00'}", // escaped char ':' "{2022-07-13 12\\:30\\:00: 2022-07-13, 2022-07-13 12\\:30\\:00.000: " "2022-07-13 12:30:00.000, 2022-07-13:\'2022-07-13 12:30:00\'}"}, - {"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:NULL, 2022-07-13:NULL, " - "NULL:NULL, 2022-07-13:2022-07-13 12:30:00}", + {"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:null, 2022-07-13:null, " + "null:null, 2022-07-13:2022-07-13 12:30:00}", "{2022-07-13:2022-07-13 00:00:00, 2022-07-13:2022-07-13 12:30:00, " "2022-07-13:2022-07-13 12:30:00}"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_DATETIME, FieldType::OLAP_FIELD_TYPE_DECIMAL, - {"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, NULL: NULL}", + {"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, null: null}", "{\"2022-07-13 12:30:00\": \"12.45675432\"}", "{2022-07-13 12\\:30\\:00:12.45675432, 2022-07-13#12:30:00: 12.45675432}", "{2022-07-13 12\\:30\\:00.0000:12.45675432, null:12.34}"}, {"{2022-07-13 12:00:00:30.000000000, 2022-07-13 00:00:00:12.456754320, " - "NULL:NULL}", + "null:null}", "{2022-07-13 12:30:00:12.456754320}", "{2022-07-13 12:30:00:12.456754320, 2022-07-13 12:00:00:30.000000000}", - "{2022-07-13 12:30:00:12.456754320, NULL:12.340000000}"}), + "{2022-07-13 12:30:00:12.456754320, null:12.340000000}"}), }; for (auto type_pair : field_types) { auto key_type = std::get<0>(type_pair); @@ -636,7 +651,8 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(expect_str, rand_s_d.to_string()); @@ -657,10 +673,10 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_STRING, {"[[Hello, World],[This, is, a, nested, array],null,[null,null,aaaa]]"}, - {"[[Hello, World], [This, is, a, nested, array], NULL, [NULL, NULL, " + {"[[Hello, World], [This, is, a, nested, array], null, [null, null, " "aaaa]]"}, - {"[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL]"}, - {"[[Hello, World], [This, is, a, nested, array], NULL, [NULL, NULL, " + {"[null, null, null, null, null, null, null, null, null, null, null]"}, + {"[[Hello, World], [This, is, a, nested, array], null, [null, null, " "aaaa]]"}), FieldType_RandStr( FieldType::OLAP_FIELD_TYPE_STRING, @@ -712,7 +728,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test : " << rand_s_d << std::endl; @@ -731,7 +749,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, formatOptions); + status = serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, + formatOptions); + EXPECT_EQ(status.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test from string: " << rand_s_d << std::endl; @@ -754,7 +774,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, formatOptions); + st = serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(expect_str_1, rand_s_d.to_string()); @@ -782,7 +804,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { "3050124830713523,\"mKH57V-YmwCNFq-vs8-vUIX\":0.36446683035480754},{\"HfhEMX-" "oAMBJCC-YIC-hCqN\":0.8131454631693608,\"xrnTFd-ikONWik-T7J-sL8J\":0." "37509722558990855,\"SVyEes-77mlzIr-N6c-DkYw\":0.4703053945053086," - "\"NULL\":0.1,\"\\N\":0.1,null:null}, {NULL:0.1, NULL:NULL, \"NULL\":0}]"}, + "\"null\":0.1,\"null\":0.1,null:null}, {null:0.1, null:null, \"null\":0}]"}, {"[{2cKtIM-L1mOcEm-udR-HcB2:0.23929040957798242, " "eof2UN-Is0EEuA-H5D-hE58:0.42373055809540094, " "FwUSOB-R8rtK9W-BVG-8wYZ:0.7680704548628841}, " @@ -797,8 +819,8 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { "mKH57V-YmwCNFq-vs8-vUIX:0.36446683035480754}, " "{HfhEMX-oAMBJCC-YIC-hCqN:0.8131454631693608, " "xrnTFd-ikONWik-T7J-sL8J:0.37509722558990855, " - "SVyEes-77mlzIr-N6c-DkYw:0.4703053945053086, NULL:0.1, \\N:0.1, NULL:NULL}, " - "{NULL:0.1, NULL:NULL, NULL:0}]"}, + "SVyEes-77mlzIr-N6c-DkYw:0.4703053945053086, null:0.1, null:0.1, null:null}, " + "{null:0.1, null:null, null:0}]"}, {""}, {"[{2cKtIM-L1mOcEm-udR-HcB2:0.23929040957798242, " "eof2UN-Is0EEuA-H5D-hE58:0.42373055809540094, " @@ -815,7 +837,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { "{HfhEMX-oAMBJCC-YIC-hCqN:0.8131454631693608, " "xrnTFd-ikONWik-T7J-sL8J:0.37509722558990855, " "SVyEes-77mlzIr-N6c-DkYw:0.4703053945053086, " - "NULL:0.1, \\N:0.1, NULL:NULL}, {NULL:0.1, NULL:NULL, NULL:0}]"})}; + "null:0.1, null:0.1, null:null}, {null:0.1, null:null, null:0}]"})}; for (auto type_pair : nested_field_types) { auto key_type = std::get<0>(type_pair); DataTypePtr nested_key_data_type_ptr = @@ -860,7 +882,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test : " << rand_s_d << std::endl; @@ -879,7 +903,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, formatOptions); + status = serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, + formatOptions); + EXPECT_EQ(status.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test from string: " << rand_s_d << std::endl; @@ -902,7 +928,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, formatOptions); + st = serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(expect_str_1, rand_s_d.to_string()); @@ -1072,7 +1100,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test : " << rand_s_d << std::endl; @@ -1091,7 +1121,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, formatOptions); + status = serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, + formatOptions); + EXPECT_EQ(status.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test from string: " << rand_s_d << std::endl; @@ -1114,7 +1146,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, formatOptions); + st = serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(expect_str_1, rand_s_d.to_string()); @@ -1207,7 +1241,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions); + st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test : " << rand_s_d << std::endl; @@ -1226,7 +1262,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, formatOptions); + status = serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, + formatOptions); + EXPECT_EQ(status.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); std::cout << "test from string: " << rand_s_d << std::endl; @@ -1249,7 +1287,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) { auto ser_col = ColumnString::create(); ser_col->reserve(1); VectorBufferWriter buffer_writer(*ser_col.get()); - serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, formatOptions); + st = serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, + formatOptions); + EXPECT_EQ(st.ok(), true); buffer_writer.commit(); StringRef rand_s_d = ser_col->get_data_at(0); EXPECT_EQ(expect_str_1, rand_s_d.to_string());