[Fix](Outfile) Use data_type_serde to export data to csv file format (#24721)

Modify the outfile logic to use the data type serde framework.
This commit is contained in:
Tiewei Fang
2023-10-07 22:50:44 +08:00
committed by GitHub
parent f3e95608cb
commit 0df32c8e3e
34 changed files with 449 additions and 428 deletions

View File

@ -31,15 +31,16 @@ namespace doris {
namespace vectorized {
class Arena;
void DataTypeArraySerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// The whole body is the shared SERIALIZE_COLUMN_TO_JSON() macro: it writes
// options.field_delim between consecutive cells, hands every cell to
// serialize_one_cell_to_json() with the same nesting_level, and ends with
// `return Status::OK()`. Each per-cell call is wrapped in RETURN_IF_ERROR
// (presumably returning the first failing Status to the caller).
Status DataTypeArraySerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeArraySerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeArraySerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -57,8 +58,10 @@ void DataTypeArraySerDe::serialize_one_cell_to_json(const IColumn& column, int r
// Append ' ' to the collection delimiter so the output keeps the original array format.
options.field_delim = options.collection_delim;
options.field_delim += " ";
nested_serde->serialize_column_to_json(nested_column, offset, next_offset, bw, options);
RETURN_IF_ERROR(nested_serde->serialize_column_to_json(nested_column, offset, next_offset, bw,
options, nesting_level + 1));
bw.write("]", 1);
return Status::OK();
}
Status DataTypeArraySerDe::deserialize_column_from_json_vector(IColumn& column,

View File

@ -38,11 +38,12 @@ class DataTypeArraySerDe : public DataTypeSerDe {
public:
DataTypeArraySerDe(const DataTypeSerDeSPtr& _nested_serde) : nested_serde(_nested_serde) {}
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -33,16 +33,16 @@ class Arena;
class DataTypeBitMapSerDe : public DataTypeSerDe {
public:
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"write_column_to_pb with type " + column.get_name());
// Text serialization of a single cell is not implemented for this serde:
// nothing is written to `bw`, and the call always returns
// Status::NotSupported with the column name in the message.
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options,
int nesting_level = 1) const override {
return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name());
}
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"write_column_to_pb with type " + column.get_name());
// Text serialization of a row range is not implemented for this serde:
// nothing is written to `bw`, and the call always returns
// Status::NotSupported with the column name in the message.
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override {
return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name());
}
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,

View File

@ -27,15 +27,16 @@
namespace doris {
namespace vectorized {
void DataTypeDate64SerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options) const {
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// Implemented entirely by the shared SERIALIZE_COLUMN_TO_JSON() macro, which
// writes options.field_delim between consecutive cells, calls
// serialize_one_cell_to_json() for each row with the given nesting_level,
// and finally returns Status::OK(). Per-cell calls go through RETURN_IF_ERROR
// (presumably an early return of the failing Status).
Status DataTypeDate64SerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeDate64SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeDate64SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -59,6 +60,7 @@ void DataTypeDate64SerDe::serialize_one_cell_to_json(const IColumn& column, int
char* pos = value.to_string(buf);
bw.write(buf, pos - buf - 1);
}
return Status::OK();
}
Status DataTypeDate64SerDe::deserialize_column_from_json_vector(IColumn& column,
@ -92,15 +94,13 @@ Status DataTypeDate64SerDe::deserialize_one_cell_from_json(IColumn& column, Slic
return Status::OK();
}
void DataTypeDateTimeSerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
}
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// The body is the shared SERIALIZE_COLUMN_TO_JSON() macro: it writes
// options.field_delim between consecutive cells, calls
// serialize_one_cell_to_json() for each row with the given nesting_level,
// and returns Status::OK() after the last cell.
// The macro is invoked with a trailing ';' like every other serde so that
// clang-format keeps the body on its own line.
Status DataTypeDateTimeSerDe::serialize_column_to_json(const IColumn& column, int start_idx,
                                                       int end_idx, BufferWritable& bw,
                                                       FormatOptions& options,
                                                       int nesting_level) const {
    SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeDateTimeSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeDateTimeSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -129,6 +129,7 @@ void DataTypeDateTimeSerDe::serialize_one_cell_to_json(const IColumn& column, in
char* pos = value.to_string(buf);
bw.write(buf, pos - buf - 1);
}
return Status::OK();
}
Status DataTypeDateTimeSerDe::deserialize_column_from_json_vector(IColumn& column,

View File

@ -42,10 +42,11 @@ namespace vectorized {
class Arena;
class DataTypeDate64SerDe : public DataTypeNumberSerDe<Int64> {
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,
int nesting_level = 1) const override;
@ -75,11 +76,12 @@ private:
};
class DataTypeDateTimeSerDe : public DataTypeDate64SerDe {
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -27,15 +27,17 @@
namespace doris {
namespace vectorized {
void DataTypeDateTimeV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// Delegates to the shared SERIALIZE_COLUMN_TO_JSON() macro, which separates
// cells with options.field_delim, serializes each row through
// serialize_one_cell_to_json() at the same nesting_level, and returns
// Status::OK() at the end. Per-cell calls are wrapped in RETURN_IF_ERROR
// (presumably propagating the first failure).
Status DataTypeDateTimeV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -55,6 +57,7 @@ void DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column,
char* pos = val.to_string(buf);
bw.write(buf, pos - buf - 1);
}
return Status::OK();
}
Status DataTypeDateTimeV2SerDe::deserialize_column_from_json_vector(IColumn& column,

View File

@ -45,11 +45,12 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe<UInt64> {
public:
DataTypeDateTimeV2SerDe(int scale) : scale(scale) {};
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -27,15 +27,16 @@
namespace doris {
namespace vectorized {
void DataTypeDateV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// Delegates to the shared SERIALIZE_COLUMN_TO_JSON() macro, which separates
// cells with options.field_delim, serializes each row through
// serialize_one_cell_to_json() at the same nesting_level, and returns
// Status::OK() at the end. Per-cell calls are wrapped in RETURN_IF_ERROR
// (presumably propagating the first failure).
Status DataTypeDateV2SerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeDateV2SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeDateV2SerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -47,6 +48,7 @@ void DataTypeDateV2SerDe::serialize_one_cell_to_json(const IColumn& column, int
char* pos = val.to_string(buf);
// to_string() terminates the text with '\0'; presumably `pos` points just past it, hence the `- 1` below.
bw.write(buf, pos - buf - 1);
return Status::OK();
}
Status DataTypeDateV2SerDe::deserialize_column_from_json_vector(IColumn& column,

View File

@ -42,10 +42,11 @@ namespace vectorized {
class Arena;
class DataTypeDateV2SerDe : public DataTypeNumberSerDe<UInt32> {
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -32,16 +32,18 @@ namespace doris {
namespace vectorized {
template <typename T>
void DataTypeDecimalSerDe<T>::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// The body is the shared SERIALIZE_COLUMN_TO_JSON() macro: cells are separated
// by options.field_delim, each row is written by serialize_one_cell_to_json()
// with the given nesting_level, and Status::OK() is returned after the loop.
// Per-cell calls are wrapped in RETURN_IF_ERROR (presumably an early return
// of the failing Status).
Status DataTypeDecimalSerDe<T>::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
template <typename T>
void DataTypeDecimalSerDe<T>::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeDecimalSerDe<T>::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -55,6 +57,7 @@ void DataTypeDecimalSerDe<T>::serialize_one_cell_to_json(const IColumn& column,
auto length = col.get_element(row_num).to_string(buf, scale, scale_multiplier);
bw.write(buf, length);
}
return Status::OK();
}
template <typename T>

View File

@ -69,11 +69,12 @@ public:
precision(precision_),
scale_multiplier(decimal_scale_multiplier<typename T::NativeType>(scale)) {}
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -36,16 +36,16 @@ class Arena;
class DataTypeFixedLengthObjectSerDe : public DataTypeSerDe {
public:
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"serialize_one_cell_to_text with type " + column.get_name());
// Text serialization of a single cell is not implemented for this serde:
// nothing is written to `bw`, and the call always returns
// Status::NotSupported with the column name in the message.
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options,
int nesting_level = 1) const override {
return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name());
}
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"serialize_column_to_text with type " + column.get_name());
// Text serialization of a row range is not implemented for this serde:
// nothing is written to `bw`, and the call always returns
// Status::NotSupported with the column name in the message.
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override {
return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name());
}
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -31,20 +31,34 @@
#include "vec/columns/column_const.h"
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
#include "vec/data_types/serde/data_type_nullable_serde.h"
namespace doris {
namespace vectorized {
class IColumn;
void DataTypeHLLSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// The body is the shared SERIALIZE_COLUMN_TO_JSON() macro: cells are separated
// by options.field_delim, each row is written by serialize_one_cell_to_json()
// with the given nesting_level, and Status::OK() is returned after the loop.
// Per-cell calls are wrapped in RETURN_IF_ERROR (presumably an early return
// of the failing Status).
Status DataTypeHLLSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeHLLSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeHLLSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
if (!options._output_object_data) {
/**
* For null values in ordinary types, we use \N to represent them;
* for null values in nested types, we use null to represent them, just like the json format.
*/
if (nesting_level >= 2) {
bw.write(DataTypeNullableSerDe::NULL_IN_CSV_FOR_NESTED_TYPE.c_str(), 4);
} else {
bw.write(DataTypeNullableSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2);
}
return Status::OK();
}
auto col_row = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = col_row.first;
row_num = col_row.second;
@ -52,6 +66,7 @@ void DataTypeHLLSerDe::serialize_one_cell_to_json(const IColumn& column, int row
std::unique_ptr<char[]> buf = std::make_unique<char[]>(data.max_serialized_size());
size_t size = data.serialize((uint8*)buf.get());
bw.write(buf.get(), size);
return Status::OK();
}
Status DataTypeHLLSerDe::deserialize_column_from_json_vector(IColumn& column,

View File

@ -33,10 +33,11 @@ class Arena;
class DataTypeHLLSerDe : public DataTypeSerDe {
public:
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,
int nesting_level = 1) const override;

View File

@ -55,15 +55,16 @@ Status DataTypeJsonbSerDe::write_column_to_mysql(const IColumn& column,
return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
}
void DataTypeJsonbSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// Implemented by the shared SERIALIZE_COLUMN_TO_JSON() macro, which writes
// options.field_delim between consecutive cells, calls
// serialize_one_cell_to_json() for each row with the given nesting_level,
// and finally returns Status::OK(). Per-cell calls go through RETURN_IF_ERROR
// (presumably an early return of the failing Status).
Status DataTypeJsonbSerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeJsonbSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeJsonbSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -72,6 +73,7 @@ void DataTypeJsonbSerDe::serialize_one_cell_to_json(const IColumn& column, int r
if (s.size > 0) {
bw.write(s.data, s.size);
}
return Status::OK();
}
Status DataTypeJsonbSerDe::deserialize_column_from_json_vector(IColumn& column,

View File

@ -42,10 +42,11 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe {
arrow::ArrayBuilder* array_builder, int start,
int end) const override;
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -29,14 +29,15 @@ namespace doris {
namespace vectorized {
class Arena;
void DataTypeMapSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// Implemented by the shared SERIALIZE_COLUMN_TO_JSON() macro, which writes
// options.field_delim between consecutive cells, calls
// serialize_one_cell_to_json() for each row with the given nesting_level,
// and finally returns Status::OK(). Per-cell calls go through RETURN_IF_ERROR
// (presumably an early return of the failing Status).
Status DataTypeMapSerDe::serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -55,11 +56,14 @@ void DataTypeMapSerDe::serialize_one_cell_to_json(const IColumn& column, int row
bw.write(&options.collection_delim, 1);
bw.write(" ", 1);
}
key_serde->serialize_one_cell_to_json(nested_keys_column, i, bw, options);
RETURN_IF_ERROR(key_serde->serialize_one_cell_to_json(nested_keys_column, i, bw, options,
nesting_level + 1));
bw.write(&options.map_key_delim, 1);
value_serde->serialize_one_cell_to_json(nested_values_column, i, bw, options);
RETURN_IF_ERROR(value_serde->serialize_one_cell_to_json(nested_values_column, i, bw,
options, nesting_level + 1));
}
bw.write("}", 1);
return Status::OK();
}
Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Slice& slice,

View File

@ -39,10 +39,11 @@ public:
DataTypeMapSerDe(const DataTypeSerDeSPtr& _key_serde, const DataTypeSerDeSPtr& _value_serde)
: key_serde(_key_serde), value_serde(_value_serde) {}
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,
int nesting_level = 1) const override;

View File

@ -33,32 +33,43 @@
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/data_types/serde/data_type_serde.h"
#include "vec/runtime/vcsv_transformer.h"
namespace doris {
namespace vectorized {
class Arena;
void DataTypeNullableSerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// Implemented by the shared SERIALIZE_COLUMN_TO_JSON() macro, which writes
// options.field_delim between consecutive cells, calls
// serialize_one_cell_to_json() for each row with the given nesting_level,
// and finally returns Status::OK(). Per-cell calls go through RETURN_IF_ERROR
// (presumably an early return of the failing Status).
Status DataTypeNullableSerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeNullableSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeNullableSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
const auto& col_null = assert_cast<const ColumnNullable&>(*ptr);
if (col_null.is_null_at(row_num)) {
bw.write("NULL", 4);
/**
* For null values in ordinary types, we use \N to represent them;
* for null values in nested types, we use null to represent them, just like the json format.
*/
if (nesting_level >= 2) {
bw.write(NULL_IN_CSV_FOR_NESTED_TYPE.c_str(), 4);
} else {
bw.write(NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2);
}
} else {
nested_serde->serialize_one_cell_to_json(col_null.get_nested_column(), row_num, bw,
options);
RETURN_IF_ERROR(nested_serde->serialize_one_cell_to_json(
col_null.get_nested_column(), row_num, bw, options, nesting_level));
}
return Status::OK();
}
Status DataTypeNullableSerDe::deserialize_column_from_json_vector(IColumn& column,
@ -80,7 +91,7 @@ void DataTypeNullableSerDe::serialize_one_cell_to_hive_text(const IColumn& colum
const auto& col_null = assert_cast<const ColumnNullable&>(*ptr);
if (col_null.is_null_at(row_num)) {
bw.write("\\N", 2);
bw.write(NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2);
} else {
nested_serde->serialize_one_cell_to_hive_text(col_null.get_nested_column(), row_num, bw,
options, nesting_level);
@ -327,5 +338,8 @@ Status DataTypeNullableSerDe::write_column_to_orc(const IColumn& column, const N
return Status::OK();
}
// Marker written for a null cell of an ordinary (non-nested) column: a
// backslash followed by 'N'. Writers in this file pass a literal length of 2
// alongside it, so that length must be kept in sync with this string.
const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_ORDINARY_TYPE = "\\N";
// Marker written for a null cell inside a nested type (nesting_level >= 2),
// mirroring JSON's null. Writers pass a literal length of 4 alongside it.
const std::string DataTypeNullableSerDe::NULL_IN_CSV_FOR_NESTED_TYPE = "null";
} // namespace vectorized
} // namespace doris

View File

@ -35,10 +35,11 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
public:
DataTypeNullableSerDe(const DataTypeSerDeSPtr& _nested_serde) : nested_serde(_nested_serde) {}
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,
int nesting_level = 1) const override;
@ -88,6 +89,10 @@ public:
nested_serde->set_return_object_as_string(value);
}
static const std::string NULL_IN_CSV_FOR_ORDINARY_TYPE;
static const std::string NULL_IN_CSV_FOR_NESTED_TYPE;
private:
template <bool is_binary_format>
Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result,

View File

@ -63,7 +63,7 @@ using DORIS_NUMERIC_ARROW_BUILDER =
arrow::Int64Builder, UInt128, arrow::FixedSizeBinaryBuilder, Int128,
arrow::FixedSizeBinaryBuilder, Float32, arrow::FloatBuilder, Float64,
arrow::DoubleBuilder, void,
void // 添加这一行来表示TypeMap的末端
void // Add this line to represent the end of the TypeMap
>;
template <typename T>
@ -137,16 +137,18 @@ Status DataTypeNumberSerDe<T>::deserialize_one_cell_from_json(IColumn& column, S
}
template <typename T>
void DataTypeNumberSerDe<T>::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes the cells of `column` in the range [start_idx, end_idx) into `bw`.
// The body is the shared SERIALIZE_COLUMN_TO_JSON() macro: cells are separated
// by options.field_delim, each row is written by serialize_one_cell_to_json()
// with the given nesting_level, and Status::OK() is returned after the loop.
// Per-cell calls are wrapped in RETURN_IF_ERROR (presumably an early return
// of the failing Status).
Status DataTypeNumberSerDe<T>::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
template <typename T>
void DataTypeNumberSerDe<T>::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
Status DataTypeNumberSerDe<T>::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
@ -162,6 +164,7 @@ void DataTypeNumberSerDe<T>::serialize_one_cell_to_json(const IColumn& column, i
} else if constexpr (std::is_integral<T>::value || std::numeric_limits<T>::is_iec559) {
bw.write_number(data);
}
return Status::OK();
}
template <typename T>

View File

@ -55,10 +55,11 @@ class DataTypeNumberSerDe : public DataTypeSerDe {
public:
using ColumnType = ColumnVector<T>;
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,
int nesting_level = 1) const override;

View File

@ -36,16 +36,16 @@ class Arena;
class DataTypeObjectSerDe : public DataTypeSerDe {
public:
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"serialize_one_cell_to_text with type " + column.get_name());
// Text serialization of a single cell is not implemented for this serde:
// nothing is written to `bw`, and the call always returns
// Status::NotSupported with the column name in the message.
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options,
int nesting_level = 1) const override {
return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name());
}
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"serialize_column_to_text with type " + column.get_name());
// Text serialization of a row range is not implemented for this serde:
// nothing is written to `bw`, and the call always returns
// Status::NotSupported with the column name in the message.
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override {
return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name());
}
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -39,16 +39,16 @@ namespace vectorized {
class DataTypeQuantileStateSerDe : public DataTypeSerDe {
public:
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"serialize_one_cell_to_text with type " + column.get_name());
// Text serialization of a single cell is not implemented for this serde:
// nothing is written to `bw`, and the call always returns
// Status::NotSupported with the column name in the message.
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options,
int nesting_level = 1) const override {
return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name());
}
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"serialize_column_to_text with type " + column.get_name());
// Text serialization of a row range is not implemented for this serde:
// nothing is written to `bw`, and the call always returns
// Status::NotSupported with the column name in the message.
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override {
return Status::NotSupported("serialize_column_to_json with type [{}]", column.get_name());
}
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -45,13 +45,14 @@ namespace orc {
struct ColumnVectorBatch;
} // namespace orc
#define SERIALIZE_COLUMN_TO_JSON() \
for (size_t i = start_idx; i < end_idx; ++i) { \
if (i != start_idx) { \
bw.write(options.field_delim.data(), options.field_delim.size()); \
} \
serialize_one_cell_to_json(column, i, bw, options); \
}
// Shared body of every serialize_column_to_json() override.
//
// Expects these names in the expanding scope: `column`, `start_idx`,
// `end_idx`, `bw`, `options`, `nesting_level`, and a callable
// serialize_one_cell_to_json(column, row, bw, options, nesting_level).
//
// Writes the cells of rows [start_idx, end_idx) to `bw`, putting
// options.field_delim between consecutive cells (never before the first or
// after the last). The first failing cell is handed to RETURN_IF_ERROR;
// otherwise the expansion ends with `return Status::OK();`, so it must be the
// last statement of a function returning Status.
//
// The loop index is `int`, matching the type of start_idx/end_idx and of the
// row_num parameter it is passed to. An unsigned index would turn an inverted
// or negative range into a huge bound instead of an empty loop.
#define SERIALIZE_COLUMN_TO_JSON()                                                          \
    for (int i = start_idx; i < end_idx; ++i) {                                             \
        if (i != start_idx) {                                                               \
            bw.write(options.field_delim.data(), options.field_delim.size());               \
        }                                                                                   \
        RETURN_IF_ERROR(serialize_one_cell_to_json(column, i, bw, options, nesting_level)); \
    }                                                                                       \
    return Status::OK();
#define DESERIALIZE_COLUMN_FROM_JSON_VECTOR() \
for (int i = 0; i < slices.size(); ++i) { \
@ -135,6 +136,11 @@ public:
char escape_char = 0;
/**
* only used for export data
*/
bool _output_object_data = true;
[[nodiscard]] char get_collection_delimiter(int nesting_level) const {
CHECK(0 <= nesting_level && nesting_level <= 153);
@ -187,12 +193,14 @@ public:
DataTypeSerDe();
virtual ~DataTypeSerDe();
// Text serializer and deserializer with formatOptions to handle different text format
virtual void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const = 0;
virtual Status serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const = 0;
// Serializes a range of rows [start_idx, end_idx) from one column into a single text row, so that nested loops over complex types avoid a virtual function call per cell.
virtual void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const = 0;
virtual Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const = 0;
virtual Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,
@ -219,7 +227,11 @@ public:
// Default hive-text serialization of a single cell: delegates to
// serialize_one_cell_to_json() and throws doris::Exception (INTERNAL_ERROR) carrying the
// status text when the returned Status is not ok. `nesting_level` is accepted but is not
// passed on to the JSON serializer here.
// NOTE(review): serialize_one_cell_to_json() is invoked twice below and the first result
// is discarded; this looks like the pre-change line sitting next to its replacement.
// Confirm only the Status-checked call is meant to remain.
virtual void serialize_one_cell_to_hive_text(const IColumn& column, int row_num,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const {
serialize_one_cell_to_json(column, row_num, bw, options);
Status st = serialize_one_cell_to_json(column, row_num, bw, options);
if (!st.ok()) {
throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
"serialize_one_cell_to_json error: {}", st.to_string());
}
}
// Protobuf serializer and deserializer

View File

@ -33,21 +33,23 @@ namespace doris {
namespace vectorized {
class Arena;
void DataTypeStringSerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options) const {
SERIALIZE_COLUMN_TO_JSON()
// Serializes rows [start_idx, end_idx) of a string column into `bw`.
// The loop, the options.field_delim separator between cells, the per-cell call to
// serialize_one_cell_to_json() and the final `return Status::OK();` all come from the
// SERIALIZE_COLUMN_TO_JSON macro.
Status DataTypeStringSerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
void DataTypeStringSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
BufferWritable& bw,
FormatOptions& options) const {
// Writes the raw bytes of one string cell to `bw`, without quoting or escaping.
// `options` and `nesting_level` are not consulted for string cells. Always returns OK.
Status DataTypeStringSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
                                                       BufferWritable& bw, FormatOptions& options,
                                                       int nesting_level) const {
    // The helper hands back the column and row index that should actually be read.
    auto resolved = check_column_const_set_readability(column, row_num);
    ColumnPtr data_column = resolved.first;
    row_num = resolved.second;

    const auto& string_column = assert_cast<const ColumnString&>(*data_column);
    const auto cell = string_column.get_data_at(row_num);
    bw.write(cell.data, cell.size);
    return Status::OK();
}
Status DataTypeStringSerDe::deserialize_column_from_json_vector(IColumn& column,

View File

@ -33,11 +33,12 @@ class Arena;
class DataTypeStringSerDe : public DataTypeSerDe {
public:
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override;
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -18,6 +18,7 @@
#include "data_type_struct_serde.h"
#include "arrow/array/builder_nested.h"
#include "common/status.h"
#include "util/jsonb_document.h"
#include "vec/columns/column.h"
#include "vec/columns/column_const.h"
@ -39,6 +40,34 @@ std::optional<size_t> DataTypeStructSerDe::try_get_position_by_name(const String
return std::nullopt;
}
// Serializes rows [start_idx, end_idx) of a struct column into `bw`.
// The body is the SERIALIZE_COLUMN_TO_JSON macro: it writes options.field_delim between
// rows, calls serialize_one_cell_to_json() for each row with the given nesting_level, and
// supplies the final `return Status::OK();`.
Status DataTypeStructSerDe::serialize_column_to_json(const IColumn& column, int start_idx,
int end_idx, BufferWritable& bw,
FormatOptions& options,
int nesting_level) const {
SERIALIZE_COLUMN_TO_JSON();
}
// Writes one struct cell as "{f0, f1, ...}".
// Each field is rendered by its own element serde (elemSerDeSPtrs[i]) with
// nesting_level + 1, and fields are separated by ", ". The first non-OK status from a
// field serde is returned to the caller; otherwise the result is OK.
Status DataTypeStructSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
                                                       BufferWritable& bw, FormatOptions& options,
                                                       int nesting_level) const {
    // The helper hands back the column and row index that should actually be read.
    auto result = check_column_const_set_readability(column, row_num);
    ColumnPtr ptr = result.first;
    row_num = result.second;

    const auto& struct_column = assert_cast<const ColumnStruct&>(*ptr);
    // Field count is read once and indexed with size_t, so the loop condition no longer
    // compares a signed int against an unsigned size().
    const size_t field_count = struct_column.get_columns().size();

    bw.write('{');
    for (size_t i = 0; i < field_count; ++i) {
        if (i != 0) {
            bw.write(',');
            bw.write(' ');
        }
        RETURN_IF_ERROR(elemSerDeSPtrs[i]->serialize_one_cell_to_json(
                struct_column.get_column(i), row_num, bw, options, nesting_level + 1));
    }
    bw.write('}');
    return Status::OK();
}
Status DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,
int nesting_level) const {

View File

@ -108,17 +108,12 @@ public:
DataTypeStructSerDe(const DataTypeSerDeSPtrs& _elemSerDeSPtrs, const Strings names)
: elemSerDeSPtrs(_elemSerDeSPtrs), elemNames(names) {}
void serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"serialize_one_cell_to_json with type " + column.get_name());
}
Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options, int nesting_level = 1) const override;
void serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options) const override {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"serialize_column_to_json with type " + column.get_name());
}
Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
BufferWritable& bw, FormatOptions& options,
int nesting_level = 1) const override;
Status deserialize_one_cell_from_json(IColumn& column, Slice& slice,
const FormatOptions& options,

View File

@ -24,6 +24,7 @@
#include <exception>
#include <ostream>
#include "common/status.h"
#include "gutil/strings/numbers.h"
#include "io/fs/file_writer.h"
#include "runtime/define_primitive_type.h"
@ -41,9 +42,12 @@
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/common/pod_array.h"
#include "vec/common/string_buffer.hpp"
#include "vec/common/string_ref.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/types.h"
#include "vec/data_types/serde/data_type_serde.h"
#include "vec/exec/format/csv/csv_reader.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/runtime/vdatetime_value.h"
@ -85,165 +89,33 @@ Status VCSVTransformer::close() {
}
Status VCSVTransformer::write(const Block& block) {
using doris::operator<<;
auto ser_col = ColumnString::create();
ser_col->reserve(block.columns());
VectorBufferWriter buffer_writer(*ser_col.get());
for (size_t i = 0; i < block.rows(); i++) {
for (size_t col_id = 0; col_id < block.columns(); col_id++) {
auto col = block.get_by_position(col_id);
if (col.column->is_null_at(i)) {
fmt::format_to(_outstream_buffer, "{}", NULL_IN_CSV);
} else {
switch (_output_vexpr_ctxs[col_id]->root()->type().type) {
case TYPE_BOOLEAN:
case TYPE_TINYINT:
fmt::format_to(
_outstream_buffer, "{}",
(int)*reinterpret_cast<const int8_t*>(col.column->get_data_at(i).data));
break;
case TYPE_SMALLINT:
fmt::format_to(
_outstream_buffer, "{}",
*reinterpret_cast<const int16_t*>(col.column->get_data_at(i).data));
break;
case TYPE_INT:
fmt::format_to(
_outstream_buffer, "{}",
*reinterpret_cast<const int32_t*>(col.column->get_data_at(i).data));
break;
case TYPE_BIGINT:
fmt::format_to(
_outstream_buffer, "{}",
*reinterpret_cast<const int64_t*>(col.column->get_data_at(i).data));
break;
case TYPE_LARGEINT:
fmt::format_to(
_outstream_buffer, "{}",
*reinterpret_cast<const __int128*>(col.column->get_data_at(i).data));
break;
case TYPE_FLOAT: {
char buffer[MAX_FLOAT_STR_LENGTH + 2];
float float_value =
*reinterpret_cast<const float*>(col.column->get_data_at(i).data);
buffer[0] = '\0';
int length = FloatToBuffer(float_value, MAX_FLOAT_STR_LENGTH, buffer);
DCHECK(length >= 0) << "gcvt float failed, float value=" << float_value;
fmt::format_to(_outstream_buffer, "{}", buffer);
break;
}
case TYPE_DOUBLE: {
// To prevent loss of precision on float and double types,
// they are converted to strings before output.
// For example: For a double value 27361919854.929001,
// the direct output of using std::stringstream is 2.73619e+10,
// and after conversion to a string, it outputs 27361919854.929001
char buffer[MAX_DOUBLE_STR_LENGTH + 2] = "\0";
double double_value =
*reinterpret_cast<const double*>(col.column->get_data_at(i).data);
buffer[0] = '\0';
int length = DoubleToBuffer(double_value, MAX_DOUBLE_STR_LENGTH, buffer);
DCHECK(length >= 0) << "gcvt double failed, double value=" << double_value;
fmt::format_to(_outstream_buffer, "{}", buffer);
break;
}
case TYPE_DATEV2: {
char buf[64] = "\0";
const DateV2Value<DateV2ValueType>* time_val =
(const DateV2Value<DateV2ValueType>*)(col.column->get_data_at(i).data);
time_val->to_string(buf);
fmt::format_to(_outstream_buffer, "{}", buf);
break;
}
case TYPE_DATETIMEV2: {
char buf[64] = "\0";
const DateV2Value<DateTimeV2ValueType>* time_val =
(const DateV2Value<DateTimeV2ValueType>*)(col.column->get_data_at(i)
.data);
time_val->to_string(buf, _output_vexpr_ctxs[col_id]->root()->type().scale);
fmt::format_to(_outstream_buffer, "{}", buf);
break;
}
case TYPE_DATE:
case TYPE_DATETIME: {
char buf[64] = "\0";
const VecDateTimeValue* time_val =
(const VecDateTimeValue*)(col.column->get_data_at(i).data);
time_val->to_string(buf);
fmt::format_to(_outstream_buffer, "{}", buf);
break;
}
case TYPE_OBJECT:
case TYPE_HLL: {
if (!_output_object_data) {
fmt::format_to(_outstream_buffer, "{}", NULL_IN_CSV);
break;
}
[[fallthrough]];
}
case TYPE_VARCHAR:
case TYPE_CHAR:
case TYPE_STRING: {
auto value = col.column->get_data_at(i);
fmt::format_to(_outstream_buffer, "{}", value);
break;
}
case TYPE_DECIMALV2: {
const DecimalV2Value decimal_val(
reinterpret_cast<const PackedInt128*>(col.column->get_data_at(i).data)
->value);
fmt::format_to(_outstream_buffer, "{}", decimal_val.to_string());
break;
}
case TYPE_DECIMAL32: {
fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i));
break;
}
case TYPE_DECIMAL64: {
fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i));
break;
}
case TYPE_DECIMAL128I: {
fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i));
break;
}
case TYPE_ARRAY: {
fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i));
break;
}
case TYPE_MAP: {
fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i));
break;
}
case TYPE_STRUCT: {
fmt::format_to(_outstream_buffer, "{}", col.type->to_string(*col.column, i));
break;
}
default: {
// not supported type, like BITMAP, just export null
fmt::format_to(_outstream_buffer, "{}", NULL_IN_CSV);
}
}
}
if (col_id < block.columns() - 1) {
fmt::format_to(_outstream_buffer, "{}", _column_separator);
if (col_id != 0) {
buffer_writer.write(_column_separator.data(), _column_separator.size());
}
RETURN_IF_ERROR(_serdes[col_id]->serialize_one_cell_to_json(
*(block.get_by_position(col_id).column), i, buffer_writer, _options));
}
fmt::format_to(_outstream_buffer, "{}", _line_delimiter);
buffer_writer.write(_line_delimiter.data(), _line_delimiter.size());
buffer_writer.commit();
}
return _flush_plain_text_outstream();
return _flush_plain_text_outstream(*ser_col.get());
}
Status VCSVTransformer::_flush_plain_text_outstream() {
size_t pos = _outstream_buffer.size();
if (pos == 0) {
Status VCSVTransformer::_flush_plain_text_outstream(ColumnString& ser_col) {
if (ser_col.byte_size() == 0) {
return Status::OK();
}
RETURN_IF_ERROR(
_file_writer->append(Slice(_outstream_buffer.data(), _outstream_buffer.size())));
_file_writer->append(Slice(ser_col.get_chars().data(), ser_col.get_chars().size())));
// clear the stream
_outstream_buffer.clear();
ser_col.clear();
return Status::OK();
}
@ -259,6 +131,4 @@ std::string VCSVTransformer::_gen_csv_header_types() {
types += _line_delimiter;
return types;
}
const std::string VCSVTransformer::NULL_IN_CSV = "\\N";
} // namespace doris::vectorized

View File

@ -26,6 +26,8 @@
#include <parquet/types.h>
#include <stdint.h>
#include <memory>
#include "vfile_format_transformer.h"
namespace doris {
@ -53,10 +55,9 @@ public:
int64_t written_len() override;
private:
Status _flush_plain_text_outstream();
Status _flush_plain_text_outstream(ColumnString& ser_col);
std::string _gen_csv_header_types();
static const std::string NULL_IN_CSV;
std::string _csv_header;
std::string_view _column_separator;
std::string_view _line_delimiter;

View File

@ -39,6 +39,7 @@ public:
for (int i = 0; i < output_vexpr_ctxs.size(); ++i) {
data_types.push_back(output_vexpr_ctxs[i]->root()->data_type());
}
_options._output_object_data = output_object_data;
_serdes = vectorized::create_data_type_serdes(data_types);
}

View File

@ -221,11 +221,14 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(3);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
rtrim(min_s);
rtrim(max_s);
@ -259,7 +262,9 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, default_format_option);
Status st =
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, default_format_option);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(rand_wf->to_string(), rand_s_d.to_string());

View File

@ -163,7 +163,9 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
}
EXPECT_EQ(st.ok(), true);
// serialize
serde->serialize_one_cell_to_json(*col, i, buffer_writer, default_format_option);
st = serde->serialize_one_cell_to_json(*col, i, buffer_writer,
default_format_option);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
EXPECT_EQ(ser_col->get_data_at(ser_col->size() - 1).to_string(),
std::get<2>(type_pair)[i]);
@ -220,11 +222,14 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(3);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 1, buffer_writer, formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 2, buffer_writer, formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
rtrim(min_s);
rtrim(max_s);
@ -258,7 +263,9 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, default_format_option);
Status st =
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, default_format_option);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(rand_wf->to_string(), rand_s_d.to_string());
@ -276,54 +283,54 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_BOOL,
{"[0, 1,-1,1]", "[true, false]", "[1,true,t]",
"[1, false], [,], [1,true,t]", "[,]"},
{"[0, 1, NULL, 1]", "[1, 0]", "[1, 1, NULL]",
"[1, NULL, NULL, 1, NULL]", "[]"},
{"[0, 1, NULL, 1]", "[1, 0]", "[1, 1, NULL]",
"[1, NULL, NULL, 1, NULL]", "[]"}),
{"[0, 1, null, 1]", "[1, 0]", "[1, 1, null]",
"[1, null, null, 1, null]", "[]"},
{"[0, 1, null, 1]", "[1, 0]", "[1, 1, null]",
"[1, null, null, 1, null]", "[]"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_TINYINT,
{"[1111, 12, ]", "[ed, 2,]", "[],[]", "[[]]", "[,1 , 3]"},
{"[NULL, 12, NULL]", "[NULL, 2, NULL]", "[NULL]", "[NULL]", "[]"},
{"[NULL, 12, NULL]", "[NULL, 2, NULL]", "[NULL]", "[NULL]", "[]"}),
{"[null, 12, null]", "[null, 2, null]", "[null]", "[null]", "[]"},
{"[null, 12, null]", "[null, 2, null]", "[null]", "[null]", "[]"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_FLOAT,
{"[0.33, 0.67, 0]", "[3.40282e+38, 3.40282e+38+1]", "[\"3.40282e+38+1\"]",
"[\"3.14\", 0.77]"},
{"[0.33, 0.67, 0]", "[3.40282e+38, NULL]", "[NULL]", "[NULL, 0.77]"},
{"[0.33, 0.67, 0]", "[3.40282e+38, NULL]", "[NULL]", "[3.14, 0.77]"}),
{"[0.33, 0.67, 0]", "[3.40282e+38, null]", "[null]", "[null, 0.77]"},
{"[0.33, 0.67, 0]", "[3.40282e+38, null]", "[null]", "[3.14, 0.77]"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DOUBLE,
{"[3.1415926, 0.878787878, 12.44456475432]",
"[2343.12345465746, 2.22507e-308, 2.22507e-308-1, \"2.22507e-308\"]"},
{"[3.1415926, 0.878787878, 12.44456475432]",
"[2343.12345465746, 2.22507e-308, NULL, NULL]"},
"[2343.12345465746, 2.22507e-308, null, null]"},
{"[3.1415926, 0.878787878, 12.44456475432]",
"[2343.12345465746, 2.22507e-308, NULL, 2.22507e-308]"}),
"[2343.12345465746, 2.22507e-308, null, 2.22507e-308]"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_STRING,
{"[\"hello\", \"world\"]", "['a', 'b', 'c']",
"[\"42\",1412341,true,42.43,3.40282e+38+1,alpha:beta:gamma,Earth#42:"
"Control#86:Bob#31,17:true:Abe "
"Linkedin,BLUE,\"\\N\",\"\u0001\u0002\u0003,\\u0001bc\"]",
"[\"heeeee\",null,\"NULL\",\"\\N\",null,\"sssssssss\"]"},
"[\"heeeee\",null,\"null\",\"\\N\",null,\"sssssssss\"]"},
// last : ["42",1412341,true,42.43,3.40282e+38+1,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,"\N",",\u0001bc"]
{"[hello, world]", "[a, b, c]",
"[42, 1412341, true, 42.43, 3.40282e+38+1, alpha:beta:gamma, "
"Earth#42:Control#86:Bob#31, 17:true:Abe Linkedin, BLUE, \\N, "
"\x1\x2\x3,\\u0001bc]",
"[heeeee, NULL, NULL, \\N, NULL, sssssssss]"},
"[heeeee, null, null, \\N, null, sssssssss]"},
{"[hello, world]", "[a, b, c]",
"[42, 1412341, true, 42.43, 3.40282e+38+1, alpha:beta:gamma, "
"Earth#42:Control#86:Bob#31, 17:true:Abe Linkedin, BLUE, \\N, "
"\x1\x2\x3,\\u0001bc]",
"[heeeee, NULL, NULL, \\N, NULL, sssssssss]"}),
"[heeeee, null, null, \\N, null, sssssssss]"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DATE,
{"[\\\"2022-07-13\\\",\"2022-07-13 12:30:00\"]",
"[2022-07-13 12:30:00, \"2022-07-13\"]",
"[2022-07-13 12:30:00.000, 2022-07-13]"},
{"[NULL, NULL]", "[2022-07-13, NULL]", "[2022-07-13, 2022-07-13]"},
{"[NULL, 2022-07-13]", "[2022-07-13, 2022-07-13]",
{"[null, null]", "[2022-07-13, null]", "[2022-07-13, 2022-07-13]"},
{"[null, 2022-07-13]", "[2022-07-13, 2022-07-13]",
"[2022-07-13, 2022-07-13]"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DATETIME,
@ -333,11 +340,11 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
"\\N",
"[null,null,null]",
},
{"[NULL, NULL]", "[2022-07-13 12:30:00, NULL, 2022-07-13 12:30:00]", "NULL",
"[NULL, NULL, NULL]"},
{"[null, null]", "[2022-07-13 12:30:00, null, 2022-07-13 12:30:00]", "\\N",
"[null, null, null]"},
{"[2022-07-13 00:00:00, 2022-07-13 12:30:00]",
"[2022-07-13 12:30:00, 2022-07-13 00:00:00, 2022-07-13 12:30:00]", "NULL",
"[NULL, NULL, NULL]"}),
"[2022-07-13 12:30:00, 2022-07-13 00:00:00, 2022-07-13 12:30:00]", "\\N",
"[null, null, null]"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DECIMAL,
{"[4, 5.5, 6.67]",
@ -350,14 +357,14 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
"[\\1234567890123456789.01234567\\]"},
{"[4.000000000, 5.500000000, 6.670000000]",
"[12345678901234567.012345678, 123456789012345678.012345670, "
"12345678901234567.012345678, NULL, NULL]",
"[NULL, NULL, NULL, NULL, NULL]", "[NULL]"},
"12345678901234567.012345678, null, null]",
"[null, null, null, null, null]", "[null]"},
{"[4.000000000, 5.500000000, 6.670000000]",
"[12345678901234567.012345678, 123456789012345678.012345670, "
"12345678901234567.012345678, NULL, NULL]",
"12345678901234567.012345678, null, null]",
"[12345678901234567.012345678, 123456789012345678.012345670, "
"12345678901234567.012345678, NULL, NULL]",
"[NULL]"}),
"12345678901234567.012345678, null, null]",
"[null]"}),
};
// array type
for (auto type_pair : nested_field_types) {
@ -412,7 +419,9 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, i, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, i, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test : " << rand_s_d << std::endl;
@ -427,7 +436,9 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col2, i, buffer_writer, formatOptions);
status = serde->serialize_one_cell_to_json(*col2, i, buffer_writer,
formatOptions);
EXPECT_EQ(status.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test from string: " << rand_s_d << std::endl;
@ -448,7 +459,9 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde_1->serialize_one_cell_to_json(*col3, i, buffer_writer, formatOptions);
st = serde_1->serialize_one_cell_to_json(*col3, i, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(expect_str_1, rand_s_d.to_string());
@ -468,54 +481,54 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
FieldType::OLAP_FIELD_TYPE_STRING,
{"{1: \"amory is 7\", 0: \" doris be better \", -1: \"wrong,\"}",
"{\"1\": \"amory is 7\", \"0\": 1}"},
{"{1:amory is 7, 0: doris be better , NULL:wrong,}",
"{NULL:amory is 7, NULL:1}"}),
{"{1:amory is 7, 0: doris be better , null:wrong,}",
"{null:amory is 7, null:1}"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_STRING, FieldType::OLAP_FIELD_TYPE_DOUBLE,
{"{\" ,.amory\": 111.2343, \"\": 112., 'dggs': 13.14 , NULL: 12.2222222, "
": NULL\\}",
"{\"\": NULL, null: 12.44}", "{{}}", "{{}", "}}", "{}, {}", "\\N",
{"{\" ,.amory\": 111.2343, \"\": 112., 'dggs': 13.14 , null: 12.2222222, "
": null\\}",
"{\"\": null, null: 12.44}", "{{}}", "{{}", "}}", "{}, {}", "\\N",
"{null:null,\"null\":null}",
"{\"hello "
"world\":0.2222222,\"hello2\":null,null:1111.1,\"NULL\":null,\"null\":"
"world\":0.2222222,\"hello2\":null,null:1111.1,\"null\":null,\"null\":"
"null,\"null\":0.1}"},
{"{ ,.amory:111.2343, \"\":112, dggs:13.14, NULL:12.2222222, :NULL}",
"{\"\":NULL, NULL:12.44}", "{}", "{}", "NULL", "{}", "NULL",
"{NULL:NULL, null:NULL}",
"{hello world:0.2222222, hello2:NULL, NULL:1111.1, NULL:NULL, null:NULL, "
{"{ ,.amory:111.2343, \"\":112, dggs:13.14, null:12.2222222, :null}",
"{\"\":null, null:12.44}", "{}", "{}", "\\N", "{}", "\\N",
"{null:null, null:null}",
"{hello world:0.2222222, hello2:null, null:1111.1, null:null, null:null, "
"null:0.1}"}),
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_FLOAT,
FieldType::OLAP_FIELD_TYPE_DOUBLE,
{"{0.33: 3.1415926,3.1415926: 22}", "{3.14, 15926: 22}", "{3.14}",
"{222:3444},", "{4.12, 677: 455: 356, 67.6:67.7}",
"{null:null,null:1.0,1.0:null}"},
{"{0.33:3.1415926, 3.1415925:22}", "{NULL:22}", "{}", "NULL",
"{NULL:NULL, 67.6:67.7}", "{NULL:NULL, NULL:1, 1:NULL}"}),
{"{0.33:3.1415926, 3.1415925:22}", "{null:22}", "{}", "\\N",
"{null:null, 67.6:67.7}", "{null:null, null:1, 1:null}"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DATE, FieldType::OLAP_FIELD_TYPE_DATETIME,
{"{2022-07-13: 2022-07-13 12:30:00, 2022-07-13 12:30:00: 2022-07-13 "
"12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, NULL: NULL, "
"12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, null: null, "
"2022-07-13:'2022-07-13 12:30:00'}",
// escaped char ':'
"{2022-07-13 12\\:30\\:00: 2022-07-13, 2022-07-13 12\\:30\\:00.000: "
"2022-07-13 12:30:00.000, 2022-07-13:\'2022-07-13 12:30:00\'}",
"\\N"},
{"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:NULL, 2022-07-13:NULL, "
"NULL:NULL, 2022-07-13:NULL}",
{"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:null, 2022-07-13:null, "
"null:null, 2022-07-13:null}",
"{2022-07-13:2022-07-13 00:00:00, 2022-07-13:2022-07-13 12:30:00, "
"2022-07-13:NULL}",
"NULL"}),
"2022-07-13:null}",
"\\N"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DATETIME, FieldType::OLAP_FIELD_TYPE_DECIMAL,
{"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, NULL: NULL}",
{"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, null: null}",
"{\"2022-07-13 12:30:00\": \"12.45675432\"}",
"{2022-07-13 12\\:30\\:00:12.45675432, 2022-07-13#12:30:00: 12.45675432}",
"{2022-07-13 12\\:30\\:00.0000:12.45675432, null:12.34}"},
{"{2022-07-13 12:00:00:30.000000000, 2022-07-13 00:00:00:12.456754320, "
"NULL:NULL}",
"{NULL:NULL}",
"null:null}",
"{null:null}",
"{2022-07-13 12:30:00:12.456754320, 2022-07-13 12:00:00:30.000000000}",
"{2022-07-13 12:30:00:12.456754320, NULL:12.340000000}"}),
"{2022-07-13 12:30:00:12.456754320, null:12.340000000}"}),
};
for (auto type_pair : nested_field_types) {
@ -551,7 +564,8 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(expect_str, rand_s_d.to_string());
@ -565,8 +579,9 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col2, col2->size() - 1, buffer_writer,
formatOptions);
stat = serde->serialize_one_cell_to_json(*col2, col2->size() - 1, buffer_writer,
formatOptions);
EXPECT_EQ(stat.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test from string: " << rand_s_d.to_string() << std::endl;
@ -581,26 +596,26 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DATE, FieldType::OLAP_FIELD_TYPE_DATETIME,
{"{2022-07-13: 2022-07-13 12:30:00, 2022-07-13 12:30:00: 2022-07-13 "
"12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, NULL: NULL, "
"12:30:00, 2022-07-13 12:30:00.000: 2022-07-13 12:30:00.000, null: null, "
"2022-07-13:'2022-07-13 12:30:00'}",
// escaped char ':'
"{2022-07-13 12\\:30\\:00: 2022-07-13, 2022-07-13 12\\:30\\:00.000: "
"2022-07-13 12:30:00.000, 2022-07-13:\'2022-07-13 12:30:00\'}"},
{"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:NULL, 2022-07-13:NULL, "
"NULL:NULL, 2022-07-13:2022-07-13 12:30:00}",
{"{2022-07-13:2022-07-13 12:30:00, 2022-07-13:null, 2022-07-13:null, "
"null:null, 2022-07-13:2022-07-13 12:30:00}",
"{2022-07-13:2022-07-13 00:00:00, 2022-07-13:2022-07-13 12:30:00, "
"2022-07-13:2022-07-13 12:30:00}"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_DATETIME, FieldType::OLAP_FIELD_TYPE_DECIMAL,
{"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, NULL: NULL}",
{"{2022-07-13 12:30:00: 12.45675432, 2022-07-13: 12.45675432, null: null}",
"{\"2022-07-13 12:30:00\": \"12.45675432\"}",
"{2022-07-13 12\\:30\\:00:12.45675432, 2022-07-13#12:30:00: 12.45675432}",
"{2022-07-13 12\\:30\\:00.0000:12.45675432, null:12.34}"},
{"{2022-07-13 12:00:00:30.000000000, 2022-07-13 00:00:00:12.456754320, "
"NULL:NULL}",
"null:null}",
"{2022-07-13 12:30:00:12.456754320}",
"{2022-07-13 12:30:00:12.456754320, 2022-07-13 12:00:00:30.000000000}",
"{2022-07-13 12:30:00:12.456754320, NULL:12.340000000}"}),
"{2022-07-13 12:30:00:12.456754320, null:12.340000000}"}),
};
for (auto type_pair : field_types) {
auto key_type = std::get<0>(type_pair);
@ -636,7 +651,8 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(expect_str, rand_s_d.to_string());
@ -657,10 +673,10 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_STRING,
{"[[Hello, World],[This, is, a, nested, array],null,[null,null,aaaa]]"},
{"[[Hello, World], [This, is, a, nested, array], NULL, [NULL, NULL, "
{"[[Hello, World], [This, is, a, nested, array], null, [null, null, "
"aaaa]]"},
{"[NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL]"},
{"[[Hello, World], [This, is, a, nested, array], NULL, [NULL, NULL, "
{"[null, null, null, null, null, null, null, null, null, null, null]"},
{"[[Hello, World], [This, is, a, nested, array], null, [null, null, "
"aaaa]]"}),
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_STRING,
@ -712,7 +728,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test : " << rand_s_d << std::endl;
@ -731,7 +749,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, formatOptions);
status = serde->serialize_one_cell_to_json(*col2, 0, buffer_writer,
formatOptions);
EXPECT_EQ(status.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test from string: " << rand_s_d << std::endl;
@ -754,7 +774,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, formatOptions);
st = serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(expect_str_1, rand_s_d.to_string());
@ -782,7 +804,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
"3050124830713523,\"mKH57V-YmwCNFq-vs8-vUIX\":0.36446683035480754},{\"HfhEMX-"
"oAMBJCC-YIC-hCqN\":0.8131454631693608,\"xrnTFd-ikONWik-T7J-sL8J\":0."
"37509722558990855,\"SVyEes-77mlzIr-N6c-DkYw\":0.4703053945053086,"
"\"NULL\":0.1,\"\\N\":0.1,null:null}, {NULL:0.1, NULL:NULL, \"NULL\":0}]"},
"\"null\":0.1,\"null\":0.1,null:null}, {null:0.1, null:null, \"null\":0}]"},
{"[{2cKtIM-L1mOcEm-udR-HcB2:0.23929040957798242, "
"eof2UN-Is0EEuA-H5D-hE58:0.42373055809540094, "
"FwUSOB-R8rtK9W-BVG-8wYZ:0.7680704548628841}, "
@ -797,8 +819,8 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
"mKH57V-YmwCNFq-vs8-vUIX:0.36446683035480754}, "
"{HfhEMX-oAMBJCC-YIC-hCqN:0.8131454631693608, "
"xrnTFd-ikONWik-T7J-sL8J:0.37509722558990855, "
"SVyEes-77mlzIr-N6c-DkYw:0.4703053945053086, NULL:0.1, \\N:0.1, NULL:NULL}, "
"{NULL:0.1, NULL:NULL, NULL:0}]"},
"SVyEes-77mlzIr-N6c-DkYw:0.4703053945053086, null:0.1, null:0.1, null:null}, "
"{null:0.1, null:null, null:0}]"},
{""},
{"[{2cKtIM-L1mOcEm-udR-HcB2:0.23929040957798242, "
"eof2UN-Is0EEuA-H5D-hE58:0.42373055809540094, "
@ -815,7 +837,7 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
"{HfhEMX-oAMBJCC-YIC-hCqN:0.8131454631693608, "
"xrnTFd-ikONWik-T7J-sL8J:0.37509722558990855, "
"SVyEes-77mlzIr-N6c-DkYw:0.4703053945053086, "
"NULL:0.1, \\N:0.1, NULL:NULL}, {NULL:0.1, NULL:NULL, NULL:0}]"})};
"null:0.1, null:0.1, null:null}, {null:0.1, null:null, null:0}]"})};
for (auto type_pair : nested_field_types) {
auto key_type = std::get<0>(type_pair);
DataTypePtr nested_key_data_type_ptr =
@ -860,7 +882,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test : " << rand_s_d << std::endl;
@ -879,7 +903,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, formatOptions);
status = serde->serialize_one_cell_to_json(*col2, 0, buffer_writer,
formatOptions);
EXPECT_EQ(status.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test from string: " << rand_s_d << std::endl;
@ -902,7 +928,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, formatOptions);
st = serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(expect_str_1, rand_s_d.to_string());
@ -1072,7 +1100,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test : " << rand_s_d << std::endl;
@ -1091,7 +1121,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, formatOptions);
status = serde->serialize_one_cell_to_json(*col2, 0, buffer_writer,
formatOptions);
EXPECT_EQ(status.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test from string: " << rand_s_d << std::endl;
@ -1114,7 +1146,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, formatOptions);
st = serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(expect_str_1, rand_s_d.to_string());
@ -1207,7 +1241,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col, 0, buffer_writer, formatOptions);
st = serde->serialize_one_cell_to_json(*col, 0, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test : " << rand_s_d << std::endl;
@ -1226,7 +1262,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde->serialize_one_cell_to_json(*col2, 0, buffer_writer, formatOptions);
status = serde->serialize_one_cell_to_json(*col2, 0, buffer_writer,
formatOptions);
EXPECT_EQ(status.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
std::cout << "test from string: " << rand_s_d << std::endl;
@ -1249,7 +1287,9 @@ TEST(TextSerde, ComplexTypeWithNestedSerdeTextTest) {
auto ser_col = ColumnString::create();
ser_col->reserve(1);
VectorBufferWriter buffer_writer(*ser_col.get());
serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer, formatOptions);
st = serde_1->serialize_one_cell_to_json(*col3, 0, buffer_writer,
formatOptions);
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
EXPECT_EQ(expect_str_1, rand_s_d.to_string());