[fix](serde) Fix the bug in DataTypeNullableSerDe.deserialize_column_from_fixed_json (#41217) (#41960)
Backport of #41217. ## Proposed changes Issue Number: close #xxx <!--Describe your changes.-->
This commit is contained in:
@ -250,6 +250,9 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone,
|
||||
Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(
|
||||
IColumn& column, Slice& slice, int rows, int* num_deserialized,
|
||||
const FormatOptions& options) const {
|
||||
if (rows < 1) [[unlikely]] {
|
||||
return Status::OK();
|
||||
}
|
||||
Status st = deserialize_one_cell_from_json(column, slice, options);
|
||||
if (!st.ok()) {
|
||||
return st;
|
||||
@ -262,6 +265,9 @@ Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(
|
||||
|
||||
void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& column,
|
||||
int times) const {
|
||||
if (times < 1) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
auto& col = static_cast<ColumnVector<UInt64>&>(column);
|
||||
auto sz = col.size();
|
||||
UInt64 val = col.get_element(sz - 1);
|
||||
|
||||
@ -178,6 +178,9 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, con
|
||||
Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, Slice& slice,
|
||||
int rows, int* num_deserialized,
|
||||
const FormatOptions& options) const {
|
||||
if (rows < 1) [[unlikely]] {
|
||||
return Status::OK();
|
||||
}
|
||||
Status st = deserialize_one_cell_from_json(column, slice, options);
|
||||
if (!st.ok()) {
|
||||
return st;
|
||||
@ -189,6 +192,9 @@ Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column,
|
||||
|
||||
void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& column,
|
||||
int times) const {
|
||||
if (times < 1) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
auto& col = static_cast<ColumnVector<UInt32>&>(column);
|
||||
auto sz = col.size();
|
||||
UInt32 val = col.get_element(sz - 1);
|
||||
|
||||
@ -280,6 +280,9 @@ template <typename T>
|
||||
Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
|
||||
IColumn& column, Slice& slice, int rows, int* num_deserialized,
|
||||
const FormatOptions& options) const {
|
||||
if (rows < 1) [[unlikely]] {
|
||||
return Status::OK();
|
||||
}
|
||||
Status st = deserialize_one_cell_from_json(column, slice, options);
|
||||
if (!st.ok()) {
|
||||
return st;
|
||||
@ -293,6 +296,9 @@ Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
|
||||
template <typename T>
|
||||
void DataTypeDecimalSerDe<T>::insert_column_last_value_multiple_times(IColumn& column,
|
||||
int times) const {
|
||||
if (times < 1) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
auto& col = static_cast<ColumnDecimal<T>&>(column);
|
||||
auto sz = col.size();
|
||||
|
||||
|
||||
@ -131,12 +131,15 @@ Status DataTypeNullableSerDe::deserialize_column_from_hive_text_vector(
|
||||
Status DataTypeNullableSerDe::deserialize_column_from_fixed_json(
|
||||
IColumn& column, Slice& slice, int rows, int* num_deserialized,
|
||||
const FormatOptions& options) const {
|
||||
if (rows < 1) [[unlikely]] {
|
||||
return Status::OK();
|
||||
}
|
||||
auto& col = static_cast<ColumnNullable&>(column);
|
||||
Status st = deserialize_one_cell_from_json(column, slice, options);
|
||||
if (!st.ok()) {
|
||||
return st;
|
||||
}
|
||||
if (rows - 1 != 0) {
|
||||
if (rows > 1) {
|
||||
auto& null_map = col.get_null_map_data();
|
||||
auto& nested_column = col.get_nested_column();
|
||||
|
||||
|
||||
@ -228,6 +228,9 @@ template <typename T>
|
||||
Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
|
||||
IColumn& column, Slice& slice, int rows, int* num_deserialized,
|
||||
const FormatOptions& options) const {
|
||||
if (rows < 1) [[unlikely]] {
|
||||
return Status::OK();
|
||||
}
|
||||
Status st = deserialize_one_cell_from_json(column, slice, options);
|
||||
if (!st.ok()) {
|
||||
return st;
|
||||
@ -241,6 +244,9 @@ Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
|
||||
template <typename T>
|
||||
void DataTypeNumberSerDe<T>::insert_column_last_value_multiple_times(IColumn& column,
|
||||
int times) const {
|
||||
if (times < 1) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
auto& col = static_cast<ColumnVector<T>&>(column);
|
||||
auto sz = col.size();
|
||||
T val = col.get_element(sz - 1);
|
||||
|
||||
@ -243,17 +243,26 @@ public:
|
||||
virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
|
||||
int* num_deserialized,
|
||||
const FormatOptions& options) const {
|
||||
// Implementations of this function must handle three cases: rows is 0, rows is 1, and rows is greater than 1.
|
||||
if (rows < 1) [[unlikely]] {
|
||||
return Status::OK();
|
||||
}
|
||||
Status st = deserialize_one_cell_from_json(column, slice, options);
|
||||
if (!st.ok()) {
|
||||
*num_deserialized = 0;
|
||||
return st;
|
||||
}
|
||||
insert_column_last_value_multiple_times(column, rows - 1);
|
||||
if (rows > 1) [[likely]] {
|
||||
insert_column_last_value_multiple_times(column, rows - 1);
|
||||
}
|
||||
*num_deserialized = rows;
|
||||
return Status::OK();
|
||||
}
|
||||
// Insert the last value to the end of this column multiple times.
|
||||
virtual void insert_column_last_value_multiple_times(IColumn& column, int times) const {
|
||||
if (times < 1) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
// If you try to simplify this operation by using `column.insert_many_from(column, column.size() - 1, times);`
|
||||
// you are likely to get incorrect data results.
|
||||
MutableColumnPtr dum_col = column.clone_empty();
|
||||
|
||||
@ -218,6 +218,9 @@ public:
|
||||
Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows,
|
||||
int* num_deserialized,
|
||||
const FormatOptions& options) const override {
|
||||
if (rows < 1) [[unlikely]] {
|
||||
return Status::OK();
|
||||
}
|
||||
Status st = deserialize_one_cell_from_json(column, slice, options);
|
||||
if (!st.ok()) {
|
||||
return st;
|
||||
@ -229,6 +232,9 @@ public:
|
||||
}
|
||||
|
||||
void insert_column_last_value_multiple_times(IColumn& column, int times) const override {
|
||||
if (times < 1) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
auto& col = static_cast<ColumnString&>(column);
|
||||
auto sz = col.size();
|
||||
|
||||
|
||||
@ -654,6 +654,7 @@ Status RowGroupReader::_fill_partition_columns(
|
||||
auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
|
||||
Slice slice(value.data(), value.size());
|
||||
int num_deserialized = 0;
|
||||
// Be careful when reading empty rows from parquet row groups.
|
||||
if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows,
|
||||
&num_deserialized,
|
||||
_text_formatOptions) != Status::OK()) {
|
||||
|
||||
Reference in New Issue
Block a user