[fix](ParquetReader) Fix reading of Parquet INT96 values in the Parquet reader (#32394)
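`hi - JULIAN_EPOCH_OFFSET_DAYS` can be negative (when the Julian day in `hi` is earlier than the epoch offset), so the `ParquetInt96` fields and constants cannot all be unsigned integers; they are changed to signed types.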
This commit is contained in:
@ -318,12 +318,13 @@ public:
|
||||
auto& data = static_cast<ColumnVector<UInt64>*>(dst_col.get())->get_data();
|
||||
|
||||
for (int i = 0; i < rows; i++) {
|
||||
ParquetInt96 x = ParquetInt96_data[i];
|
||||
auto& num = data[start_idx + i];
|
||||
auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num);
|
||||
int64_t micros = x.to_timestamp_micros();
|
||||
value.from_unixtime(micros / 1000000, *_convert_params->ctz);
|
||||
value.set_microsecond(micros % 1000000);
|
||||
ParquetInt96 src_cell_data = ParquetInt96_data[i];
|
||||
auto& dst_value =
|
||||
reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]);
|
||||
|
||||
int64_t timestamp_with_micros = src_cell_data.to_timestamp_micros();
|
||||
dst_value.from_unixtime(timestamp_with_micros / 1000000, *_convert_params->ctz);
|
||||
dst_value.set_microsecond(timestamp_with_micros % 1000000);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -24,9 +24,9 @@
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
const uint32_t ParquetInt96::JULIAN_EPOCH_OFFSET_DAYS = 2440588;
|
||||
const uint64_t ParquetInt96::MICROS_IN_DAY = 86400000000;
|
||||
const uint64_t ParquetInt96::NANOS_PER_MICROSECOND = 1000;
|
||||
const int32_t ParquetInt96::JULIAN_EPOCH_OFFSET_DAYS = 2440588;
|
||||
const int64_t ParquetInt96::MICROS_IN_DAY = 86400000000;
|
||||
const int64_t ParquetInt96::NANOS_PER_MICROSECOND = 1000;
|
||||
|
||||
ColumnSelectVector::ColumnSelectVector(const uint8_t* filter_map, size_t filter_map_size,
|
||||
bool filter_all) {
|
||||
|
||||
@ -48,10 +48,10 @@ struct RowRange {
|
||||
|
||||
#pragma pack(1)
|
||||
struct ParquetInt96 {
|
||||
uint64_t lo; // time of nanoseconds in a day
|
||||
uint32_t hi; // days from julian epoch
|
||||
int64_t lo; // time of nanoseconds in a day
|
||||
int32_t hi; // days from julian epoch
|
||||
|
||||
inline uint64_t to_timestamp_micros() const {
|
||||
inline int64_t to_timestamp_micros() const {
|
||||
return (hi - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + lo / NANOS_PER_MICROSECOND;
|
||||
}
|
||||
inline __int128 to_int128() const {
|
||||
@ -60,9 +60,9 @@ struct ParquetInt96 {
|
||||
return ans;
|
||||
}
|
||||
|
||||
static const uint32_t JULIAN_EPOCH_OFFSET_DAYS;
|
||||
static const uint64_t MICROS_IN_DAY;
|
||||
static const uint64_t NANOS_PER_MICROSECOND;
|
||||
static const int32_t JULIAN_EPOCH_OFFSET_DAYS;
|
||||
static const int64_t MICROS_IN_DAY;
|
||||
static const int64_t NANOS_PER_MICROSECOND;
|
||||
};
|
||||
#pragma pack()
|
||||
static_assert(sizeof(ParquetInt96) == 12, "The size of ParquetInt96 is not 12.");
|
||||
|
||||
Reference in New Issue
Block a user