[Opt](exec)(multi-catalog) Opt date type reading. (#29571)

This commit is contained in:
Qi Chen
2024-01-09 23:29:38 +08:00
committed by yiguolei
parent fe5b0e9880
commit 7287c0ca15
5 changed files with 55 additions and 50 deletions

View File

@ -124,9 +124,8 @@ struct ConvertParams {
int64_t scale_to_nano_factor = 1;
DecimalScaleParams decimal_scale;
FieldSchema* field_schema = nullptr;
size_t start_idx = 0;
void init(FieldSchema* field_schema_, cctz::time_zone* ctz_, size_t start_idx_ = 0) {
void init(FieldSchema* field_schema_, cctz::time_zone* ctz_) {
field_schema = field_schema_;
if (ctz_ != nullptr) {
ctz = ctz_;
@ -165,7 +164,6 @@ struct ConvertParams {
t.from_unixtime(0, *ctz);
offset_days = t.day() == 31 ? -1 : 0;
}
start_idx = start_idx_;
}
template <typename DecimalPrimitiveType>
@ -233,11 +231,12 @@ struct NumberToNumberConvert : public ColumnConvert {
size_t rows = src_col->size();
auto& src_data = static_cast<const ColumnType*>(src_col.get())->get_data();
dst_col->resize(_convert_params->start_idx + rows);
size_t start_idx = dst_col->size();
dst_col->resize(start_idx + rows);
auto& data = static_cast<ColumnVector<dst_type>&>(*dst_col.get()).get_data();
for (int i = 0; i < rows; i++) {
dst_type value = static_cast<dst_type>(src_data[i]);
data[_convert_params->start_idx + i] = value;
data[start_idx + i] = value;
}
return Status::OK();
@ -288,12 +287,13 @@ public:
size_t rows = src_col->size() / sizeof(ParquetInt96);
auto& src_data = static_cast<const ColumnVector<Int8>*>(src_col.get())->get_data();
auto ParquetInt96_data = (ParquetInt96*)src_data.data();
dst_col->resize(_convert_params->start_idx + rows);
size_t start_idx = dst_col->size();
dst_col->resize(start_idx + rows);
auto& data = static_cast<ColumnVector<UInt64>*>(dst_col.get())->get_data();
for (int i = 0; i < rows; i++) {
ParquetInt96 x = ParquetInt96_data[i];
auto& num = data[_convert_params->start_idx + i];
auto& num = data[start_idx + i];
auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num);
int64_t micros = x.to_timestamp_micros();
value.from_unixtime(micros / 1000000, *_convert_params->ctz);
@ -309,14 +309,15 @@ public:
convert_null(src_col, dst_col);
size_t rows = src_col->size();
dst_col->resize(_convert_params->start_idx + rows);
size_t start_idx = dst_col->size();
dst_col->resize(start_idx + rows);
auto src_data = static_cast<const ColumnVector<int64_t>*>(src_col.get())->get_data().data();
auto& data = static_cast<ColumnVector<UInt64>*>(dst_col.get())->get_data();
for (int i = 0; i < rows; i++) {
int64_t x = src_data[i];
auto& num = data[_convert_params->start_idx + i];
auto& num = data[start_idx + i];
auto& value = reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num);
value.from_unixtime(x / _convert_params->second_mask, *_convert_params->ctz);
value.set_microsecond((x % _convert_params->second_mask) *
@ -332,17 +333,16 @@ public:
convert_null(src_col, dst_col);
size_t rows = src_col->size();
dst_col->resize(_convert_params->start_idx + rows);
size_t start_idx = dst_col->size();
dst_col->reserve(start_idx + rows);
auto& src_data = static_cast<const ColumnVector<int32>*>(src_col.get())->get_data();
auto& data = static_cast<ColumnDateV2*>(dst_col.get())->get_data();
date_day_offset_dict& date_dict = date_day_offset_dict::get();
for (int i = 0; i < rows; i++) {
auto& value = reinterpret_cast<DateV2Value<DateV2ValueType>&>(
data[_convert_params->start_idx + i]);
int64_t date_value = (int64_t)src_data[i] + _convert_params->offset_days;
value = date_dict[date_value];
data.push_back_without_reserve(date_dict[date_value].to_date_int_val());
}
return Status::OK();
@ -359,7 +359,8 @@ public:
DecimalScaleParams& scale_params = _convert_params->decimal_scale;
auto buf = static_cast<const ColumnString*>(src_col.get())->get_chars().data();
auto& offset = static_cast<const ColumnString*>(src_col.get())->get_offsets();
dst_col->resize(_convert_params->start_idx + rows);
size_t start_idx = dst_col->size();
dst_col->resize(start_idx + rows);
auto& data = static_cast<ColumnDecimal<DecimalType>*>(dst_col.get())->get_data();
for (int i = 0; i < rows; i++) {
@ -380,13 +381,14 @@ public:
LOG(FATAL) << "__builtin_unreachable";
__builtin_unreachable();
}
auto& v = reinterpret_cast<DecimalType&>(data[_convert_params->start_idx + i]);
auto& v = reinterpret_cast<DecimalType&>(data[start_idx + i]);
v = (DecimalType)value;
}
return Status::OK();
}
};
template <typename NumberType, typename DecimalPhysicalType, typename ValueCopyType,
DecimalScaleParams::ScaleType ScaleType>
class NumberToDecimal : public ColumnConvert {
@ -397,7 +399,8 @@ public:
size_t rows = src_col->size();
auto* src_data =
static_cast<const ColumnVector<NumberType>*>(src_col.get())->get_data().data();
dst_col->resize(_convert_params->start_idx + rows);
size_t start_idx = dst_col->size();
dst_col->resize(start_idx + rows);
DecimalScaleParams& scale_params = _convert_params->decimal_scale;
auto* data =
@ -410,7 +413,7 @@ public:
} else if constexpr (ScaleType == DecimalScaleParams::SCALE_DOWN) {
value /= scale_params.scale_factor;
}
data[_convert_params->start_idx + i] = (DecimalPhysicalType)value;
data[start_idx + i] = (DecimalPhysicalType)value;
}
return Status::OK();
}

View File

@ -577,7 +577,7 @@ Status ScalarColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr
if (need_convert) {
std::unique_ptr<ParquetConvert::ColumnConvert> converter;
ParquetConvert::ConvertParams convert_params;
convert_params.init(_field_schema, _ctz, doris_column->size());
convert_params.init(_field_schema, _ctz);
RETURN_IF_ERROR(ParquetConvert::get_converter(parquet_physical_type, show_type, type,
&converter, &convert_params));
auto x = doris_column->assume_mutable();

View File

@ -2668,17 +2668,14 @@ char* DateV2Value<T>::to_string(char* to, int scale) const {
return to + len + 1;
}
// Accessor: returns the int_val_ member of this DateV2Value unchanged.
template <typename T>
typename DateV2Value<T>::underlying_value DateV2Value<T>::to_date_int_val() const {
return int_val_;
}
// [1900-01-01, 2039-12-31]
static std::array<DateV2Value<DateV2ValueType>, date_day_offset_dict::DICT_DAYS>
DATE_DAY_OFFSET_ITEMS;
// [1900-01-01, 2039-12-31]
static std::array<std::array<std::array<int, 31>, 12>, 140> DATE_DAY_OFFSET_DICT;
std::array<DateV2Value<DateV2ValueType>, date_day_offset_dict::DICT_DAYS>
date_day_offset_dict::DATE_DAY_OFFSET_ITEMS;
static bool DATE_DAY_OFFSET_ITEMS_INIT = false;
// [1900-01-01, 2039-12-31]
std::array<std::array<std::array<int, 31>, 12>, 140> date_day_offset_dict::DATE_DAY_OFFSET_DICT;
bool date_day_offset_dict::DATE_DAY_OFFSET_ITEMS_INIT = false;
date_day_offset_dict date_day_offset_dict::instance = date_day_offset_dict();
@ -2718,16 +2715,6 @@ date_day_offset_dict::date_day_offset_dict() {
DATE_DAY_OFFSET_ITEMS_INIT = true;
}
// Looks up the DateV2 value for a day offset.
//
// `day` is shifted by DAY_BEFORE_EPOCH to obtain a slot in DATE_DAY_OFFSET_ITEMS.
// When the slot is inside [0, DICT_DAYS) the precomputed entry is returned.
// Otherwise the first table entry is copied and `index` is applied to it with
// operator+= (presumably advancing it by that many days -- confirm against
// DateV2Value::operator+=).
DateV2Value<DateV2ValueType> date_day_offset_dict::operator[](int day) const {
    // Widen before adding: `day + DAY_BEFORE_EPOCH` evaluated in int is signed
    // overflow (undefined behaviour) when `day` is close to INT_MAX.
    const int64_t index = static_cast<int64_t>(day) + DAY_BEFORE_EPOCH;
    if (LIKELY(index >= 0 && index < DICT_DAYS)) {
        return DATE_DAY_OFFSET_ITEMS[index];
    }
    // Out of the table's range: derive the value from the first entry.
    DateV2Value<DateV2ValueType> d = DATE_DAY_OFFSET_ITEMS[0];
    return d += index;
}
// Reads the precomputed entry of DATE_DAY_OFFSET_DICT for a calendar date.
// The table is addressed by (year - START_YEAR, month - 1, day - 1); no range
// checking is done here.
int date_day_offset_dict::daynr(int year, int month, int day) const {
    const int year_slot = year - START_YEAR;
    const int month_slot = month - 1;
    const int day_slot = day - 1;
    return DATE_DAY_OFFSET_DICT[year_slot][month_slot][day_slot];
}

View File

@ -753,9 +753,11 @@ public:
// Constructor
DateV2Value() : date_v2_value_(0, 0, 0, 0, 0, 0, 0) {}
DateV2Value(DateV2Value<T>& other) { int_val_ = other.to_date_int_val(); }
DateV2Value(underlying_value int_val) : int_val_(int_val) {}
DateV2Value(const DateV2Value<T>& other) { int_val_ = other.to_date_int_val(); }
DateV2Value(DateV2Value<T>& other) = default;
DateV2Value(const DateV2Value<T>& other) = default;
static DateV2Value create_from_olap_date(uint64_t value) {
DateV2Value<T> date;
@ -1132,7 +1134,7 @@ public:
this->microsecond() == 0;
}
underlying_value to_date_int_val() const;
underlying_value to_date_int_val() const { return int_val_; }
bool from_date(uint32_t value);
bool from_datetime(uint64_t value);
@ -1528,14 +1530,6 @@ int64_t datetime_diff(const VecDateTimeValue& ts_value1, const DateV2Value<T>& t
*/
class date_day_offset_dict {
private:
static date_day_offset_dict instance;
date_day_offset_dict();
~date_day_offset_dict() = default;
date_day_offset_dict(const date_day_offset_dict&) = default;
date_day_offset_dict& operator=(const date_day_offset_dict&) = default;
public:
static constexpr int DAY_BEFORE_EPOCH = 25567; // 1900-01-01
static constexpr int DAY_AFTER_EPOCH = 25566; // 2039-12-31
static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + 1 + DAY_AFTER_EPOCH; // 1 means 1970-01-01
@ -1545,6 +1539,19 @@ public:
static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR =
719528; // 1970-01-01 (start from 0000-01-01, 0000-01-01 is day 1, returns 1)
static std::array<DateV2Value<DateV2ValueType>, DICT_DAYS> DATE_DAY_OFFSET_ITEMS;
static std::array<std::array<std::array<int, 31>, 12>, 140> DATE_DAY_OFFSET_DICT;
static bool DATE_DAY_OFFSET_ITEMS_INIT;
static date_day_offset_dict instance;
date_day_offset_dict();
~date_day_offset_dict() = default;
date_day_offset_dict(const date_day_offset_dict&) = default;
date_day_offset_dict& operator=(const date_day_offset_dict&) = default;
public:
// True when `year` lies inside the closed range [START_YEAR, END_YEAR].
static bool can_speed_up_calc_daynr(int year) {
    const bool before_range = year < START_YEAR;
    const bool after_range = year > END_YEAR;
    return !(before_range || after_range);
}
// Converts `daynr` into an offset relative to DAY_OFFSET_CAL_START_POINT_DAYNR.
static int get_offset_by_daynr(int daynr) {
    const int start_point = DAY_OFFSET_CAL_START_POINT_DAYNR;
    return daynr - start_point;
}
@ -1558,7 +1565,15 @@ public:
static bool get_dict_init();
DateV2Value<DateV2ValueType> operator[](int day) const;
// Looks up the DateV2 value for a day offset.
//
// `day` is shifted by DAY_BEFORE_EPOCH to obtain a slot in DATE_DAY_OFFSET_ITEMS.
// When the slot is inside [0, DICT_DAYS) the precomputed entry is returned.
// Otherwise the first table entry is copied and `index` is applied to it with
// operator+= (presumably advancing it by that many days -- confirm against
// DateV2Value::operator+=).
inline DateV2Value<DateV2ValueType> operator[](int day) const {
    // Widen before adding: `day + DAY_BEFORE_EPOCH` evaluated in int is signed
    // overflow (undefined behaviour) when `day` is close to INT_MAX.
    const int64_t index = static_cast<int64_t>(day) + DAY_BEFORE_EPOCH;
    if (LIKELY(index >= 0 && index < DICT_DAYS)) {
        return DATE_DAY_OFFSET_ITEMS[index];
    }
    // Out of the table's range: derive the value from the first entry.
    DateV2Value<DateV2ValueType> d = DATE_DAY_OFFSET_ITEMS[0];
    return d += index;
}
int daynr(int year, int month, int day) const;
};

View File

@ -267,7 +267,7 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column
if (need_convert) {
std::unique_ptr<ParquetConvert::ColumnConvert> converter;
ParquetConvert::ConvertParams convert_params;
convert_params.init(field_schema, &ctz, doris_column->size());
convert_params.init(field_schema, &ctz);
RETURN_IF_ERROR(ParquetConvert::get_converter(parquet_physical_type, show_type, data_type,
&converter, &convert_params));
auto x = doris_column->assume_mutable();