diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h index 1b876a86ce..e5ee210417 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.h +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h @@ -124,9 +124,8 @@ struct ConvertParams { int64_t scale_to_nano_factor = 1; DecimalScaleParams decimal_scale; FieldSchema* field_schema = nullptr; - size_t start_idx = 0; - void init(FieldSchema* field_schema_, cctz::time_zone* ctz_, size_t start_idx_ = 0) { + void init(FieldSchema* field_schema_, cctz::time_zone* ctz_) { field_schema = field_schema_; if (ctz_ != nullptr) { ctz = ctz_; @@ -165,7 +164,6 @@ struct ConvertParams { t.from_unixtime(0, *ctz); offset_days = t.day() == 31 ? -1 : 0; } - start_idx = start_idx_; } template @@ -233,11 +231,12 @@ struct NumberToNumberConvert : public ColumnConvert { size_t rows = src_col->size(); auto& src_data = static_cast(src_col.get())->get_data(); - dst_col->resize(_convert_params->start_idx + rows); + size_t start_idx = dst_col->size(); + dst_col->resize(start_idx + rows); auto& data = static_cast&>(*dst_col.get()).get_data(); for (int i = 0; i < rows; i++) { dst_type value = static_cast(src_data[i]); - data[_convert_params->start_idx + i] = value; + data[start_idx + i] = value; } return Status::OK(); @@ -288,12 +287,13 @@ public: size_t rows = src_col->size() / sizeof(ParquetInt96); auto& src_data = static_cast*>(src_col.get())->get_data(); auto ParquetInt96_data = (ParquetInt96*)src_data.data(); - dst_col->resize(_convert_params->start_idx + rows); + size_t start_idx = dst_col->size(); + dst_col->resize(start_idx + rows); auto& data = static_cast*>(dst_col.get())->get_data(); for (int i = 0; i < rows; i++) { ParquetInt96 x = ParquetInt96_data[i]; - auto& num = data[_convert_params->start_idx + i]; + auto& num = data[start_idx + i]; auto& value = reinterpret_cast&>(num); int64_t micros = x.to_timestamp_micros(); value.from_unixtime(micros / 1000000, *_convert_params->ctz); @@ -309,14 +309,15 @@ public: convert_null(src_col, dst_col); size_t rows = src_col->size(); - dst_col->resize(_convert_params->start_idx + rows); + size_t start_idx = dst_col->size(); + dst_col->resize(start_idx + rows); auto src_data = static_cast*>(src_col.get())->get_data().data(); auto& data = static_cast*>(dst_col.get())->get_data(); for (int i = 0; i < rows; i++) { int64_t x = src_data[i]; - auto& num = data[_convert_params->start_idx + i]; + auto& num = data[start_idx + i]; auto& value = reinterpret_cast&>(num); value.from_unixtime(x / _convert_params->second_mask, *_convert_params->ctz); value.set_microsecond((x % _convert_params->second_mask) * @@ -332,17 +333,16 @@ public: convert_null(src_col, dst_col); size_t rows = src_col->size(); - dst_col->resize(_convert_params->start_idx + rows); + size_t start_idx = dst_col->size(); + dst_col->reserve(start_idx + rows); auto& src_data = static_cast*>(src_col.get())->get_data(); auto& data = static_cast(dst_col.get())->get_data(); date_day_offset_dict& date_dict = date_day_offset_dict::get(); for (int i = 0; i < rows; i++) { - auto& value = reinterpret_cast&>( - data[_convert_params->start_idx + i]); int64_t date_value = (int64_t)src_data[i] + _convert_params->offset_days; - value = date_dict[date_value]; + data.push_back_without_reserve(date_dict[date_value].to_date_int_val()); } return Status::OK(); @@ -359,7 +359,8 @@ public: DecimalScaleParams& scale_params = _convert_params->decimal_scale; auto buf = static_cast(src_col.get())->get_chars().data(); auto& offset = static_cast(src_col.get())->get_offsets(); - dst_col->resize(_convert_params->start_idx + rows); + size_t start_idx = dst_col->size(); + dst_col->resize(start_idx + rows); auto& data = static_cast*>(dst_col.get())->get_data(); for (int i = 0; i < rows; i++) { @@ -380,13 +381,14 @@ public: LOG(FATAL) << "__builtin_unreachable"; __builtin_unreachable(); } - auto& v = reinterpret_cast(data[_convert_params->start_idx + i]); + auto& v = reinterpret_cast(data[start_idx + i]); v = (DecimalType)value; } return Status::OK(); } }; + template class NumberToDecimal : public ColumnConvert { @@ -397,7 +399,8 @@ public: size_t rows = src_col->size(); auto* src_data = static_cast*>(src_col.get())->get_data().data(); - dst_col->resize(_convert_params->start_idx + rows); + size_t start_idx = dst_col->size(); + dst_col->resize(start_idx + rows); DecimalScaleParams& scale_params = _convert_params->decimal_scale; auto* data = @@ -410,7 +413,7 @@ public: } else if constexpr (ScaleType == DecimalScaleParams::SCALE_DOWN) { value /= scale_params.scale_factor; } - data[_convert_params->start_idx + i] = (DecimalPhysicalType)value; + data[start_idx + i] = (DecimalPhysicalType)value; } return Status::OK(); } diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index a675046e1f..7c08839413 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -577,7 +577,7 @@ Status ScalarColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr if (need_convert) { std::unique_ptr converter; ParquetConvert::ConvertParams convert_params; - convert_params.init(_field_schema, _ctz, doris_column->size()); + convert_params.init(_field_schema, _ctz); RETURN_IF_ERROR(ParquetConvert::get_converter(parquet_physical_type, show_type, type, &converter, &convert_params)); auto x = doris_column->assume_mutable(); diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 2be57b0ae2..7755cbea7a 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -2668,17 +2668,14 @@ char* DateV2Value::to_string(char* to, int scale) const { return to + len + 1; } -template -typename DateV2Value::underlying_value DateV2Value::to_date_int_val() const { - return int_val_; -} // [1900-01-01, 2039-12-31] -static std::array, date_day_offset_dict::DICT_DAYS> - DATE_DAY_OFFSET_ITEMS; -// [1900-01-01, 2039-12-31] -static std::array, 12>, 140> DATE_DAY_OFFSET_DICT; +std::array, date_day_offset_dict::DICT_DAYS> + date_day_offset_dict::DATE_DAY_OFFSET_ITEMS; -static bool DATE_DAY_OFFSET_ITEMS_INIT = false; +// [1900-01-01, 2039-12-31] +std::array, 12>, 140> date_day_offset_dict::DATE_DAY_OFFSET_DICT; + +bool date_day_offset_dict::DATE_DAY_OFFSET_ITEMS_INIT = false; date_day_offset_dict date_day_offset_dict::instance = date_day_offset_dict(); @@ -2718,16 +2715,6 @@ date_day_offset_dict::date_day_offset_dict() { DATE_DAY_OFFSET_ITEMS_INIT = true; } -DateV2Value date_day_offset_dict::operator[](int day) const { - int index = day + DAY_BEFORE_EPOCH; - if (LIKELY(index >= 0 && index < DICT_DAYS)) { - return DATE_DAY_OFFSET_ITEMS[index]; - } else { - DateV2Value d = DATE_DAY_OFFSET_ITEMS[0]; - return d += index; - } -} - int date_day_offset_dict::daynr(int year, int month, int day) const { return DATE_DAY_OFFSET_DICT[year - START_YEAR][month - 1][day - 1]; } diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 1999c23080..ad3619aba5 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -753,9 +753,11 @@ public: // Constructor DateV2Value() : date_v2_value_(0, 0, 0, 0, 0, 0, 0) {} - DateV2Value(DateV2Value& other) { int_val_ = other.to_date_int_val(); } + DateV2Value(underlying_value int_val) : int_val_(int_val) {} - DateV2Value(const DateV2Value& other) { int_val_ = other.to_date_int_val(); } + DateV2Value(DateV2Value& other) = default; + + DateV2Value(const DateV2Value& other) = default; static DateV2Value create_from_olap_date(uint64_t value) { DateV2Value date; @@ -1132,7 +1134,7 @@ public: this->microsecond() == 0; } - underlying_value to_date_int_val() const; + underlying_value to_date_int_val() const { return int_val_; } bool from_date(uint32_t value); bool from_datetime(uint64_t value); @@ -1528,14 +1530,6 @@ int64_t datetime_diff(const VecDateTimeValue& ts_value1, const DateV2Value& t */ class date_day_offset_dict { private: - static date_day_offset_dict instance; - - date_day_offset_dict(); - ~date_day_offset_dict() = default; - date_day_offset_dict(const date_day_offset_dict&) = default; - date_day_offset_dict& operator=(const date_day_offset_dict&) = default; - -public: static constexpr int DAY_BEFORE_EPOCH = 25567; // 1900-01-01 static constexpr int DAY_AFTER_EPOCH = 25566; // 2039-12-31 static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + 1 + DAY_AFTER_EPOCH; // 1 means 1970-01-01 @@ -1545,6 +1539,19 @@ public: static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR = 719528; // 1970-01-01 (start from 0000-01-01, 0000-01-01 is day 1, returns 1) + static std::array, DICT_DAYS> DATE_DAY_OFFSET_ITEMS; + static std::array, 12>, 140> DATE_DAY_OFFSET_DICT; + + static bool DATE_DAY_OFFSET_ITEMS_INIT; + + static date_day_offset_dict instance; + + date_day_offset_dict(); + ~date_day_offset_dict() = default; + date_day_offset_dict(const date_day_offset_dict&) = default; + date_day_offset_dict& operator=(const date_day_offset_dict&) = default; + +public: static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR && year <= END_YEAR; } static int get_offset_by_daynr(int daynr) { return daynr - DAY_OFFSET_CAL_START_POINT_DAYNR; } @@ -1558,7 +1565,15 @@ public: static bool get_dict_init(); - DateV2Value operator[](int day) const; + inline DateV2Value operator[](int day) const { + int index = day + DAY_BEFORE_EPOCH; + if (LIKELY(index >= 0 && index < DICT_DAYS)) { + return DATE_DAY_OFFSET_ITEMS[index]; + } else { + DateV2Value d = DATE_DAY_OFFSET_ITEMS[0]; + return d += index; + } + } int daynr(int year, int month, int day) const; }; diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp b/be/test/vec/exec/parquet/parquet_thrift_test.cpp index 4daa548e2e..cb850cf5d5 100644 --- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp +++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp @@ -267,7 +267,7 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column if (need_convert) { std::unique_ptr converter; ParquetConvert::ConvertParams convert_params; - convert_params.init(field_schema, &ctz, doris_column->size()); + convert_params.init(field_schema, &ctz); RETURN_IF_ERROR(ParquetConvert::get_converter(parquet_physical_type, show_type, data_type, &converter, &convert_params)); auto x = doris_column->assume_mutable();