[optimize](storage)optimize date in storage layer (#8967)
* opt date in storage * code style Co-authored-by: Wang Bo <wangbo36@meituan.com>
This commit is contained in:
@ -224,6 +224,9 @@ COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
|
||||
|
||||
// todo(wb): for the date type, values are stored as uint32_t but are still evaluated through Predicate<uint24_t>.
|
||||
// This is done for compatibility with the Row Version predicate.
|
||||
// Predicate<uint32_t> can be used for date once Row Version is removed.
|
||||
#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \
|
||||
template <class T> \
|
||||
void CLASS<T>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const { \
|
||||
@ -236,15 +239,40 @@ COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
|
||||
auto& null_bitmap = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>( \
|
||||
*(nullable_column->get_null_map_column_ptr())) \
|
||||
.get_data(); \
|
||||
for (uint16_t i = 0; i < size; i++) { \
|
||||
flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \
|
||||
if constexpr (std::is_same_v<T, uint24_t>) { \
|
||||
auto& predicate_column = \
|
||||
reinterpret_cast<const vectorized::PredicateColumnType<uint32_t>&>( \
|
||||
nullable_column->get_nested_column()); \
|
||||
uint32_t int32_val = 0; \
|
||||
char* int32_val_ptr = (char*)&int32_val; \
|
||||
memory_copy(int32_val_ptr, _value.get_data(), sizeof(uint24_t)); \
|
||||
auto& data_array_uint32_t = predicate_column.get_data(); \
|
||||
for (uint16_t i = 0; i < size; i++) { \
|
||||
flags[i] = (data_array_uint32_t[i] OP int32_val) && (!null_bitmap[i]); \
|
||||
} \
|
||||
} else { \
|
||||
for (uint16_t i = 0; i < size; i++) { \
|
||||
flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \
|
||||
} \
|
||||
} \
|
||||
} else { \
|
||||
auto& predicate_column = \
|
||||
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
|
||||
auto& data_array = predicate_column.get_data(); \
|
||||
for (uint16_t i = 0; i < size; i++) { \
|
||||
flags[i] = data_array[i] OP _value; \
|
||||
if constexpr (std::is_same_v<T, uint24_t>) { \
|
||||
auto& predicate_column = \
|
||||
reinterpret_cast<vectorized::PredicateColumnType<uint32_t>&>(column); \
|
||||
uint32_t int32_val = 0; \
|
||||
char* int32_val_ptr = (char*)&int32_val; \
|
||||
memory_copy(int32_val_ptr, _value.get_data(), sizeof(uint24_t)); \
|
||||
auto& data_array = predicate_column.get_data(); \
|
||||
for (uint16_t i = 0; i < size; i++) { \
|
||||
flags[i] = data_array[i] OP int32_val; \
|
||||
} \
|
||||
} else { \
|
||||
auto& predicate_column = \
|
||||
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
|
||||
auto& data_array = predicate_column.get_data(); \
|
||||
for (uint16_t i = 0; i < size; i++) { \
|
||||
flags[i] = data_array[i] OP _value; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
if (_opposite) { \
|
||||
@ -502,6 +530,7 @@ COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=)
|
||||
template CLASS<decimal12_t>::CLASS(uint32_t column_id, const decimal12_t& value, \
|
||||
bool opposite); \
|
||||
template CLASS<uint24_t>::CLASS(uint32_t column_id, const uint24_t& value, bool opposite); \
|
||||
template CLASS<uint32_t>::CLASS(uint32_t column_id, const uint32_t& value, bool opposite); \
|
||||
template CLASS<uint64_t>::CLASS(uint32_t column_id, const uint64_t& value, bool opposite); \
|
||||
template CLASS<bool>::CLASS(uint32_t column_id, const bool& value, bool opposite);
|
||||
|
||||
@ -663,6 +692,8 @@ COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(GreaterEqualPredicate)
|
||||
bool* flags) const; \
|
||||
template void CLASS<uint24_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
|
||||
bool* flags) const; \
|
||||
template void CLASS<uint32_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
|
||||
bool* flags) const; \
|
||||
template void CLASS<uint64_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
|
||||
bool* flags) const; \
|
||||
template void CLASS<bool>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
|
||||
|
||||
@ -290,7 +290,37 @@ private:
|
||||
uint16_t* sel, uint16_t size) const {
|
||||
uint16_t new_size = 0;
|
||||
|
||||
if (column->is_column_dictionary()) {
|
||||
if constexpr (std::is_same_v<T, uint24_t>) {
|
||||
auto* nested_col_ptr =
|
||||
vectorized::check_and_get_column<vectorized::PredicateColumnType<uint32_t>>(
|
||||
column);
|
||||
auto& data_array = nested_col_ptr->get_data();
|
||||
|
||||
uint24_t tmp_uint24_value;
|
||||
for (uint16_t i = 0; i < size; i++) {
|
||||
uint16_t idx = sel[i];
|
||||
if constexpr (is_nullable) {
|
||||
if ((*null_map)[idx]) {
|
||||
if constexpr (is_opposite) {
|
||||
sel[new_size++] = idx;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy((char*)(&tmp_uint24_value), (char*)(&(data_array[idx])), sizeof(uint24_t));
|
||||
if constexpr (!is_opposite) {
|
||||
if (_operator(_values.find(tmp_uint24_value), _values.end())) {
|
||||
sel[new_size++] = idx;
|
||||
}
|
||||
} else {
|
||||
if (!_operator(_values.find(tmp_uint24_value), _values.end())) {
|
||||
sel[new_size++] = idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else if (column->is_column_dictionary()) {
|
||||
if constexpr (std::is_same_v<T, StringValue>) {
|
||||
auto* nested_col_ptr = vectorized::check_and_get_column<
|
||||
vectorized::ColumnDictionary<vectorized::Int32>>(column);
|
||||
|
||||
@ -661,7 +661,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
|
||||
predicate->type() == PredicateType::IN_LIST ||
|
||||
predicate->type() == PredicateType::NOT_IN_LIST ||
|
||||
predicate->type() == PredicateType::IS_NULL ||
|
||||
predicate->type() == PredicateType::IS_NOT_NULL || type == OLAP_FIELD_TYPE_DATE ||
|
||||
predicate->type() == PredicateType::IS_NOT_NULL ||
|
||||
type == OLAP_FIELD_TYPE_DECIMAL) {
|
||||
short_cir_pred_col_id_set.insert(cid);
|
||||
_short_cir_eval_predicate.push_back(predicate);
|
||||
|
||||
@ -151,7 +151,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(FieldType type)
|
||||
return doris::vectorized::PredicateColumnType<doris::vectorized::Int128>::create();
|
||||
|
||||
case OLAP_FIELD_TYPE_DATE:
|
||||
return doris::vectorized::PredicateColumnType<uint24_t>::create();
|
||||
return doris::vectorized::PredicateColumnType<uint32_t>::create();
|
||||
|
||||
case OLAP_FIELD_TYPE_DATETIME:
|
||||
return doris::vectorized::PredicateColumnType<uint64_t>::create();
|
||||
|
||||
@ -140,6 +140,8 @@ public:
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
// Returns a read-only pointer to the first byte of the 3-byte `data` array backing this value.
const uint8_t* get_data() const { return data; }
|
||||
|
||||
private:
|
||||
uint8_t data[3];
|
||||
} __attribute__((packed));
|
||||
|
||||
@ -169,17 +169,16 @@ public:
|
||||
}
|
||||
|
||||
void insert_date_column(const char* data_ptr, size_t num) {
|
||||
size_t value_size = sizeof(uint24_t);
|
||||
size_t input_value_size = sizeof(uint24_t);
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
const char* cur_ptr = data_ptr + value_size * i;
|
||||
uint64_t value = 0;
|
||||
value = *(unsigned char*)(cur_ptr + 2);
|
||||
value <<= 8;
|
||||
value |= *(unsigned char*)(cur_ptr + 1);
|
||||
value <<= 8;
|
||||
value |= *(unsigned char*)(cur_ptr);
|
||||
vectorized::VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(value);
|
||||
this->insert_data(reinterpret_cast<char*>(&date), 0);
|
||||
uint64_t val = 0;
|
||||
memcpy((char*)(&val), data_ptr, input_value_size);
|
||||
data_ptr += input_value_size;
|
||||
|
||||
VecDateTimeValue date;
|
||||
date.set_olap_date(val);
|
||||
data.push_back_without_reserve(unaligned_load<Int64>(reinterpret_cast<char*>(&date)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -63,6 +63,20 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
void insert_date32_to_res_column(const uint16_t* sel, size_t sel_size,
|
||||
vectorized::ColumnVector<Int64>* res_ptr) {
|
||||
res_ptr->reserve(sel_size);
|
||||
auto& res_data = res_ptr->get_data();
|
||||
|
||||
for (size_t i = 0; i < sel_size; i++) {
|
||||
uint64_t val = data[sel[i]];
|
||||
VecDateTimeValue date;
|
||||
date.set_olap_date(val);
|
||||
res_data.push_back_without_reserve(
|
||||
unaligned_load<Int64>(reinterpret_cast<char*>(&date)));
|
||||
}
|
||||
}
|
||||
|
||||
void insert_datetime_to_res_column(const uint16_t* sel, size_t sel_size,
|
||||
vectorized::ColumnVector<Int64>* res_ptr) {
|
||||
for (size_t i = 0; i < sel_size; i++) {
|
||||
@ -205,6 +219,21 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void insert_many_date(const char* data_ptr, size_t num) {
|
||||
size_t intput_type_size = sizeof(uint24_t);
|
||||
size_t res_type_size = sizeof(uint32_t);
|
||||
char* input_data_ptr = const_cast<char*>(data_ptr);
|
||||
|
||||
char* res_ptr = (char*)data.get_end_ptr();
|
||||
memset(res_ptr, 0, res_type_size * num);
|
||||
for (int i = 0; i < num; i++) {
|
||||
memcpy(res_ptr, input_data_ptr, intput_type_size);
|
||||
res_ptr += res_type_size;
|
||||
input_data_ptr += intput_type_size;
|
||||
}
|
||||
data.set_end_ptr(res_ptr);
|
||||
}
|
||||
|
||||
void insert_many_fix_len_data(const char* data_ptr, size_t num) override {
|
||||
if constexpr (std::is_same_v<T, decimal12_t>) {
|
||||
insert_many_in_copy_way(data_ptr, num);
|
||||
@ -212,6 +241,10 @@ public:
|
||||
insert_many_in_copy_way(data_ptr, num);
|
||||
} else if constexpr (std::is_same_v<T, StringValue>) {
|
||||
// This branch is unreachable; it exists only so that the code compiles.
|
||||
} else if constexpr (std::is_same_v<
|
||||
T,
|
||||
uint32_t>) { // todo(wb) a trick type judge here,need refactor
|
||||
insert_many_date(data_ptr, num);
|
||||
} else {
|
||||
insert_many_default_type(data_ptr, num);
|
||||
}
|
||||
@ -405,6 +438,9 @@ public:
|
||||
} else if constexpr (std::is_same_v<T, uint24_t>) {
|
||||
insert_date_to_res_column(sel, sel_size,
|
||||
reinterpret_cast<vectorized::ColumnVector<Int64>*>(col_ptr));
|
||||
} else if constexpr (std::is_same_v<T, uint32_t>) { // a trick type judge, need refactor it.
|
||||
insert_date32_to_res_column(
|
||||
sel, sel_size, reinterpret_cast<vectorized::ColumnVector<Int64>*>(col_ptr));
|
||||
} else if constexpr (std::is_same_v<T, doris::vectorized::Int128>) {
|
||||
insert_default_value_res_column(
|
||||
sel, sel_size,
|
||||
|
||||
@ -223,6 +223,19 @@ public:
|
||||
return check_range_and_set_time(year, month, day, hour, minute, second, _type);
|
||||
}
|
||||
|
||||
//note(wb) not check in this method
|
||||
void inline set_olap_date(uint64_t olap_date_val) {
|
||||
_neg = 0;
|
||||
_type = TIME_DATE;
|
||||
|
||||
_day = olap_date_val & 0x1f;
|
||||
_month = (olap_date_val >> 5) & 0x0f;
|
||||
_year = olap_date_val >> 9;
|
||||
_hour = 0;
|
||||
_minute = 0;
|
||||
_second = 0;
|
||||
}
|
||||
|
||||
uint64_t to_olap_date() const {
|
||||
uint64_t val;
|
||||
val = _year;
|
||||
|
||||
Reference in New Issue
Block a user