[optimize](storage)optimize date in storage layer (#8967)

* opt date in storage

* code style

Co-authored-by: Wang Bo <wangbo36@meituan.com>
This commit is contained in:
wangbo
2022-06-23 12:29:10 +08:00
committed by GitHub
parent 815ea35578
commit d73f170eeb
8 changed files with 131 additions and 20 deletions

View File

@ -224,6 +224,9 @@ COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true)
COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true)
COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
// todo(wb): DATE values are stored as uint32_t, but they are still evaluated through Predicate<uint24_t>.
// This is done to stay compatible with the Row Version predicate.
// Predicate<uint32_t> can be used for DATE once Row Version is removed.
#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \
template <class T> \
void CLASS<T>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const { \
@ -236,15 +239,40 @@ COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
auto& null_bitmap = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>( \
*(nullable_column->get_null_map_column_ptr())) \
.get_data(); \
for (uint16_t i = 0; i < size; i++) { \
flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \
if constexpr (std::is_same_v<T, uint24_t>) { \
auto& predicate_column = \
reinterpret_cast<const vectorized::PredicateColumnType<uint32_t>&>( \
nullable_column->get_nested_column()); \
uint32_t int32_val = 0; \
char* int32_val_ptr = (char*)&int32_val; \
memory_copy(int32_val_ptr, _value.get_data(), sizeof(uint24_t)); \
auto& data_array_uint32_t = predicate_column.get_data(); \
for (uint16_t i = 0; i < size; i++) { \
flags[i] = (data_array_uint32_t[i] OP int32_val) && (!null_bitmap[i]); \
} \
} else { \
for (uint16_t i = 0; i < size; i++) { \
flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \
} \
} \
} else { \
auto& predicate_column = \
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
auto& data_array = predicate_column.get_data(); \
for (uint16_t i = 0; i < size; i++) { \
flags[i] = data_array[i] OP _value; \
if constexpr (std::is_same_v<T, uint24_t>) { \
auto& predicate_column = \
reinterpret_cast<vectorized::PredicateColumnType<uint32_t>&>(column); \
uint32_t int32_val = 0; \
char* int32_val_ptr = (char*)&int32_val; \
memory_copy(int32_val_ptr, _value.get_data(), sizeof(uint24_t)); \
auto& data_array = predicate_column.get_data(); \
for (uint16_t i = 0; i < size; i++) { \
flags[i] = data_array[i] OP int32_val; \
} \
} else { \
auto& predicate_column = \
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \
auto& data_array = predicate_column.get_data(); \
for (uint16_t i = 0; i < size; i++) { \
flags[i] = data_array[i] OP _value; \
} \
} \
} \
if (_opposite) { \
@ -502,6 +530,7 @@ COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=)
template CLASS<decimal12_t>::CLASS(uint32_t column_id, const decimal12_t& value, \
bool opposite); \
template CLASS<uint24_t>::CLASS(uint32_t column_id, const uint24_t& value, bool opposite); \
template CLASS<uint32_t>::CLASS(uint32_t column_id, const uint32_t& value, bool opposite); \
template CLASS<uint64_t>::CLASS(uint32_t column_id, const uint64_t& value, bool opposite); \
template CLASS<bool>::CLASS(uint32_t column_id, const bool& value, bool opposite);
@ -663,6 +692,8 @@ COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(GreaterEqualPredicate)
bool* flags) const; \
template void CLASS<uint24_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
bool* flags) const; \
template void CLASS<uint32_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
bool* flags) const; \
template void CLASS<uint64_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \
bool* flags) const; \
template void CLASS<bool>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \

View File

@ -290,7 +290,37 @@ private:
uint16_t* sel, uint16_t size) const {
uint16_t new_size = 0;
if (column->is_column_dictionary()) {
if constexpr (std::is_same_v<T, uint24_t>) {
auto* nested_col_ptr =
vectorized::check_and_get_column<vectorized::PredicateColumnType<uint32_t>>(
column);
auto& data_array = nested_col_ptr->get_data();
uint24_t tmp_uint24_value;
for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
if constexpr (is_nullable) {
if ((*null_map)[idx]) {
if constexpr (is_opposite) {
sel[new_size++] = idx;
}
continue;
}
}
memcpy((char*)(&tmp_uint24_value), (char*)(&(data_array[idx])), sizeof(uint24_t));
if constexpr (!is_opposite) {
if (_operator(_values.find(tmp_uint24_value), _values.end())) {
sel[new_size++] = idx;
}
} else {
if (!_operator(_values.find(tmp_uint24_value), _values.end())) {
sel[new_size++] = idx;
}
}
}
} else if (column->is_column_dictionary()) {
if constexpr (std::is_same_v<T, StringValue>) {
auto* nested_col_ptr = vectorized::check_and_get_column<
vectorized::ColumnDictionary<vectorized::Int32>>(column);

View File

@ -661,7 +661,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
predicate->type() == PredicateType::IN_LIST ||
predicate->type() == PredicateType::NOT_IN_LIST ||
predicate->type() == PredicateType::IS_NULL ||
predicate->type() == PredicateType::IS_NOT_NULL || type == OLAP_FIELD_TYPE_DATE ||
predicate->type() == PredicateType::IS_NOT_NULL ||
type == OLAP_FIELD_TYPE_DECIMAL) {
short_cir_pred_col_id_set.insert(cid);
_short_cir_eval_predicate.push_back(predicate);

View File

@ -151,7 +151,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(FieldType type)
return doris::vectorized::PredicateColumnType<doris::vectorized::Int128>::create();
case OLAP_FIELD_TYPE_DATE:
return doris::vectorized::PredicateColumnType<uint24_t>::create();
return doris::vectorized::PredicateColumnType<uint32_t>::create();
case OLAP_FIELD_TYPE_DATETIME:
return doris::vectorized::PredicateColumnType<uint64_t>::create();

View File

@ -140,6 +140,8 @@ public:
return std::string(buf);
}
// Read-only access to the raw bytes that back this value.
const uint8_t* get_data() const {
    return &data[0];
}
private:
uint8_t data[3];
} __attribute__((packed));

View File

@ -169,17 +169,16 @@ public:
}
void insert_date_column(const char* data_ptr, size_t num) {
size_t value_size = sizeof(uint24_t);
size_t input_value_size = sizeof(uint24_t);
for (int i = 0; i < num; i++) {
const char* cur_ptr = data_ptr + value_size * i;
uint64_t value = 0;
value = *(unsigned char*)(cur_ptr + 2);
value <<= 8;
value |= *(unsigned char*)(cur_ptr + 1);
value <<= 8;
value |= *(unsigned char*)(cur_ptr);
vectorized::VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(value);
this->insert_data(reinterpret_cast<char*>(&date), 0);
uint64_t val = 0;
memcpy((char*)(&val), data_ptr, input_value_size);
data_ptr += input_value_size;
VecDateTimeValue date;
date.set_olap_date(val);
data.push_back_without_reserve(unaligned_load<Int64>(reinterpret_cast<char*>(&date)));
}
}

View File

@ -63,6 +63,20 @@ private:
}
}
// Appends the selected rows of this column to `res_ptr`, converting each stored value
// with VecDateTimeValue::set_olap_date() and storing the resulting object's bits as Int64.
//   sel      - row indexes into this column's `data`
//   sel_size - number of entries in `sel`
//   res_ptr  - destination Int64 column; converted values are appended to it
void insert_date32_to_res_column(const uint16_t* sel, size_t sel_size,
                                 vectorized::ColumnVector<Int64>* res_ptr) {
    auto& res_data = res_ptr->get_data();
    // The loop appends with push_back_without_reserve(), which (as its name indicates)
    // does not grow the buffer, so make room for what the destination already holds plus
    // the rows being appended. For an empty destination this equals reserve(sel_size).
    // NOTE(review): presumes reserve() takes a total capacity, not an increment -- confirm.
    res_ptr->reserve(res_data.size() + sel_size);
    for (size_t i = 0; i < sel_size; i++) {
        const uint64_t val = data[sel[i]];
        VecDateTimeValue date;
        date.set_olap_date(val);
        res_data.push_back_without_reserve(
                unaligned_load<Int64>(reinterpret_cast<char*>(&date)));
    }
}
void insert_datetime_to_res_column(const uint16_t* sel, size_t sel_size,
vectorized::ColumnVector<Int64>* res_ptr) {
for (size_t i = 0; i < sel_size; i++) {
@ -205,6 +219,21 @@ public:
}
}
// Appends `num` values read from `data_ptr` to this column. Each input value occupies
// sizeof(uint24_t) bytes and is copied into the low-address bytes of a zero-filled
// sizeof(uint32_t)-byte slot.
//   data_ptr - start of the packed input values (read-only)
//   num      - number of input values
// NOTE(review): nothing here grows `data`; this presumably relies on the caller having
// already reserved room for `num` more elements -- confirm.
void insert_many_date(const char* data_ptr, size_t num) {
    constexpr size_t input_type_size = sizeof(uint24_t);
    constexpr size_t res_type_size = sizeof(uint32_t);
    char* res_ptr = (char*)data.get_end_ptr();
    // Zero the whole destination range first so the bytes of each slot that the shorter
    // per-value copy leaves untouched are 0.
    memset(res_ptr, 0, res_type_size * num);
    for (size_t i = 0; i < num; ++i) {
        memcpy(res_ptr, data_ptr, input_type_size);
        res_ptr += res_type_size;
        data_ptr += input_type_size;
    }
    // Publish the newly written elements by moving the end pointer past them.
    data.set_end_ptr(res_ptr);
}
void insert_many_fix_len_data(const char* data_ptr, size_t num) override {
if constexpr (std::is_same_v<T, decimal12_t>) {
insert_many_in_copy_way(data_ptr, num);
@ -212,6 +241,10 @@ public:
insert_many_in_copy_way(data_ptr, num);
} else if constexpr (std::is_same_v<T, StringValue>) {
// here is unreachable, just for compilation to be able to pass
} else if constexpr (std::is_same_v<
T,
uint32_t>) { // todo(wb) a trick type judge here,need refactor
insert_many_date(data_ptr, num);
} else {
insert_many_default_type(data_ptr, num);
}
@ -405,6 +438,9 @@ public:
} else if constexpr (std::is_same_v<T, uint24_t>) {
insert_date_to_res_column(sel, sel_size,
reinterpret_cast<vectorized::ColumnVector<Int64>*>(col_ptr));
} else if constexpr (std::is_same_v<T, uint32_t>) { // a trick type judge, need refactor it.
insert_date32_to_res_column(
sel, sel_size, reinterpret_cast<vectorized::ColumnVector<Int64>*>(col_ptr));
} else if constexpr (std::is_same_v<T, doris::vectorized::Int128>) {
insert_default_value_res_column(
sel, sel_size,

View File

@ -223,6 +223,19 @@ public:
return check_range_and_set_time(year, month, day, hour, minute, second, _type);
}
// Loads this value from a packed OLAP date: the low 5 bits are the day, the next 4 bits
// the month, and the bits from bit 9 upwards are assigned to the year. The time-of-day
// fields are cleared, the type becomes TIME_DATE and the sign flag is reset.
// note(wb): no validity check is performed in this method.
inline void set_olap_date(uint64_t olap_date_val) {
    // Date part, unpacked from the most significant field down.
    _year = olap_date_val >> 9;
    _month = (olap_date_val >> 5) & 0x0f;
    _day = olap_date_val & 0x1f;
    // No time-of-day component.
    _second = 0;
    _minute = 0;
    _hour = 0;
    _type = TIME_DATE;
    _neg = 0;
}
uint64_t to_olap_date() const {
uint64_t val;
val = _year;