pick https://github.com/apache/doris/pull/40769 Co-authored-by: Jerry Hu <mrhhsg@gmail.com>
This commit is contained in:
@ -31,18 +31,19 @@
|
||||
namespace doris::vectorized {
|
||||
|
||||
ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_)
|
||||
: nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) {
|
||||
: NullMapProvider(std::move(null_map_)), nested_column(std::move(nested_column_)) {
|
||||
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
|
||||
nested_column = get_nested_column().convert_to_full_column_if_const();
|
||||
|
||||
// after convert const column to full column, it may be a nullable column
|
||||
if (nested_column->is_nullable()) {
|
||||
assert_cast<ColumnNullable&>(*nested_column).apply_null_map((const ColumnUInt8&)*null_map);
|
||||
null_map = assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr();
|
||||
assert_cast<ColumnNullable&>(*nested_column)
|
||||
.apply_null_map(static_cast<const ColumnUInt8&>(get_null_map_column()));
|
||||
reset_null_map(assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr());
|
||||
nested_column = assert_cast<ColumnNullable&>(*nested_column).get_nested_column_ptr();
|
||||
}
|
||||
|
||||
if (is_column_const(*null_map)) {
|
||||
if (is_column_const(get_null_map_column())) [[unlikely]] {
|
||||
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
|
||||
"ColumnNullable cannot have constant null map");
|
||||
__builtin_unreachable();
|
||||
@ -69,7 +70,7 @@ void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t
|
||||
nested_column->update_xxHash_with_value(start, end, hash, nullptr);
|
||||
} else {
|
||||
const auto* __restrict real_null_data =
|
||||
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
|
||||
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
|
||||
for (int i = start; i < end; ++i) {
|
||||
if (real_null_data[i] != 0) {
|
||||
hash = HashUtil::xxHash64NullWithSeed(hash);
|
||||
@ -85,7 +86,7 @@ void ColumnNullable::update_crc_with_value(size_t start, size_t end, uint32_t& h
|
||||
nested_column->update_crc_with_value(start, end, hash, nullptr);
|
||||
} else {
|
||||
const auto* __restrict real_null_data =
|
||||
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
|
||||
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
|
||||
for (int i = start; i < end; ++i) {
|
||||
if (real_null_data[i] != 0) {
|
||||
hash = HashUtil::zlib_crc_hash_null(hash);
|
||||
@ -110,7 +111,7 @@ void ColumnNullable::update_crcs_with_value(uint32_t* __restrict hashes, doris::
|
||||
auto s = rows;
|
||||
DCHECK(s == size());
|
||||
const auto* __restrict real_null_data =
|
||||
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
|
||||
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
|
||||
if (!has_null()) {
|
||||
nested_column->update_crcs_with_value(hashes, type, rows, offset, nullptr);
|
||||
} else {
|
||||
@ -128,7 +129,7 @@ void ColumnNullable::update_hashes_with_value(uint64_t* __restrict hashes,
|
||||
DCHECK(null_data == nullptr);
|
||||
auto s = size();
|
||||
const auto* __restrict real_null_data =
|
||||
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
|
||||
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
|
||||
if (!has_null()) {
|
||||
nested_column->update_hashes_with_value(hashes, nullptr);
|
||||
} else {
|
||||
@ -183,24 +184,24 @@ StringRef ColumnNullable::get_data_at(size_t n) const {
|
||||
void ColumnNullable::insert_data(const char* pos, size_t length) {
|
||||
if (pos == nullptr) {
|
||||
get_nested_column().insert_default();
|
||||
_get_null_map_data().push_back(1);
|
||||
get_null_map_data().push_back(1);
|
||||
_has_null = true;
|
||||
_need_update_has_null = false;
|
||||
} else {
|
||||
get_nested_column().insert_data(pos, length);
|
||||
_get_null_map_data().push_back(0);
|
||||
_push_false_to_nullmap(1);
|
||||
}
|
||||
}
|
||||
|
||||
void ColumnNullable::insert_many_strings(const StringRef* strings, size_t num) {
|
||||
auto& null_map_data = _get_null_map_data();
|
||||
for (size_t i = 0; i != num; ++i) {
|
||||
if (strings[i].data == nullptr) {
|
||||
nested_column->insert_default();
|
||||
null_map_data.push_back(1);
|
||||
get_null_map_data().push_back(1);
|
||||
_has_null = true;
|
||||
} else {
|
||||
nested_column->insert_data(strings[i].data, strings[i].size);
|
||||
null_map_data.push_back(0);
|
||||
_push_false_to_nullmap(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -227,13 +228,14 @@ const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
|
||||
UInt8 val = *reinterpret_cast<const UInt8*>(pos);
|
||||
pos += sizeof(val);
|
||||
|
||||
_get_null_map_data().push_back(val);
|
||||
get_null_map_data().push_back(val);
|
||||
|
||||
if (val == 0) {
|
||||
pos = get_nested_column().deserialize_and_insert_from_arena(pos);
|
||||
} else {
|
||||
get_nested_column().insert_default();
|
||||
_has_null = true;
|
||||
_need_update_has_null = false;
|
||||
}
|
||||
|
||||
return pos;
|
||||
@ -265,7 +267,7 @@ void ColumnNullable::serialize_vec(std::vector<StringRef>& keys, size_t num_rows
|
||||
}
|
||||
|
||||
void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t num_rows) {
|
||||
auto& arr = _get_null_map_data();
|
||||
auto& arr = get_null_map_data();
|
||||
const size_t old_size = arr.size();
|
||||
arr.resize(old_size + num_rows);
|
||||
|
||||
@ -288,21 +290,15 @@ void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t
|
||||
void ColumnNullable::insert_range_from_ignore_overflow(const doris::vectorized::IColumn& src,
|
||||
size_t start, size_t length) {
|
||||
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
|
||||
_get_null_map_column().insert_range_from(*nullable_col.null_map, start, length);
|
||||
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length);
|
||||
get_nested_column().insert_range_from_ignore_overflow(*nullable_col.nested_column, start,
|
||||
length);
|
||||
const auto& src_null_map_data = nullable_col.get_null_map_data();
|
||||
_has_null = has_null();
|
||||
_has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1);
|
||||
}
|
||||
|
||||
void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t length) {
|
||||
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
|
||||
_get_null_map_column().insert_range_from(*nullable_col.null_map, start, length);
|
||||
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length);
|
||||
get_nested_column().insert_range_from(*nullable_col.nested_column, start, length);
|
||||
const auto& src_null_map_data = nullable_col.get_null_map_data();
|
||||
_has_null = has_null();
|
||||
_has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1);
|
||||
}
|
||||
|
||||
void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
|
||||
@ -310,9 +306,8 @@ void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* ind
|
||||
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
|
||||
get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin,
|
||||
indices_end);
|
||||
_get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin,
|
||||
indices_end);
|
||||
_need_update_has_null = true;
|
||||
get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin,
|
||||
indices_end);
|
||||
}
|
||||
|
||||
void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
|
||||
@ -321,17 +316,18 @@ void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
|
||||
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
|
||||
get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin,
|
||||
indices_end);
|
||||
_get_null_map_column().insert_many_defaults(indices_end - indices_begin);
|
||||
_push_false_to_nullmap(indices_end - indices_begin);
|
||||
}
|
||||
|
||||
void ColumnNullable::insert(const Field& x) {
|
||||
if (x.is_null()) {
|
||||
get_nested_column().insert_default();
|
||||
_get_null_map_data().push_back(1);
|
||||
get_null_map_data().push_back(1);
|
||||
_has_null = true;
|
||||
_need_update_has_null = false;
|
||||
} else {
|
||||
get_nested_column().insert(x);
|
||||
_get_null_map_data().push_back(0);
|
||||
_push_false_to_nullmap(1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -339,19 +335,18 @@ void ColumnNullable::insert_from(const IColumn& src, size_t n) {
|
||||
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
|
||||
get_nested_column().insert_from(src_concrete.get_nested_column(), n);
|
||||
auto is_null = src_concrete.get_null_map_data()[n];
|
||||
_has_null |= is_null;
|
||||
_get_null_map_data().push_back(is_null);
|
||||
get_null_map_data().push_back(is_null);
|
||||
}
|
||||
|
||||
void ColumnNullable::insert_from_not_nullable(const IColumn& src, size_t n) {
|
||||
get_nested_column().insert_from(src, n);
|
||||
_get_null_map_data().push_back(0);
|
||||
_push_false_to_nullmap(1);
|
||||
}
|
||||
|
||||
void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t start,
|
||||
size_t length) {
|
||||
get_nested_column().insert_range_from(src, start, length);
|
||||
_get_null_map_data().resize_fill(_get_null_map_data().size() + length, 0);
|
||||
_push_false_to_nullmap(length);
|
||||
}
|
||||
|
||||
void ColumnNullable::insert_many_from_not_nullable(const IColumn& src, size_t position,
|
||||
@ -380,15 +375,14 @@ size_t ColumnNullable::filter(const Filter& filter) {
|
||||
}
|
||||
|
||||
Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) {
|
||||
const auto* nullable_col_ptr = reinterpret_cast<const ColumnNullable*>(col_ptr);
|
||||
auto* nullable_col_ptr = assert_cast<ColumnNullable*>(col_ptr);
|
||||
ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
|
||||
ColumnPtr null_map_ptr = nullable_col_ptr->null_map;
|
||||
|
||||
/// `get_null_map_data` will set `_need_update_has_null` to true
|
||||
auto& res_nullmap = nullable_col_ptr->get_null_map_data();
|
||||
|
||||
RETURN_IF_ERROR(get_nested_column().filter_by_selector(
|
||||
sel, sel_size, const_cast<doris::vectorized::IColumn*>(nest_col_ptr.get())));
|
||||
//insert cur nullmap into result nullmap which is empty
|
||||
auto& res_nullmap = reinterpret_cast<vectorized::ColumnVector<UInt8>*>(
|
||||
const_cast<doris::vectorized::IColumn*>(null_map_ptr.get()))
|
||||
->get_data();
|
||||
DCHECK(res_nullmap.empty());
|
||||
res_nullmap.resize(sel_size);
|
||||
auto& cur_nullmap = get_null_map_column().get_data();
|
||||
@ -535,15 +529,10 @@ void ColumnNullable::get_permutation(bool reverse, size_t limit, int null_direct
|
||||
}
|
||||
}
|
||||
}
|
||||
//
|
||||
//void ColumnNullable::gather(ColumnGathererStream & gatherer)
|
||||
//{
|
||||
// gatherer.gather(*this);
|
||||
//}
|
||||
|
||||
void ColumnNullable::reserve(size_t n) {
|
||||
get_nested_column().reserve(n);
|
||||
_get_null_map_data().reserve(n);
|
||||
get_null_map_data(false).reserve(n);
|
||||
}
|
||||
|
||||
void ColumnNullable::resize(size_t n) {
|
||||
@ -595,7 +584,7 @@ void ColumnNullable::apply_null_map(const ColumnNullable& other) {
|
||||
}
|
||||
|
||||
void ColumnNullable::check_consistency() const {
|
||||
if (null_map->size() != get_nested_column().size()) {
|
||||
if (get_null_map_column().size() != get_nested_column().size()) {
|
||||
throw Exception(ErrorCode::INTERNAL_ERROR,
|
||||
"Sizes of nested column and null map of Nullable column are not equal");
|
||||
}
|
||||
@ -609,8 +598,8 @@ void ColumnNullable::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
|
||||
}
|
||||
|
||||
void ColumnNullable::_update_has_null() {
|
||||
const UInt8* null_pos = _get_null_map_data().data();
|
||||
_has_null = simd::contain_byte(null_pos, _get_null_map_data().size(), 1);
|
||||
const UInt8* null_pos = get_null_map_data().data();
|
||||
_has_null = simd::contain_byte(null_pos, get_null_map_data().size(), 1);
|
||||
_need_update_has_null = false;
|
||||
}
|
||||
|
||||
|
||||
@ -21,7 +21,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
@ -50,6 +49,69 @@ class ColumnSorter;
|
||||
using NullMap = ColumnUInt8::Container;
|
||||
using ConstNullMapPtr = const NullMap*;
|
||||
|
||||
/// use this to avoid directly access null_map forgetting modify _need_update_has_null. see more in inner comments
|
||||
class NullMapProvider {
|
||||
public:
|
||||
NullMapProvider() = default;
|
||||
NullMapProvider(MutableColumnPtr&& null_map) : _null_map(std::move(null_map)) {}
|
||||
void reset_null_map(MutableColumnPtr&& null_map) { _null_map = std::move(null_map); }
|
||||
|
||||
// return the column that represents the byte map. if want use null_map, just call this.
|
||||
const ColumnPtr& get_null_map_column_ptr() const { return _null_map; }
|
||||
// for functions getting nullmap, we assume it will modify it. so set `_need_update_has_null` to true. if you know it wouldn't,
|
||||
// call with arg false. but for the ops which will set _has_null themselves, call `update_has_null()`
|
||||
MutableColumnPtr get_null_map_column_ptr(bool may_change = true) {
|
||||
if (may_change) {
|
||||
_need_update_has_null = true;
|
||||
}
|
||||
return _null_map->assume_mutable();
|
||||
}
|
||||
ColumnUInt8::WrappedPtr& get_null_map(bool may_change = true) {
|
||||
if (may_change) {
|
||||
_need_update_has_null = true;
|
||||
}
|
||||
return _null_map;
|
||||
}
|
||||
|
||||
ColumnUInt8& get_null_map_column(bool may_change = true) {
|
||||
if (may_change) {
|
||||
_need_update_has_null = true;
|
||||
}
|
||||
return assert_cast<ColumnUInt8&>(*_null_map);
|
||||
}
|
||||
const ColumnUInt8& get_null_map_column() const {
|
||||
return assert_cast<const ColumnUInt8&>(*_null_map);
|
||||
}
|
||||
|
||||
NullMap& get_null_map_data(bool may_change = true) {
|
||||
return get_null_map_column(may_change).get_data();
|
||||
}
|
||||
const NullMap& get_null_map_data() const { return get_null_map_column().get_data(); }
|
||||
|
||||
void clear_null_map() { assert_cast<ColumnUInt8*>(_null_map.get())->clear(); }
|
||||
|
||||
void update_has_null(bool new_value) {
|
||||
_has_null = new_value;
|
||||
_need_update_has_null = false;
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Here we have three variables which serve for `has_null()` judgement. If we have known the nullity of object, no need
|
||||
* to check through the `null_map` to get the answer until the next time we modify it. Here `_has_null` is just the answer
|
||||
* we cached. `_need_update_has_null` indicates there's modification or not since we got `_has_null()` last time. So in
|
||||
* `_has_null()` we can check the two vars to know if there's need to update `has_null` or not.
|
||||
* If you just want QUERY BUT NOT MODIFY, make sure the caller is const. There will be no perf overhead for const overload.
|
||||
* Otherwise, this class, as the base class, will make it no possible to directly visit `null_map` forgetting to change the
|
||||
* protected flags. Just call the interface is ok.
|
||||
*/
|
||||
bool _need_update_has_null = true;
|
||||
bool _has_null = true;
|
||||
|
||||
private:
|
||||
IColumn::WrappedPtr _null_map;
|
||||
};
|
||||
|
||||
/// Class that specifies nullable columns. A nullable column represents
|
||||
/// a column, which may have any type, provided with the possibility of
|
||||
/// storing NULL values. For this purpose, a ColumnNullable object stores
|
||||
@ -59,7 +121,7 @@ using ConstNullMapPtr = const NullMap*;
|
||||
/// over a bitmap because columns are usually stored on disk as compressed
|
||||
/// files. In this regard, using a bitmap instead of a byte map would
|
||||
/// greatly complicate the implementation with little to no benefits.
|
||||
class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
|
||||
class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public NullMapProvider {
|
||||
private:
|
||||
friend class COWHelper<IColumn, ColumnNullable>;
|
||||
|
||||
@ -88,9 +150,11 @@ public:
|
||||
const char* get_family_name() const override { return "Nullable"; }
|
||||
std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; }
|
||||
MutableColumnPtr clone_resized(size_t size) const override;
|
||||
size_t size() const override { return assert_cast<const ColumnUInt8&>(*null_map).size(); }
|
||||
size_t size() const override {
|
||||
return assert_cast<const ColumnUInt8&>(get_null_map_column()).size();
|
||||
}
|
||||
PURE bool is_null_at(size_t n) const override {
|
||||
return assert_cast<const ColumnUInt8&>(*null_map).get_data()[n] != 0;
|
||||
return assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data()[n] != 0;
|
||||
}
|
||||
bool is_default_at(size_t n) const override { return is_null_at(n); }
|
||||
Field operator[](size_t n) const override;
|
||||
@ -139,8 +203,13 @@ public:
|
||||
assert_cast<ColumnType*>(nested_column.get())
|
||||
->insert_from(src_concrete.get_nested_column(), n);
|
||||
auto is_null = src_concrete.get_null_map_data()[n];
|
||||
_has_null |= is_null;
|
||||
_get_null_map_data().push_back(is_null);
|
||||
if (is_null) {
|
||||
get_null_map_data().push_back(1);
|
||||
_has_null = true;
|
||||
_need_update_has_null = false;
|
||||
} else {
|
||||
_push_false_to_nullmap(1);
|
||||
}
|
||||
}
|
||||
|
||||
void insert_from_not_nullable(const IColumn& src, size_t n);
|
||||
@ -148,18 +217,18 @@ public:
|
||||
void insert_many_from_not_nullable(const IColumn& src, size_t position, size_t length);
|
||||
|
||||
void insert_many_fix_len_data(const char* pos, size_t num) override {
|
||||
_get_null_map_column().insert_many_vals(0, num);
|
||||
_push_false_to_nullmap(num);
|
||||
get_nested_column().insert_many_fix_len_data(pos, num);
|
||||
}
|
||||
|
||||
void insert_many_raw_data(const char* pos, size_t num) override {
|
||||
_get_null_map_column().insert_many_vals(0, num);
|
||||
_push_false_to_nullmap(num);
|
||||
get_nested_column().insert_many_raw_data(pos, num);
|
||||
}
|
||||
|
||||
void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict,
|
||||
size_t data_num, uint32_t dict_num) override {
|
||||
_get_null_map_column().insert_many_vals(0, data_num);
|
||||
_push_false_to_nullmap(data_num);
|
||||
get_nested_column().insert_many_dict_data(data_array, start_index, dict, data_num,
|
||||
dict_num);
|
||||
}
|
||||
@ -169,38 +238,40 @@ public:
|
||||
if (UNLIKELY(num == 0)) {
|
||||
return;
|
||||
}
|
||||
_get_null_map_column().insert_many_vals(0, num);
|
||||
_push_false_to_nullmap(num);
|
||||
get_nested_column().insert_many_continuous_binary_data(data, offsets, num);
|
||||
}
|
||||
|
||||
void insert_many_binary_data(char* data_array, uint32_t* len_array,
|
||||
uint32_t* start_offset_array, size_t num) override {
|
||||
_get_null_map_column().insert_many_vals(0, num);
|
||||
_push_false_to_nullmap(num);
|
||||
get_nested_column().insert_many_binary_data(data_array, len_array, start_offset_array, num);
|
||||
}
|
||||
|
||||
void insert_default() override {
|
||||
get_nested_column().insert_default();
|
||||
_get_null_map_data().push_back(1);
|
||||
get_null_map_data().push_back(1);
|
||||
_has_null = true;
|
||||
_need_update_has_null = false;
|
||||
}
|
||||
|
||||
void insert_many_defaults(size_t length) override {
|
||||
get_nested_column().insert_many_defaults(length);
|
||||
_get_null_map_data().resize_fill(_get_null_map_data().size() + length, 1);
|
||||
get_null_map_data().resize_fill(get_null_map_data().size() + length, 1);
|
||||
_has_null = true;
|
||||
_need_update_has_null = false;
|
||||
}
|
||||
|
||||
void insert_not_null_elements(size_t num) {
|
||||
get_nested_column().insert_many_defaults(num);
|
||||
_get_null_map_column().insert_many_vals(0, num);
|
||||
_has_null = false;
|
||||
_push_false_to_nullmap(num);
|
||||
}
|
||||
|
||||
void insert_null_elements(int num) {
|
||||
get_nested_column().insert_many_defaults(num);
|
||||
_get_null_map_column().insert_many_vals(1, num);
|
||||
get_null_map_column().insert_many_vals(1, num);
|
||||
_has_null = true;
|
||||
_need_update_has_null = false;
|
||||
}
|
||||
|
||||
void pop_back(size_t n) override;
|
||||
@ -252,7 +323,7 @@ public:
|
||||
|
||||
void for_each_subcolumn(ColumnCallback callback) override {
|
||||
callback(nested_column);
|
||||
callback(null_map);
|
||||
callback(get_null_map());
|
||||
}
|
||||
|
||||
bool structure_equals(const IColumn& rhs) const override {
|
||||
@ -279,11 +350,13 @@ public:
|
||||
bool values_have_fixed_size() const override { return nested_column->values_have_fixed_size(); }
|
||||
|
||||
bool is_exclusive() const override {
|
||||
return IColumn::is_exclusive() && nested_column->is_exclusive() && null_map->is_exclusive();
|
||||
return IColumn::is_exclusive() && nested_column->is_exclusive() &&
|
||||
get_null_map_column().is_exclusive();
|
||||
}
|
||||
|
||||
size_t size_of_value_if_fixed() const override {
|
||||
return null_map->size_of_value_if_fixed() + nested_column->size_of_value_if_fixed();
|
||||
return get_null_map_column().size_of_value_if_fixed() +
|
||||
nested_column->size_of_value_if_fixed();
|
||||
}
|
||||
|
||||
bool only_null() const override { return size() == 1 && is_null_at(0); }
|
||||
@ -299,32 +372,12 @@ public:
|
||||
|
||||
MutableColumnPtr get_nested_column_ptr() { return nested_column->assume_mutable(); }
|
||||
|
||||
/// Return the column that represents the byte map.
|
||||
const ColumnPtr& get_null_map_column_ptr() const { return null_map; }
|
||||
|
||||
MutableColumnPtr get_null_map_column_ptr() {
|
||||
_need_update_has_null = true;
|
||||
return null_map->assume_mutable();
|
||||
}
|
||||
|
||||
ColumnUInt8& get_null_map_column() {
|
||||
_need_update_has_null = true;
|
||||
return assert_cast<ColumnUInt8&>(*null_map);
|
||||
}
|
||||
const ColumnUInt8& get_null_map_column() const {
|
||||
return assert_cast<const ColumnUInt8&>(*null_map);
|
||||
}
|
||||
|
||||
void clear() override {
|
||||
null_map->clear();
|
||||
clear_null_map();
|
||||
nested_column->clear();
|
||||
_has_null = false;
|
||||
}
|
||||
|
||||
NullMap& get_null_map_data() { return get_null_map_column().get_data(); }
|
||||
|
||||
const NullMap& get_null_map_data() const { return get_null_map_column().get_data(); }
|
||||
|
||||
/// Apply the null byte map of a specified nullable column onto the
|
||||
/// null byte map of the current column by performing an element-wise OR
|
||||
/// between both byte maps. This method is used to determine the null byte
|
||||
@ -349,7 +402,8 @@ public:
|
||||
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
|
||||
DCHECK(size() > self_row);
|
||||
const auto& nullable_rhs = assert_cast<const ColumnNullable&>(rhs);
|
||||
null_map->replace_column_data(*nullable_rhs.null_map, row, self_row);
|
||||
get_null_map_column().replace_column_data(nullable_rhs.get_null_map_column(), row,
|
||||
self_row);
|
||||
|
||||
if (!nullable_rhs.is_null_at(row)) {
|
||||
nested_column->replace_column_data(*nullable_rhs.nested_column, row, self_row);
|
||||
@ -398,19 +452,15 @@ public:
|
||||
ColumnPtr index(const IColumn& indexes, size_t limit) const override;
|
||||
|
||||
private:
|
||||
// the two functions will not update `_need_update_has_null`
|
||||
ColumnUInt8& _get_null_map_column() { return assert_cast<ColumnUInt8&>(*null_map); }
|
||||
NullMap& _get_null_map_data() { return _get_null_map_column().get_data(); }
|
||||
|
||||
WrappedPtr nested_column;
|
||||
WrappedPtr null_map;
|
||||
|
||||
bool _need_update_has_null = true;
|
||||
bool _has_null;
|
||||
|
||||
void _update_has_null();
|
||||
|
||||
template <bool negative>
|
||||
void apply_null_map_impl(const ColumnUInt8& map);
|
||||
|
||||
// push not null value wouldn't change the nullity. no need to update _has_null
|
||||
void _push_false_to_nullmap(size_t num) { get_null_map_column(false).insert_many_vals(0, num); }
|
||||
|
||||
WrappedPtr nested_column;
|
||||
};
|
||||
|
||||
ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable = false);
|
||||
|
||||
112
be/test/vec/columns/column_nullable_test.cpp
Normal file
112
be/test/vec/columns/column_nullable_test.cpp
Normal file
@ -0,0 +1,112 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "vec/columns/column_nullable.h"
|
||||
|
||||
#include <gtest/gtest-message.h>
|
||||
#include <gtest/gtest-test-part.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "column_nullable_test.h"
|
||||
#include "common/status.h"
|
||||
#include "runtime/define_primitive_type.h"
|
||||
#include "vec/columns/columns_number.h"
|
||||
#include "vec/columns/predicate_column.h"
|
||||
#include "vec/core/field.h"
|
||||
#include "vec/core/types.h"
|
||||
#include "vec/data_types/data_type.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
TEST(ColumnNullableTest, NullTest) {
|
||||
ColumnNullable::MutablePtr null_col = create_column_nullable<Int64>(500, true);
|
||||
EXPECT_TRUE(null_col->has_null());
|
||||
|
||||
ColumnNullable::MutablePtr dst_col =
|
||||
ColumnNullable::create(create_nested_column<Int64>(10), ColumnUInt8::create(10));
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
|
||||
ColumnInt64::MutablePtr source_col = ColumnInt64::create();
|
||||
source_col->insert_range_of_integer(0, 100);
|
||||
|
||||
dst_col->insert(Field());
|
||||
EXPECT_TRUE(dst_col->has_null());
|
||||
dst_col->clear();
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_many_from_not_nullable(*source_col, 0, 10);
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_from_not_nullable(*source_col, 5);
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_many_from_not_nullable(*source_col, 5, 5);
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_range_from_not_nullable(*source_col, 5, 5);
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_range_from(
|
||||
*ColumnNullable::create(std::move(source_col), ColumnUInt8::create(10)), 5, 5);
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
|
||||
dst_col->clear();
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_null_elements(10);
|
||||
EXPECT_TRUE(dst_col->has_null());
|
||||
|
||||
dst_col->clear();
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_from(*null_col, 100);
|
||||
EXPECT_TRUE(dst_col->has_null());
|
||||
|
||||
auto tmp_col =
|
||||
ColumnNullable::create(create_nested_column<Int64>(10), ColumnUInt8::create(10, 1));
|
||||
|
||||
dst_col->clear();
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_from(*tmp_col, 9);
|
||||
EXPECT_TRUE(dst_col->has_null());
|
||||
|
||||
dst_col->clear();
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_range_from(*tmp_col, 0, 3);
|
||||
EXPECT_TRUE(dst_col->has_null());
|
||||
|
||||
dst_col->clear();
|
||||
EXPECT_FALSE(dst_col->has_null());
|
||||
dst_col->insert_from(*tmp_col, 9);
|
||||
EXPECT_TRUE(dst_col->has_null());
|
||||
}
|
||||
|
||||
TEST(ColumnNullableTest, PredicateTest) {
|
||||
auto nullable_pred =
|
||||
ColumnNullable::create(PredicateColumnType<TYPE_DATE>::create(), ColumnUInt8::create());
|
||||
nullable_pred->insert_many_defaults(3);
|
||||
EXPECT_TRUE(nullable_pred->has_null());
|
||||
nullable_pred->insert_null_elements(10);
|
||||
EXPECT_TRUE(nullable_pred->has_null());
|
||||
|
||||
nullable_pred->clear();
|
||||
EXPECT_FALSE(nullable_pred->has_null());
|
||||
nullable_pred->insert_null_elements(10);
|
||||
EXPECT_TRUE(nullable_pred->has_null()); // now it have 10 nulls
|
||||
|
||||
auto null_dst = ColumnNullable::create(ColumnDate::create(), ColumnUInt8::create());
|
||||
EXPECT_FALSE(null_dst->has_null());
|
||||
|
||||
uint16_t selector[] = {5, 8}; // both null
|
||||
EXPECT_EQ(nullable_pred->filter_by_selector(selector, 2, null_dst.get()), Status::OK());
|
||||
// filter_by_selector must announce to update has_null to make below right.
|
||||
EXPECT_TRUE(null_dst->has_null());
|
||||
}
|
||||
} // namespace doris::vectorized
|
||||
105
be/test/vec/columns/column_nullable_test.h
Normal file
105
be/test/vec/columns/column_nullable_test.h
Normal file
@ -0,0 +1,105 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <gmock/gmock-more-matchers.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <type_traits>
|
||||
|
||||
#include "vec/columns/column.h"
|
||||
#include "vec/columns/column_nullable.h"
|
||||
#include "vec/columns/column_string.h"
|
||||
#include "vec/columns/columns_number.h"
|
||||
#include "vec/core/field.h"
|
||||
#include "vec/core/types.h"
|
||||
#include "vec/data_types/data_type.h"
|
||||
#include "vec/data_types/data_type_date_time.h"
|
||||
#include "vec/data_types/data_type_string.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
inline std::string generate_random_string(size_t max_length) {
|
||||
std::srand(std::time(nullptr)); // use current time as seed for random generator
|
||||
|
||||
if (max_length == 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
auto randbyte = []() -> char {
|
||||
// generate a random byte, in range [0x00, 0xFF]
|
||||
return static_cast<char>(rand() % 256);
|
||||
};
|
||||
|
||||
std::string str(max_length, 0);
|
||||
std::generate_n(str.begin(), max_length, randbyte);
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
inline MutableColumnPtr create_null_map(size_t input_rows_count, bool all_null = false,
|
||||
bool all_not_null = false) {
|
||||
std::srand(std::time(nullptr)); // use current time as seed for random generator
|
||||
auto null_map = ColumnUInt8::create();
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
if (all_null) {
|
||||
null_map->insert(1);
|
||||
} else if (all_not_null) {
|
||||
null_map->insert(0);
|
||||
} else {
|
||||
null_map->insert(rand() % 2);
|
||||
}
|
||||
}
|
||||
return null_map;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline MutableColumnPtr create_nested_column(size_t input_rows_count) {
|
||||
MutableColumnPtr column;
|
||||
if constexpr (std::is_integral_v<T>) {
|
||||
column = ColumnVector<T>::create();
|
||||
} else if constexpr (std::is_same_v<T, String>) {
|
||||
column = ColumnString::create();
|
||||
} else if constexpr (std::is_same_v<T, Decimal64>) {
|
||||
column = ColumnDecimal64::create(0, 6);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
if constexpr (std::is_integral_v<T>) {
|
||||
column->insert(rand() % std::numeric_limits<T>::max());
|
||||
} else if constexpr (std::is_same_v<T, String>) {
|
||||
column->insert(generate_random_string(rand() % 512));
|
||||
} else if constexpr (std::is_same_v<T, Decimal64>) {
|
||||
column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
|
||||
} else {
|
||||
throw std::runtime_error("Unsupported type");
|
||||
}
|
||||
}
|
||||
|
||||
return column;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline ColumnNullable::MutablePtr create_column_nullable(size_t input_rows_count,
|
||||
bool all_null = false,
|
||||
bool all_not_null = false) {
|
||||
auto null_map = create_null_map(input_rows_count, all_null, all_not_null);
|
||||
auto nested_column = create_nested_column<T>(input_rows_count);
|
||||
return ColumnNullable::create(std::move(nested_column), std::move(null_map));
|
||||
}
|
||||
} // namespace doris::vectorized
|
||||
@ -1,4 +1,3 @@
|
||||
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
|
||||
@ -266,6 +266,14 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty
|
||||
fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2;
|
||||
} else if (std::is_same_v<ReturnType, DateTimeV2>) {
|
||||
fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2;
|
||||
} else if (std::is_same_v<ReturnType, Decimal128V2>) {
|
||||
fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMALV2;
|
||||
} else if (std::is_same_v<ReturnType, Decimal32>) {
|
||||
fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL32;
|
||||
} else if (std::is_same_v<ReturnType, Decimal64>) {
|
||||
fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL64;
|
||||
} else if (std::is_same_v<ReturnType, Decimal128V3>) {
|
||||
fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL128I;
|
||||
} else {
|
||||
fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE;
|
||||
}
|
||||
|
||||
@ -0,0 +1,6 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !test1 --
|
||||
|
||||
-- !test2 --
|
||||
0
|
||||
|
||||
@ -0,0 +1,57 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_column_nullable_cache") {
|
||||
sql """
|
||||
drop table if exists test_column_nullable_cache;
|
||||
"""
|
||||
sql """
|
||||
CREATE TABLE `test_column_nullable_cache` (
|
||||
`col_int_undef_signed2` int NULL,
|
||||
`col_int_undef_signed` int NULL,
|
||||
`col_int_undef_signed3` int NULL,
|
||||
`col_int_undef_signed4` int NULL,
|
||||
`pk` int NULL
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`col_int_undef_signed2`)
|
||||
DISTRIBUTED by RANDOM BUCKETS 10
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
insert into test_column_nullable_cache
|
||||
(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
|
||||
values (0,3,7164641,5,8),(1,null,3916062,5,6),(2,1,5533498,0,9),(3,7,2,null,7057679),(4,1,0,7,7),
|
||||
(5,null,4,2448564,1),(6,7531976,7324373,9,7),(7,3,1,1,3),(8,6,8131576,9,-1793807),(9,9,2,4214547,9),
|
||||
(10,-7299852,5,1,3),(11,7,3,-1036551,5),(12,-6108579,84823,4,1229534),(13,-1065629,5,4,null),(14,null,8072633,3328285,2),
|
||||
(15,2,7,6,6),(16,8,5,-4582103,1),(17,5,-4677722,-2379367,4),(18,-7807532,-6686732,0,5329341),
|
||||
(19,8,7,-4013246,-7013374),(20,0,2,9,2),(21,7,2383333,5,4),(22,5844611,2,2,0),(23,0,4756185,0,-5612039),
|
||||
(24,6,4878754,608172,0),(25,null,7858692,7,-6704206),(26,7,-1697597,6,9),(27,9,-7021349,3,-3094786),
|
||||
(28,2,2830915,null,8),(29,4133633,489212,5,9),(30,6,-3346211,3668768,2),(31,1,4862070,-5066405,0),(32,9,6,7,8),
|
||||
(33,2,null,4,2),(34,1,2893430,-3282825,5),(35,2,3,4,2),(36,4,-3418732,6,1263819),(37,5,4,-6342170,6),(99,9,2,8,null);
|
||||
"""
|
||||
|
||||
qt_test1 """
|
||||
select * from test_column_nullable_cache where col_int_undef_signed3 IS NULL and col_int_undef_signed3 = col_int_undef_signed3;
|
||||
"""
|
||||
|
||||
qt_test2 """
|
||||
select count(*) from test_column_nullable_cache where col_int_undef_signed3 IS NULL and col_int_undef_signed3 = col_int_undef_signed3;
|
||||
"""
|
||||
}
|
||||
Reference in New Issue
Block a user