[branch-2.1](Column) refactor ColumnNullable to provide flags safety (#40769) (#40848)

pick https://github.com/apache/doris/pull/40769

Co-authored-by: Jerry Hu <mrhhsg@gmail.com>
This commit is contained in:
zclllhhjj
2024-09-14 16:27:43 +08:00
committed by GitHub
parent 963415ce45
commit cecd214345
8 changed files with 426 additions and 100 deletions

View File

@ -31,18 +31,19 @@
namespace doris::vectorized {
ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_)
: nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) {
: NullMapProvider(std::move(null_map_)), nested_column(std::move(nested_column_)) {
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = get_nested_column().convert_to_full_column_if_const();
// after convert const column to full column, it may be a nullable column
if (nested_column->is_nullable()) {
assert_cast<ColumnNullable&>(*nested_column).apply_null_map((const ColumnUInt8&)*null_map);
null_map = assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr();
assert_cast<ColumnNullable&>(*nested_column)
.apply_null_map(static_cast<const ColumnUInt8&>(get_null_map_column()));
reset_null_map(assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr());
nested_column = assert_cast<ColumnNullable&>(*nested_column).get_nested_column_ptr();
}
if (is_column_const(*null_map)) {
if (is_column_const(get_null_map_column())) [[unlikely]] {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"ColumnNullable cannot have constant null map");
__builtin_unreachable();
@ -69,7 +70,7 @@ void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t
nested_column->update_xxHash_with_value(start, end, hash, nullptr);
} else {
const auto* __restrict real_null_data =
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
for (int i = start; i < end; ++i) {
if (real_null_data[i] != 0) {
hash = HashUtil::xxHash64NullWithSeed(hash);
@ -85,7 +86,7 @@ void ColumnNullable::update_crc_with_value(size_t start, size_t end, uint32_t& h
nested_column->update_crc_with_value(start, end, hash, nullptr);
} else {
const auto* __restrict real_null_data =
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
for (int i = start; i < end; ++i) {
if (real_null_data[i] != 0) {
hash = HashUtil::zlib_crc_hash_null(hash);
@ -110,7 +111,7 @@ void ColumnNullable::update_crcs_with_value(uint32_t* __restrict hashes, doris::
auto s = rows;
DCHECK(s == size());
const auto* __restrict real_null_data =
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
if (!has_null()) {
nested_column->update_crcs_with_value(hashes, type, rows, offset, nullptr);
} else {
@ -128,7 +129,7 @@ void ColumnNullable::update_hashes_with_value(uint64_t* __restrict hashes,
DCHECK(null_data == nullptr);
auto s = size();
const auto* __restrict real_null_data =
assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data().data();
if (!has_null()) {
nested_column->update_hashes_with_value(hashes, nullptr);
} else {
@ -183,24 +184,24 @@ StringRef ColumnNullable::get_data_at(size_t n) const {
void ColumnNullable::insert_data(const char* pos, size_t length) {
if (pos == nullptr) {
get_nested_column().insert_default();
_get_null_map_data().push_back(1);
get_null_map_data().push_back(1);
_has_null = true;
_need_update_has_null = false;
} else {
get_nested_column().insert_data(pos, length);
_get_null_map_data().push_back(0);
_push_false_to_nullmap(1);
}
}
void ColumnNullable::insert_many_strings(const StringRef* strings, size_t num) {
auto& null_map_data = _get_null_map_data();
for (size_t i = 0; i != num; ++i) {
if (strings[i].data == nullptr) {
nested_column->insert_default();
null_map_data.push_back(1);
get_null_map_data().push_back(1);
_has_null = true;
} else {
nested_column->insert_data(strings[i].data, strings[i].size);
null_map_data.push_back(0);
_push_false_to_nullmap(1);
}
}
}
@ -227,13 +228,14 @@ const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
UInt8 val = *reinterpret_cast<const UInt8*>(pos);
pos += sizeof(val);
_get_null_map_data().push_back(val);
get_null_map_data().push_back(val);
if (val == 0) {
pos = get_nested_column().deserialize_and_insert_from_arena(pos);
} else {
get_nested_column().insert_default();
_has_null = true;
_need_update_has_null = false;
}
return pos;
@ -265,7 +267,7 @@ void ColumnNullable::serialize_vec(std::vector<StringRef>& keys, size_t num_rows
}
void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t num_rows) {
auto& arr = _get_null_map_data();
auto& arr = get_null_map_data();
const size_t old_size = arr.size();
arr.resize(old_size + num_rows);
@ -288,21 +290,15 @@ void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t
void ColumnNullable::insert_range_from_ignore_overflow(const doris::vectorized::IColumn& src,
size_t start, size_t length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
_get_null_map_column().insert_range_from(*nullable_col.null_map, start, length);
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length);
get_nested_column().insert_range_from_ignore_overflow(*nullable_col.nested_column, start,
length);
const auto& src_null_map_data = nullable_col.get_null_map_data();
_has_null = has_null();
_has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1);
}
void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
_get_null_map_column().insert_range_from(*nullable_col.null_map, start, length);
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(), start, length);
get_nested_column().insert_range_from(*nullable_col.nested_column, start, length);
const auto& src_null_map_data = nullable_col.get_null_map_data();
_has_null = has_null();
_has_null |= simd::contain_byte(src_null_map_data.data() + start, length, 1);
}
void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
@ -310,9 +306,8 @@ void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t* ind
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin,
indices_end);
_get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin,
indices_end);
_need_update_has_null = true;
get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(), indices_begin,
indices_end);
}
void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
@ -321,17 +316,18 @@ void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_indices_from(src_concrete.get_nested_column(), indices_begin,
indices_end);
_get_null_map_column().insert_many_defaults(indices_end - indices_begin);
_push_false_to_nullmap(indices_end - indices_begin);
}
void ColumnNullable::insert(const Field& x) {
if (x.is_null()) {
get_nested_column().insert_default();
_get_null_map_data().push_back(1);
get_null_map_data().push_back(1);
_has_null = true;
_need_update_has_null = false;
} else {
get_nested_column().insert(x);
_get_null_map_data().push_back(0);
_push_false_to_nullmap(1);
}
}
@ -339,19 +335,18 @@ void ColumnNullable::insert_from(const IColumn& src, size_t n) {
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_from(src_concrete.get_nested_column(), n);
auto is_null = src_concrete.get_null_map_data()[n];
_has_null |= is_null;
_get_null_map_data().push_back(is_null);
get_null_map_data().push_back(is_null);
}
void ColumnNullable::insert_from_not_nullable(const IColumn& src, size_t n) {
get_nested_column().insert_from(src, n);
_get_null_map_data().push_back(0);
_push_false_to_nullmap(1);
}
void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t start,
size_t length) {
get_nested_column().insert_range_from(src, start, length);
_get_null_map_data().resize_fill(_get_null_map_data().size() + length, 0);
_push_false_to_nullmap(length);
}
void ColumnNullable::insert_many_from_not_nullable(const IColumn& src, size_t position,
@ -380,15 +375,14 @@ size_t ColumnNullable::filter(const Filter& filter) {
}
Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) {
const auto* nullable_col_ptr = reinterpret_cast<const ColumnNullable*>(col_ptr);
auto* nullable_col_ptr = assert_cast<ColumnNullable*>(col_ptr);
ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
ColumnPtr null_map_ptr = nullable_col_ptr->null_map;
/// `get_null_map_data` will set `_need_update_has_null` to true
auto& res_nullmap = nullable_col_ptr->get_null_map_data();
RETURN_IF_ERROR(get_nested_column().filter_by_selector(
sel, sel_size, const_cast<doris::vectorized::IColumn*>(nest_col_ptr.get())));
//insert cur nullmap into result nullmap which is empty
auto& res_nullmap = reinterpret_cast<vectorized::ColumnVector<UInt8>*>(
const_cast<doris::vectorized::IColumn*>(null_map_ptr.get()))
->get_data();
DCHECK(res_nullmap.empty());
res_nullmap.resize(sel_size);
auto& cur_nullmap = get_null_map_column().get_data();
@ -535,15 +529,10 @@ void ColumnNullable::get_permutation(bool reverse, size_t limit, int null_direct
}
}
}
//
//void ColumnNullable::gather(ColumnGathererStream & gatherer)
//{
// gatherer.gather(*this);
//}
void ColumnNullable::reserve(size_t n) {
get_nested_column().reserve(n);
_get_null_map_data().reserve(n);
get_null_map_data(false).reserve(n);
}
void ColumnNullable::resize(size_t n) {
@ -595,7 +584,7 @@ void ColumnNullable::apply_null_map(const ColumnNullable& other) {
}
void ColumnNullable::check_consistency() const {
if (null_map->size() != get_nested_column().size()) {
if (get_null_map_column().size() != get_nested_column().size()) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"Sizes of nested column and null map of Nullable column are not equal");
}
@ -609,8 +598,8 @@ void ColumnNullable::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
}
void ColumnNullable::_update_has_null() {
const UInt8* null_pos = _get_null_map_data().data();
_has_null = simd::contain_byte(null_pos, _get_null_map_data().size(), 1);
const UInt8* null_pos = get_null_map_data().data();
_has_null = simd::contain_byte(null_pos, get_null_map_data().size(), 1);
_need_update_has_null = false;
}

View File

@ -21,7 +21,6 @@
#pragma once
#include <functional>
#include <ostream>
#include <string>
#include <type_traits>
#include <utility>
@ -50,6 +49,69 @@ class ColumnSorter;
using NullMap = ColumnUInt8::Container;
using ConstNullMapPtr = const NullMap*;
/// use this to avoid directly access null_map forgetting modify _need_update_has_null. see more in inner comments
class NullMapProvider {
public:
NullMapProvider() = default;
NullMapProvider(MutableColumnPtr&& null_map) : _null_map(std::move(null_map)) {}
void reset_null_map(MutableColumnPtr&& null_map) { _null_map = std::move(null_map); }
// return the column that represents the byte map. if want use null_map, just call this.
const ColumnPtr& get_null_map_column_ptr() const { return _null_map; }
// for functions getting nullmap, we assume it will modify it. so set `_need_update_has_null` to true. if you know it wouldn't,
// call with arg false. but for the ops which will set _has_null themselves, call `update_has_null()`
MutableColumnPtr get_null_map_column_ptr(bool may_change = true) {
if (may_change) {
_need_update_has_null = true;
}
return _null_map->assume_mutable();
}
ColumnUInt8::WrappedPtr& get_null_map(bool may_change = true) {
if (may_change) {
_need_update_has_null = true;
}
return _null_map;
}
ColumnUInt8& get_null_map_column(bool may_change = true) {
if (may_change) {
_need_update_has_null = true;
}
return assert_cast<ColumnUInt8&>(*_null_map);
}
const ColumnUInt8& get_null_map_column() const {
return assert_cast<const ColumnUInt8&>(*_null_map);
}
NullMap& get_null_map_data(bool may_change = true) {
return get_null_map_column(may_change).get_data();
}
const NullMap& get_null_map_data() const { return get_null_map_column().get_data(); }
void clear_null_map() { assert_cast<ColumnUInt8*>(_null_map.get())->clear(); }
void update_has_null(bool new_value) {
_has_null = new_value;
_need_update_has_null = false;
}
protected:
/**
* Here we have three variables which serve for `has_null()` judgement. If we have known the nullity of object, no need
* to check through the `null_map` to get the answer until the next time we modify it. Here `_has_null` is just the answer
* we cached. `_need_update_has_null` indicates there's modification or not since we got `_has_null()` last time. So in
* `_has_null()` we can check the two vars to know if there's need to update `has_null` or not.
* If you just want QUERY BUT NOT MODIFY, make sure the caller is const. There will be no perf overhead for const overload.
* Otherwise, this class, as the base class, will make it no possible to directly visit `null_map` forgetting to change the
* protected flags. Just call the interface is ok.
*/
bool _need_update_has_null = true;
bool _has_null = true;
private:
IColumn::WrappedPtr _null_map;
};
/// Class that specifies nullable columns. A nullable column represents
/// a column, which may have any type, provided with the possibility of
/// storing NULL values. For this purpose, a ColumnNullable object stores
@ -59,7 +121,7 @@ using ConstNullMapPtr = const NullMap*;
/// over a bitmap because columns are usually stored on disk as compressed
/// files. In this regard, using a bitmap instead of a byte map would
/// greatly complicate the implementation with little to no benefits.
class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public NullMapProvider {
private:
friend class COWHelper<IColumn, ColumnNullable>;
@ -88,9 +150,11 @@ public:
const char* get_family_name() const override { return "Nullable"; }
std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; }
MutableColumnPtr clone_resized(size_t size) const override;
size_t size() const override { return assert_cast<const ColumnUInt8&>(*null_map).size(); }
size_t size() const override {
return assert_cast<const ColumnUInt8&>(get_null_map_column()).size();
}
PURE bool is_null_at(size_t n) const override {
return assert_cast<const ColumnUInt8&>(*null_map).get_data()[n] != 0;
return assert_cast<const ColumnUInt8&>(get_null_map_column()).get_data()[n] != 0;
}
bool is_default_at(size_t n) const override { return is_null_at(n); }
Field operator[](size_t n) const override;
@ -139,8 +203,13 @@ public:
assert_cast<ColumnType*>(nested_column.get())
->insert_from(src_concrete.get_nested_column(), n);
auto is_null = src_concrete.get_null_map_data()[n];
_has_null |= is_null;
_get_null_map_data().push_back(is_null);
if (is_null) {
get_null_map_data().push_back(1);
_has_null = true;
_need_update_has_null = false;
} else {
_push_false_to_nullmap(1);
}
}
void insert_from_not_nullable(const IColumn& src, size_t n);
@ -148,18 +217,18 @@ public:
void insert_many_from_not_nullable(const IColumn& src, size_t position, size_t length);
void insert_many_fix_len_data(const char* pos, size_t num) override {
_get_null_map_column().insert_many_vals(0, num);
_push_false_to_nullmap(num);
get_nested_column().insert_many_fix_len_data(pos, num);
}
void insert_many_raw_data(const char* pos, size_t num) override {
_get_null_map_column().insert_many_vals(0, num);
_push_false_to_nullmap(num);
get_nested_column().insert_many_raw_data(pos, num);
}
void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict,
size_t data_num, uint32_t dict_num) override {
_get_null_map_column().insert_many_vals(0, data_num);
_push_false_to_nullmap(data_num);
get_nested_column().insert_many_dict_data(data_array, start_index, dict, data_num,
dict_num);
}
@ -169,38 +238,40 @@ public:
if (UNLIKELY(num == 0)) {
return;
}
_get_null_map_column().insert_many_vals(0, num);
_push_false_to_nullmap(num);
get_nested_column().insert_many_continuous_binary_data(data, offsets, num);
}
void insert_many_binary_data(char* data_array, uint32_t* len_array,
uint32_t* start_offset_array, size_t num) override {
_get_null_map_column().insert_many_vals(0, num);
_push_false_to_nullmap(num);
get_nested_column().insert_many_binary_data(data_array, len_array, start_offset_array, num);
}
void insert_default() override {
get_nested_column().insert_default();
_get_null_map_data().push_back(1);
get_null_map_data().push_back(1);
_has_null = true;
_need_update_has_null = false;
}
void insert_many_defaults(size_t length) override {
get_nested_column().insert_many_defaults(length);
_get_null_map_data().resize_fill(_get_null_map_data().size() + length, 1);
get_null_map_data().resize_fill(get_null_map_data().size() + length, 1);
_has_null = true;
_need_update_has_null = false;
}
void insert_not_null_elements(size_t num) {
get_nested_column().insert_many_defaults(num);
_get_null_map_column().insert_many_vals(0, num);
_has_null = false;
_push_false_to_nullmap(num);
}
void insert_null_elements(int num) {
get_nested_column().insert_many_defaults(num);
_get_null_map_column().insert_many_vals(1, num);
get_null_map_column().insert_many_vals(1, num);
_has_null = true;
_need_update_has_null = false;
}
void pop_back(size_t n) override;
@ -252,7 +323,7 @@ public:
void for_each_subcolumn(ColumnCallback callback) override {
callback(nested_column);
callback(null_map);
callback(get_null_map());
}
bool structure_equals(const IColumn& rhs) const override {
@ -279,11 +350,13 @@ public:
bool values_have_fixed_size() const override { return nested_column->values_have_fixed_size(); }
bool is_exclusive() const override {
return IColumn::is_exclusive() && nested_column->is_exclusive() && null_map->is_exclusive();
return IColumn::is_exclusive() && nested_column->is_exclusive() &&
get_null_map_column().is_exclusive();
}
size_t size_of_value_if_fixed() const override {
return null_map->size_of_value_if_fixed() + nested_column->size_of_value_if_fixed();
return get_null_map_column().size_of_value_if_fixed() +
nested_column->size_of_value_if_fixed();
}
bool only_null() const override { return size() == 1 && is_null_at(0); }
@ -299,32 +372,12 @@ public:
MutableColumnPtr get_nested_column_ptr() { return nested_column->assume_mutable(); }
/// Return the column that represents the byte map.
const ColumnPtr& get_null_map_column_ptr() const { return null_map; }
MutableColumnPtr get_null_map_column_ptr() {
_need_update_has_null = true;
return null_map->assume_mutable();
}
ColumnUInt8& get_null_map_column() {
_need_update_has_null = true;
return assert_cast<ColumnUInt8&>(*null_map);
}
const ColumnUInt8& get_null_map_column() const {
return assert_cast<const ColumnUInt8&>(*null_map);
}
void clear() override {
null_map->clear();
clear_null_map();
nested_column->clear();
_has_null = false;
}
NullMap& get_null_map_data() { return get_null_map_column().get_data(); }
const NullMap& get_null_map_data() const { return get_null_map_column().get_data(); }
/// Apply the null byte map of a specified nullable column onto the
/// null byte map of the current column by performing an element-wise OR
/// between both byte maps. This method is used to determine the null byte
@ -349,7 +402,8 @@ public:
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
DCHECK(size() > self_row);
const auto& nullable_rhs = assert_cast<const ColumnNullable&>(rhs);
null_map->replace_column_data(*nullable_rhs.null_map, row, self_row);
get_null_map_column().replace_column_data(nullable_rhs.get_null_map_column(), row,
self_row);
if (!nullable_rhs.is_null_at(row)) {
nested_column->replace_column_data(*nullable_rhs.nested_column, row, self_row);
@ -398,19 +452,15 @@ public:
ColumnPtr index(const IColumn& indexes, size_t limit) const override;
private:
// the two functions will not update `_need_update_has_null`
ColumnUInt8& _get_null_map_column() { return assert_cast<ColumnUInt8&>(*null_map); }
NullMap& _get_null_map_data() { return _get_null_map_column().get_data(); }
WrappedPtr nested_column;
WrappedPtr null_map;
bool _need_update_has_null = true;
bool _has_null;
void _update_has_null();
template <bool negative>
void apply_null_map_impl(const ColumnUInt8& map);
// push not null value wouldn't change the nullity. no need to update _has_null
void _push_false_to_nullmap(size_t num) { get_null_map_column(false).insert_many_vals(0, num); }
WrappedPtr nested_column;
};
ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable = false);

View File

@ -0,0 +1,112 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/columns/column_nullable.h"
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include <gtest/gtest.h>
#include "column_nullable_test.h"
#include "common/status.h"
#include "runtime/define_primitive_type.h"
#include "vec/columns/columns_number.h"
#include "vec/columns/predicate_column.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
namespace doris::vectorized {
TEST(ColumnNullableTest, NullTest) {
ColumnNullable::MutablePtr null_col = create_column_nullable<Int64>(500, true);
EXPECT_TRUE(null_col->has_null());
ColumnNullable::MutablePtr dst_col =
ColumnNullable::create(create_nested_column<Int64>(10), ColumnUInt8::create(10));
EXPECT_FALSE(dst_col->has_null());
ColumnInt64::MutablePtr source_col = ColumnInt64::create();
source_col->insert_range_of_integer(0, 100);
dst_col->insert(Field());
EXPECT_TRUE(dst_col->has_null());
dst_col->clear();
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_many_from_not_nullable(*source_col, 0, 10);
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_from_not_nullable(*source_col, 5);
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_many_from_not_nullable(*source_col, 5, 5);
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_range_from_not_nullable(*source_col, 5, 5);
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_range_from(
*ColumnNullable::create(std::move(source_col), ColumnUInt8::create(10)), 5, 5);
EXPECT_FALSE(dst_col->has_null());
dst_col->clear();
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_null_elements(10);
EXPECT_TRUE(dst_col->has_null());
dst_col->clear();
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_from(*null_col, 100);
EXPECT_TRUE(dst_col->has_null());
auto tmp_col =
ColumnNullable::create(create_nested_column<Int64>(10), ColumnUInt8::create(10, 1));
dst_col->clear();
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_from(*tmp_col, 9);
EXPECT_TRUE(dst_col->has_null());
dst_col->clear();
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_range_from(*tmp_col, 0, 3);
EXPECT_TRUE(dst_col->has_null());
dst_col->clear();
EXPECT_FALSE(dst_col->has_null());
dst_col->insert_from(*tmp_col, 9);
EXPECT_TRUE(dst_col->has_null());
}
TEST(ColumnNullableTest, PredicateTest) {
auto nullable_pred =
ColumnNullable::create(PredicateColumnType<TYPE_DATE>::create(), ColumnUInt8::create());
nullable_pred->insert_many_defaults(3);
EXPECT_TRUE(nullable_pred->has_null());
nullable_pred->insert_null_elements(10);
EXPECT_TRUE(nullable_pred->has_null());
nullable_pred->clear();
EXPECT_FALSE(nullable_pred->has_null());
nullable_pred->insert_null_elements(10);
EXPECT_TRUE(nullable_pred->has_null()); // now it have 10 nulls
auto null_dst = ColumnNullable::create(ColumnDate::create(), ColumnUInt8::create());
EXPECT_FALSE(null_dst->has_null());
uint16_t selector[] = {5, 8}; // both null
EXPECT_EQ(nullable_pred->filter_by_selector(selector, 2, null_dst.get()), Status::OK());
// filter_by_selector must announce to update has_null to make below right.
EXPECT_TRUE(null_dst->has_null());
}
} // namespace doris::vectorized

View File

@ -0,0 +1,105 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gmock/gmock-more-matchers.h>
#include <gtest/gtest.h>
#include <cstddef>
#include <cstdlib>
#include <type_traits>
#include "vec/columns/column.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/columns_number.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_string.h"
namespace doris::vectorized {
inline std::string generate_random_string(size_t max_length) {
std::srand(std::time(nullptr)); // use current time as seed for random generator
if (max_length == 0) {
return "";
}
auto randbyte = []() -> char {
// generate a random byte, in range [0x00, 0xFF]
return static_cast<char>(rand() % 256);
};
std::string str(max_length, 0);
std::generate_n(str.begin(), max_length, randbyte);
return str;
}
inline MutableColumnPtr create_null_map(size_t input_rows_count, bool all_null = false,
bool all_not_null = false) {
std::srand(std::time(nullptr)); // use current time as seed for random generator
auto null_map = ColumnUInt8::create();
for (size_t i = 0; i < input_rows_count; ++i) {
if (all_null) {
null_map->insert(1);
} else if (all_not_null) {
null_map->insert(0);
} else {
null_map->insert(rand() % 2);
}
}
return null_map;
}
template <typename T>
inline MutableColumnPtr create_nested_column(size_t input_rows_count) {
MutableColumnPtr column;
if constexpr (std::is_integral_v<T>) {
column = ColumnVector<T>::create();
} else if constexpr (std::is_same_v<T, String>) {
column = ColumnString::create();
} else if constexpr (std::is_same_v<T, Decimal64>) {
column = ColumnDecimal64::create(0, 6);
}
for (size_t i = 0; i < input_rows_count; ++i) {
if constexpr (std::is_integral_v<T>) {
column->insert(rand() % std::numeric_limits<T>::max());
} else if constexpr (std::is_same_v<T, String>) {
column->insert(generate_random_string(rand() % 512));
} else if constexpr (std::is_same_v<T, Decimal64>) {
column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
} else {
throw std::runtime_error("Unsupported type");
}
}
return column;
}
template <typename T>
inline ColumnNullable::MutablePtr create_column_nullable(size_t input_rows_count,
bool all_null = false,
bool all_not_null = false) {
auto null_map = create_null_map(input_rows_count, all_null, all_not_null);
auto nested_column = create_nested_column<T>(input_rows_count);
return ColumnNullable::create(std::move(nested_column), std::move(null_map));
}
} // namespace doris::vectorized

View File

@ -1,4 +1,3 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information

View File

@ -266,6 +266,14 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty
fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2;
} else if (std::is_same_v<ReturnType, DateTimeV2>) {
fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2;
} else if (std::is_same_v<ReturnType, Decimal128V2>) {
fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMALV2;
} else if (std::is_same_v<ReturnType, Decimal32>) {
fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL32;
} else if (std::is_same_v<ReturnType, Decimal64>) {
fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL64;
} else if (std::is_same_v<ReturnType, Decimal128V3>) {
fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL128I;
} else {
fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE;
}

View File

@ -0,0 +1,6 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !test1 --
-- !test2 --
0

View File

@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_column_nullable_cache") {
sql """
drop table if exists test_column_nullable_cache;
"""
sql """
CREATE TABLE `test_column_nullable_cache` (
`col_int_undef_signed2` int NULL,
`col_int_undef_signed` int NULL,
`col_int_undef_signed3` int NULL,
`col_int_undef_signed4` int NULL,
`pk` int NULL
) ENGINE=OLAP
DUPLICATE KEY(`col_int_undef_signed2`)
DISTRIBUTED by RANDOM BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """
insert into test_column_nullable_cache
(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
values (0,3,7164641,5,8),(1,null,3916062,5,6),(2,1,5533498,0,9),(3,7,2,null,7057679),(4,1,0,7,7),
(5,null,4,2448564,1),(6,7531976,7324373,9,7),(7,3,1,1,3),(8,6,8131576,9,-1793807),(9,9,2,4214547,9),
(10,-7299852,5,1,3),(11,7,3,-1036551,5),(12,-6108579,84823,4,1229534),(13,-1065629,5,4,null),(14,null,8072633,3328285,2),
(15,2,7,6,6),(16,8,5,-4582103,1),(17,5,-4677722,-2379367,4),(18,-7807532,-6686732,0,5329341),
(19,8,7,-4013246,-7013374),(20,0,2,9,2),(21,7,2383333,5,4),(22,5844611,2,2,0),(23,0,4756185,0,-5612039),
(24,6,4878754,608172,0),(25,null,7858692,7,-6704206),(26,7,-1697597,6,9),(27,9,-7021349,3,-3094786),
(28,2,2830915,null,8),(29,4133633,489212,5,9),(30,6,-3346211,3668768,2),(31,1,4862070,-5066405,0),(32,9,6,7,8),
(33,2,null,4,2),(34,1,2893430,-3282825,5),(35,2,3,4,2),(36,4,-3418732,6,1263819),(37,5,4,-6342170,6),(99,9,2,8,null);
"""
qt_test1 """
select * from test_column_nullable_cache where col_int_undef_signed3 IS NULL and col_int_undef_signed3 = col_int_undef_signed3;
"""
qt_test2 """
select count(*) from test_column_nullable_cache where col_int_undef_signed3 IS NULL and col_int_undef_signed3 = col_int_undef_signed3;
"""
}