[refactor](column) remove unused method and column definitions (#25152)

Remove unused methods and column definitions.
Use the primitive type in the predicate column to distinguish DateV1/DateTimeV1 from DateV2/DateTimeV2.
This commit is contained in:
yiguolei
2023-10-09 17:14:35 +08:00
committed by GitHub
parent 263631e983
commit 4de3df6a46
22 changed files with 18 additions and 481 deletions

View File

@ -167,22 +167,22 @@ struct PrimitiveTypeTraits<TYPE_DOUBLE> {
template <>
struct PrimitiveTypeTraits<TYPE_DATE> {
using CppType = doris::vectorized::VecDateTimeValue;
using ColumnType = vectorized::ColumnVector<vectorized::DateTime>;
using ColumnType = vectorized::ColumnVector<vectorized::Int64>;
};
template <>
struct PrimitiveTypeTraits<TYPE_DATETIME> {
using CppType = doris::vectorized::VecDateTimeValue;
using ColumnType = vectorized::ColumnVector<vectorized::DateTime>;
using ColumnType = vectorized::ColumnVector<vectorized::Int64>;
};
template <>
struct PrimitiveTypeTraits<TYPE_DATETIMEV2> {
using CppType = doris::vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>;
using ColumnType = vectorized::ColumnVector<vectorized::DateTimeV2>;
using ColumnType = vectorized::ColumnVector<vectorized::UInt64>;
};
template <>
struct PrimitiveTypeTraits<TYPE_DATEV2> {
using CppType = doris::vectorized::DateV2Value<doris::vectorized::DateV2ValueType>;
using ColumnType = vectorized::ColumnVector<vectorized::DateV2>;
using ColumnType = vectorized::ColumnVector<vectorized::UInt32>;
};
template <>
struct PrimitiveTypeTraits<TYPE_DECIMALV2> {

View File

@ -531,14 +531,6 @@ public:
/// TODO: interface decoupled from ColumnGathererStream that allows non-generic specializations.
// virtual void gather(ColumnGathererStream & gatherer_stream) = 0;
/** Computes minimum and maximum element of the column.
* In addition to numeric types, the function is completely implemented for Date and DateTime.
* For strings and arrays function should return default value.
* (except for constant columns; they should return value of the constant).
* If column is empty function should return default value.
*/
virtual void get_extremes(Field& min, Field& max) const = 0;
/// Reserves memory for specified amount of elements. If reservation isn't possible, does nothing.
/// It affects performance only (not correctness).
virtual void reserve(size_t /*n*/) {}

View File

@ -179,9 +179,6 @@ public:
ColumnPtr replicate(const IColumn::Offsets& replicate_offsets) const override;
void replicate(const uint32_t* counts, size_t target_size, IColumn& column) const override;
ColumnPtr convert_to_full_column_if_const() const override;
void get_extremes(Field& min, Field& max) const override {
LOG(FATAL) << "get_extremes not implemented";
}
/** More efficient methods of manipulation */
IColumn& get_data() { return *data; }

View File

@ -239,10 +239,6 @@ public:
"compare_at for " + std::string(get_family_name()));
}
void get_extremes(Field& min, Field& max) const override {
LOG(FATAL) << "get_extremes not implemented";
}
bool can_be_inside_nullable() const override { return true; }
bool is_fixed_and_contiguous() const override { return true; }

View File

@ -228,8 +228,6 @@ public:
LOG(FATAL) << "append_data_by_selector is not supported in ColumnConst!";
}
void get_extremes(Field& min, Field& max) const override { data->get_extremes(min, max); }
void for_each_subcolumn(ColumnCallback callback) override { callback(data); }
bool structure_equals(const IColumn& rhs) const override {

View File

@ -448,28 +448,6 @@ void ColumnDecimal<T>::replicate(const uint32_t* __restrict indexs, size_t targe
}
}
/// Writes the smallest and largest stored value into `min` and `max`, each wrapped
/// together with the column's `scale`. For an empty column both bounds are built
/// from a default-constructed T.
template <typename T>
void ColumnDecimal<T>::get_extremes(Field& min, Field& max) const {
    // Defaults cover the empty-column case.
    T lowest = T();
    T highest = T();

    const size_t rows = data.size();
    if (rows != 0) {
        lowest = data[0];
        highest = data[0];
        // Element 0 already seeds both bounds, so the scan starts at 1.
        for (size_t i = 1; i < rows; ++i) {
            const T& value = data[i];
            if (value < lowest) {
                lowest = value;
            } else if (value > highest) {
                highest = value;
            }
        }
    }

    min = NearestFieldType<T>(lowest, scale);
    max = NearestFieldType<T>(highest, scale);
}
template <typename T>
void ColumnDecimal<T>::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms, EqualRange& range,

View File

@ -231,8 +231,6 @@ public:
TypeIndex get_data_type() const override { return TypeId<T>::value; }
void get_extremes(Field& min, Field& max) const override;
MutableColumns scatter(IColumn::ColumnIndex num_columns,
const IColumn::Selector& selector) const override {
return this->template scatter_impl<Self>(num_columns, selector);

View File

@ -151,10 +151,6 @@ public:
LOG(FATAL) << "compare_at not supported in ColumnDictionary";
}
void get_extremes(Field& min, Field& max) const override {
LOG(FATAL) << "get_extremes not supported in ColumnDictionary";
}
bool can_be_inside_nullable() const override { return true; }
bool is_fixed_and_contiguous() const override { return true; }

View File

@ -1,177 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IColumnDummy.h
// and modified by Doris
#pragma once
#include "vec/columns/column.h"
#include "vec/columns/columns_common.h"
#include "vec/common/arena.h"
#include "vec/common/pod_array.h"
namespace doris::vectorized {
/** Base class for columns-constants that contain a value that is not in the `Field`.
* Not a full-fledged column and is used in a special way.
*
* The only state kept here is the row counter `s`: every insert/filter/permute style
* operation below just adjusts or re-derives that counter, and no per-row data is stored.
* Subclasses supply the payload and implement clone_dummy().
*/
class IColumnDummy : public IColumn {
public:
// Starts with zero rows.
IColumnDummy() : s(0) {}
// Starts with `s_` rows. Not `explicit`, so a size_t converts implicitly.
IColumnDummy(size_t s_) : s(s_) {}
public:
// Creates a column of the same concrete kind holding `s_` rows.
virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
MutableColumnPtr clone_resized(size_t s) const override { return clone_dummy(s); }
size_t size() const override { return s; }
void insert_default() override { ++s; }
// No bounds check: `s` is unsigned, so n > s wraps around.
void pop_back(size_t n) override { s -= n; }
// No per-row storage, so both size queries report 0.
size_t byte_size() const override { return 0; }
size_t allocated_bytes() const override { return 0; }
// All rows compare as equal.
int compare_at(size_t, size_t, const IColumn&, int) const override { return 0; }
// Element access and Field insertion are not supported; each logs a FATAL message.
[[noreturn]] Field operator[](size_t) const override {
LOG(FATAL) << "Cannot get value from " << get_name();
}
void get(size_t, Field&) const override {
LOG(FATAL) << "Cannot get value from " << get_name();
}
void insert(const Field&) override {
LOG(FATAL) << "Cannot insert element into " << get_name();
}
// Every row is presented as an empty StringRef.
StringRef get_data_at(size_t) const override { return {}; }
// The bytes are ignored; only the row count grows.
void insert_data(const char*, size_t) override { ++s; }
// Serializes a row as zero bytes.
StringRef serialize_value_into_arena(size_t /*n*/, Arena& arena,
char const*& begin) const override {
return {arena.alloc_continue(0, begin), 0};
}
// Consumes nothing from `pos`; only the row count grows.
const char* deserialize_and_insert_from_arena(const char* pos) override {
++s;
return pos;
}
// The insert_* family ignores the source column and only counts the added rows.
void insert_from(const IColumn&, size_t) override { ++s; }
void insert_range_from(const IColumn& /*src*/, size_t /*start*/, size_t length) override {
s += length;
}
void insert_indices_from(const IColumn& src, const int* indices_begin,
const int* indices_end) override {
s += (indices_end - indices_begin);
}
// Returns a new dummy column sized by count_bytes_in_filter(filt).
ColumnPtr filter(const Filter& filt, ssize_t /*result_size_hint*/) const override {
return clone_dummy(count_bytes_in_filter(filt));
}
// In-place variant: sets the row count to count_bytes_in_filter(filter) and returns it.
size_t filter(const Filter& filter) override {
const auto result_size = count_bytes_in_filter(filter);
s = result_size;
return result_size;
}
// The permutation content is irrelevant; only its size is validated.
// A non-zero `limit` caps the resulting row count.
ColumnPtr permute(const Permutation& perm, size_t limit) const override {
if (s != perm.size()) {
LOG(FATAL) << "Size of permutation doesn't match size of column.";
}
return clone_dummy(limit ? std::min(s, limit) : s);
}
// Produces the identity permutation 0..s-1; the sort options are ignored.
void get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/,
Permutation& res) const override {
res.resize(s);
for (size_t i = 0; i < s; ++i) res[i] = i;
}
// The resulting row count is the last offset.
// NOTE(review): offsets.back() is read even when `offsets` is empty; confirm that
// Offsets::back() is safe on an empty container.
ColumnPtr replicate(const Offsets& offsets) const override {
column_match_offsets_size(s, offsets.size());
return clone_dummy(offsets.back());
}
// Splits into `num_columns` dummy columns, each sized by how often its index
// appears in `selector`.
MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override {
if (s != selector.size()) {
LOG(FATAL) << "Size of selector doesn't match size of column.";
}
std::vector<size_t> counts(num_columns);
for (auto idx : selector) ++counts[idx];
MutableColumns res(num_columns);
for (size_t i = 0; i < num_columns; ++i) res[i] = clone_resized(counts[i]);
return res;
}
// Appends one row to `res` per selector entry. Note that the reservation uses this
// column's row count rather than selector.size().
void append_data_by_selector(MutableColumnPtr& res,
const IColumn::Selector& selector) const override {
size_t num_rows = size();
if (num_rows < selector.size()) {
LOG(FATAL) << fmt::format("Size of selector: {}, is larger than size of column:{}",
selector.size(), num_rows);
}
res->reserve(num_rows);
for (size_t i = 0; i < selector.size(); ++i) res->insert_from(*this, selector[i]);
}
// Leaves `min` and `max` untouched.
void get_extremes(Field&, Field&) const override {}
// Grows the row count by `delta`.
void addSize(size_t delta) { s += delta; }
bool is_dummy() const override { return true; }
// Subclasses that have a meaningful data type must override this.
[[noreturn]] TypeIndex get_data_type() const override {
LOG(FATAL) << "IColumnDummy get_data_type not implemeted";
}
// The following operations are unsupported and only log a FATAL message.
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
LOG(FATAL) << "should not call the method in column dummy";
}
void replace_column_data_default(size_t self_row = 0) override {
LOG(FATAL) << "should not call the method in column dummy";
}
void get_indices_of_non_default_rows(Offsets64&, size_t, size_t) const override {
LOG(FATAL) << "should not call the method in column dummy";
}
// The result has `limit` rows, or `s` rows when `limit` is 0. The index values
// themselves are not read; only their count is validated against `limit`.
ColumnPtr index(const IColumn& indexes, size_t limit) const override {
if (indexes.size() < limit) {
LOG(FATAL) << "Size of indexes is less than required.";
}
return clone_dummy(limit ? limit : s);
}
protected:
// Number of rows this column reports via size().
size_t s;
};
} // namespace doris::vectorized

View File

@ -244,10 +244,6 @@ public:
this->template append_data_by_selector_impl<Self>(res, selector);
}
void get_extremes(Field& min, Field& max) const override {
LOG(FATAL) << "get_extremes not supported";
}
size_t byte_size() const override { return _data.size(); }
size_t item_size() const { return _item_size; }

View File

@ -121,9 +121,7 @@ public:
MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override {
return scatter_impl<ColumnMap>(num_columns, selector);
}
void get_extremes(Field& min, Field& max) const override {
LOG(FATAL) << "get_extremes not implemented";
};
[[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs_,
int nan_direction_hint) const override {
LOG(FATAL) << "compare_at not implemented";

View File

@ -485,93 +485,6 @@ void ColumnNullable::protect() {
get_null_map_column().protect();
}
namespace {
/// The following function implements a slightly more general version
/// of get_extremes() than the implementation from ColumnVector.
/// It takes into account the possible presence of nullable values.
///
/// Rows whose `null_map` entry is non-zero are ignored. An empty column sets both
/// `min` and `max` to Null(). If every row is null, `min` and `max` are left
/// untouched, so the caller's initial values survive.
template <typename T>
void getExtremesFromNullableContent(const ColumnVector<T>& col, const NullMap& null_map, Field& min,
Field& max) {
const auto& data = col.get_data();
size_t size = data.size();
if (size == 0) {
min = Null();
max = Null();
return;
}
// has_not_null: at least one non-null row was seen.
// has_not_nan: the current bounds come from a non-NaN value.
bool has_not_null = false;
bool has_not_nan = false;
T cur_min = 0;
T cur_max = 0;
for (size_t i = 0; i < size; ++i) {
const T x = data[i];
if (null_map[i]) continue;
// The first non-null row seeds both bounds, even if it is NaN.
if (!has_not_null) {
cur_min = x;
cur_max = x;
has_not_null = true;
has_not_nan = !is_nan(x);
continue;
}
// Later NaNs never affect the bounds.
if (is_nan(x)) continue;
// The bounds were seeded from a NaN: the first real value replaces both.
if (!has_not_nan) {
cur_min = x;
cur_max = x;
has_not_nan = true;
continue;
}
if (x < cur_min)
cur_min = x;
else if (x > cur_max)
cur_max = x;
}
// If every non-null row was NaN, the reported bounds are that first NaN.
if (has_not_null) {
min = cur_min;
max = cur_max;
}
}
} // namespace
/// Reports the extremes over the non-null rows of the nested column.
/// `min` and `max` start as Null() and keep that value when the nested column is not
/// one of the integer / floating-point column types probed below.
void ColumnNullable::get_extremes(Field& min, Field& max) const {
    min = Null();
    max = Null();

    const auto& nulls = get_null_map_data();
    const auto* nested = nested_column.get();

    // Probe each supported concrete column type in turn; the first match does the work.
    if (const auto* as_i8 = typeid_cast<const ColumnInt8*>(nested)) {
        getExtremesFromNullableContent<Int8>(*as_i8, nulls, min, max);
        return;
    }
    if (const auto* as_i16 = typeid_cast<const ColumnInt16*>(nested)) {
        getExtremesFromNullableContent<Int16>(*as_i16, nulls, min, max);
        return;
    }
    if (const auto* as_i32 = typeid_cast<const ColumnInt32*>(nested)) {
        getExtremesFromNullableContent<Int32>(*as_i32, nulls, min, max);
        return;
    }
    if (const auto* as_i64 = typeid_cast<const ColumnInt64*>(nested)) {
        getExtremesFromNullableContent<Int64>(*as_i64, nulls, min, max);
        return;
    }
    if (const auto* as_u8 = typeid_cast<const ColumnUInt8*>(nested)) {
        getExtremesFromNullableContent<UInt8>(*as_u8, nulls, min, max);
        return;
    }
    if (const auto* as_u16 = typeid_cast<const ColumnUInt16*>(nested)) {
        getExtremesFromNullableContent<UInt16>(*as_u16, nulls, min, max);
        return;
    }
    if (const auto* as_u32 = typeid_cast<const ColumnUInt32*>(nested)) {
        getExtremesFromNullableContent<UInt32>(*as_u32, nulls, min, max);
        return;
    }
    if (const auto* as_u64 = typeid_cast<const ColumnUInt64*>(nested)) {
        getExtremesFromNullableContent<UInt64>(*as_u64, nulls, min, max);
        return;
    }
    if (const auto* as_f32 = typeid_cast<const ColumnFloat32*>(nested)) {
        getExtremesFromNullableContent<Float32>(*as_f32, nulls, min, max);
        return;
    }
    if (const auto* as_f64 = typeid_cast<const ColumnFloat64*>(nested)) {
        getExtremesFromNullableContent<Float64>(*as_f64, nulls, min, max);
        return;
    }
}
ColumnPtr ColumnNullable::replicate(const Offsets& offsets) const {
ColumnPtr replicated_data = get_nested_column().replicate(offsets);
ColumnPtr replicated_null_map = get_null_map_column().replicate(offsets);

View File

@ -237,7 +237,6 @@ public:
const uint8_t* __restrict null_data) const override;
void update_hashes_with_value(uint64_t* __restrict hashes,
const uint8_t* __restrict null_data) const override;
void get_extremes(Field& min, Field& max) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override {
return scatter_impl<ColumnNullable>(num_columns, selector);

View File

@ -371,10 +371,6 @@ public:
LOG(FATAL) << "should not call the method in column object";
}
void get_extremes(Field& min, Field& max) const override {
LOG(FATAL) << "should not call the method in column object";
}
void get_indices_of_non_default_rows(Offsets64&, size_t, size_t) const override {
LOG(FATAL) << "should not call the method in column object";
}

View File

@ -1,52 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnSet.h
// and modified by Doris
#pragma once
#include "exprs/hybrid_set.h"
#include "vec/columns/column_dummy.h"
namespace doris::vectorized {
using ConstSetPtr = std::shared_ptr<HybridSetBase>;
/** A column containing multiple values in the `IN` section.
* Behaves like a constant-column (because the set is one, not its own for each line).
* This column has a nonstandard value, so it can not be obtained via a normal interface.
*
* The set itself is held through a shared pointer (ConstSetPtr), so clones made via
* clone_dummy() refer to the same HybridSetBase instance rather than copying it.
*/
class ColumnSet final : public COWHelper<IColumnDummy, ColumnSet> {
public:
friend class COWHelper<IColumnDummy, ColumnSet>;
// `s_` is the row count reported by the column; it is assigned to the inherited
// counter `s` in the constructor body. `data_` is the shared set.
ColumnSet(size_t s_, const ConstSetPtr& data_) : data(data_) { s = s_; }
ColumnSet(const ColumnSet&) = default;
const char* get_family_name() const override { return "Set"; }
// A clone differs only in row count; it shares `data` with this column.
MutableColumnPtr clone_dummy(size_t s_) const override { return ColumnSet::create(s_, data); }
// Returns a copy of the shared pointer to the underlying set.
ConstSetPtr get_data() const { return data; }
// Always reports TypeIndex::String, independent of the set's contents.
TypeIndex get_data_type() const override { return TypeIndex::String; }
private:
// The set of values; shared between this column and its clones.
ConstSetPtr data;
};
} // namespace doris::vectorized

View File

@ -505,33 +505,6 @@ void ColumnString::resize(size_t n) {
}
}
/// Stores the smallest and largest row of the column, as ordered by `less<true>`,
/// in `min` and `max`. Both are an empty String when the column has no rows.
void ColumnString::get_extremes(Field& min, Field& max) const {
    min = String();
    max = String();

    const size_t rows = size();
    if (rows != 0) {
        less<true> is_less(*this);
        size_t lowest = 0;
        size_t highest = 0;
        // Row 0 seeds both candidates; each later row can replace at most one of them.
        for (size_t row = 1; row != rows; ++row) {
            if (is_less(row, lowest)) {
                lowest = row;
            } else if (is_less(highest, row)) {
                highest = row;
            }
        }
        get(lowest, min);
        get(highest, max);
    }
}
void ColumnString::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms, EqualRange& range,
bool last_column) const {

View File

@ -525,8 +525,6 @@ public:
void resize(size_t n) override;
void get_extremes(Field& min, Field& max) const override;
bool can_be_inside_nullable() const override { return true; }
bool is_column_string() const override { return true; }

View File

@ -363,20 +363,6 @@ void ColumnStruct::protect() {
}
}
/// Gathers the extremes of every sub-column into two tuples (one entry per sub-column,
/// in sub-column order) and assigns them to `min` and `max`.
void ColumnStruct::get_extremes(Field& min, Field& max) const {
    const size_t field_count = columns.size();

    Tuple lows(field_count);
    Tuple highs(field_count);

    size_t pos = 0;
    while (pos < field_count) {
        columns[pos]->get_extremes(lows[pos], highs[pos]);
        ++pos;
    }

    min = lows;
    max = highs;
}
void ColumnStruct::for_each_subcolumn(ColumnCallback callback) {
for (auto& column : columns) {
callback(column);

View File

@ -161,7 +161,6 @@ public:
int nan_direction_hint) const override {
LOG(FATAL) << "compare_at not implemented";
}
void get_extremes(Field& min, Field& max) const override;
void reserve(size_t n) override;
void resize(size_t n) override;
size_t byte_size() const override;

View File

@ -560,47 +560,6 @@ void ColumnVector<T>::replicate(const uint32_t* __restrict indexs, size_t target
}
}
/// Computes the smallest and largest value in the column, ignoring NaNs.
/// An empty column reports T(0) for both bounds. When no non-NaN value exists, both
/// bounds are whatever nan_or_zero<T>() yields, which need not be bit-identical to any
/// NaN stored in the column.
template <typename T>
void ColumnVector<T>::get_extremes(Field& min, Field& max) const {
    if (data.size() == 0) {
        min = T(0);
        max = T(0);
        return;
    }

    T lowest = nan_or_zero<T>();
    T highest = nan_or_zero<T>();
    bool seeded = false;

    for (const T value : data) {
        if (is_nan(value)) {
            continue;
        }
        if (seeded) {
            if (value < lowest) {
                lowest = value;
            } else if (value > highest) {
                highest = value;
            }
        } else {
            // The first non-NaN value replaces the placeholder bounds.
            lowest = value;
            highest = value;
            seeded = true;
        }
    }

    min = NearestFieldType<T>(lowest);
    max = NearestFieldType<T>(highest);
}
template <typename T>
ColumnPtr ColumnVector<T>::index(const IColumn& indexes, size_t limit) const {
return select_index_impl(*this, indexes, limit);

View File

@ -425,8 +425,6 @@ public:
void replicate(const uint32_t* indexs, size_t target_size, IColumn& column) const override;
void get_extremes(Field& min, Field& max) const override;
MutableColumns scatter(IColumn::ColumnIndex num_columns,
const IColumn::Selector& selector) const override {
return this->template scatter_impl<Self>(num_columns, selector);

View File

@ -415,10 +415,6 @@ public:
LOG(FATAL) << "compare_at not supported in PredicateColumnType";
}
void get_extremes(Field& min, Field& max) const override {
LOG(FATAL) << "get_extremes not supported in PredicateColumnType";
}
bool can_be_inside_nullable() const override { return true; }
bool is_fixed_and_contiguous() const override { return true; }
@ -469,7 +465,19 @@ public:
Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override {
ColumnType* column = assert_cast<ColumnType*>(col_ptr);
if constexpr (std::is_same_v<ColumnVector<T>, ColumnType>) {
// DateV1 and DateTimeV1 is special, its storage format is different from compute format
// should convert here.
if constexpr (Type == TYPE_DATE || Type == TYPE_DATETIME) {
if constexpr (std::is_same_v<T, uint32_t>) {
insert_date_to_res_column(sel, sel_size, column);
} else if constexpr (std::is_same_v<T, uint64_t>) {
insert_datetime_to_res_column(sel, sel_size, column);
} else {
LOG(FATAL) << "not reachable";
}
} else if constexpr (std::is_same_v<ColumnVector<T>, ColumnType>) {
// DateV2 and DateTimeV2, its storage format is equal to compute format
// not need convert
insert_default_value_res_column(sel, sel_size, column);
} else if constexpr (std::is_same_v<ColumnDecimal<T>, ColumnType>) {
insert_default_value_res_column(sel, sel_size, column);
@ -477,18 +485,6 @@ public:
insert_string_to_res_column(sel, sel_size, column);
} else if constexpr (std::is_same_v<T, decimal12_t>) {
insert_decimal_to_res_column(sel, sel_size, column);
} else if constexpr (std::is_same_v<T, uint64_t>) {
if constexpr (Type == TYPE_DATETIMEV2) {
insert_default_value_res_column(sel, sel_size, column);
} else {
insert_datetime_to_res_column(sel, sel_size, column);
}
} else if constexpr (std::is_same_v<T, uint32_t>) {
if constexpr (Type == TYPE_DATEV2) {
insert_default_value_res_column(sel, sel_size, column);
} else {
insert_date_to_res_column(sel, sel_size, column);
}
} else if (std::is_same_v<T, bool>) {
insert_byte_to_res_column(sel, sel_size, col_ptr);
} else {