diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 4307153739..565a9416d7 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -340,12 +340,18 @@ public: virtual void get_permutation(bool reverse, size_t limit, int nan_direction_hint, Permutation& res) const = 0; + // 32bit offsets for string + using Offset = UInt32; + using Offsets = PaddedPODArray; + + // 64bit offsets for array + using Offset64 = UInt64; + using Offsets64 = PaddedPODArray; + /** Copies each element according offsets parameter. * (i-th element should be copied offsets[i] - offsets[i - 1] times.) * It is necessary in ARRAY JOIN operation. */ - using Offset = UInt64; - using Offsets = PaddedPODArray; virtual Ptr replicate(const Offsets& offsets) const = 0; virtual void replicate(const uint32_t* counts, size_t target_size, IColumn& column) const { diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 7ef683f651..c498b72345 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -55,7 +55,7 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& of } if (!offsets_concrete->empty() && nested_column) { - Offset last_offset = offsets_concrete->get_data().back(); + auto last_offset = offsets_concrete->get_data().back(); /// This will also prevent possible overflow in offset. if (nested_column->size() != last_offset) { @@ -93,7 +93,7 @@ MutableColumnPtr ColumnArray::clone_resized(size_t to_size) const { res->get_data().insert_range_from(get_data(), 0, get_offsets()[to_size - 1]); } else { /// Copy column and append empty arrays for extra elements. - Offset offset = 0; + Offset64 offset = 0; if (from_size > 0) { res->get_offsets().assign(get_offsets().begin(), get_offsets().end()); res->get_data().insert_range_from(get_data(), 0, get_data().size()); @@ -304,8 +304,8 @@ void ColumnArray::insert_range_from(const IColumn& src, size_t start, size_t len get_data().insert_range_from(src_concrete.get_data(), nested_offset, nested_length); - Offsets& cur_offsets = get_offsets(); - const Offsets& src_offsets = src_concrete.get_offsets(); + auto& cur_offsets = get_offsets(); + const auto& src_offsets = src_concrete.get_offsets(); if (start == 0 && cur_offsets.empty()) { cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length); @@ -355,10 +355,10 @@ ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hin auto res = ColumnArray::create(data->clone_empty()); auto& res_elems = assert_cast&>(res->get_data()).get_data(); - Offsets& res_offsets = res->get_offsets(); + auto& res_offsets = res->get_offsets(); - filter_arrays_impl(assert_cast&>(*data).get_data(), get_offsets(), - res_elems, res_offsets, filt, result_size_hint); + filter_arrays_impl(assert_cast&>(*data).get_data(), + get_offsets(), res_elems, res_offsets, filt, result_size_hint); return res; } @@ -372,12 +372,12 @@ ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hin const ColumnString& src_string = typeid_cast(*data); const ColumnString::Chars& src_chars = src_string.get_chars(); - const Offsets& src_string_offsets = src_string.get_offsets(); - const Offsets& src_offsets = get_offsets(); + const auto& src_string_offsets = src_string.get_offsets(); + const auto& src_offsets = get_offsets(); ColumnString::Chars& res_chars = typeid_cast(res->get_data()).get_chars(); - Offsets& res_string_offsets = typeid_cast(res->get_data()).get_offsets(); - Offsets& res_offsets = res->get_offsets(); + auto& res_string_offsets = typeid_cast(res->get_data()).get_offsets(); + auto& res_offsets = res->get_offsets(); if (result_size_hint < 0) { res_chars.reserve(src_chars.size()); @@ -385,10 +385,10 @@ ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hin res_offsets.reserve(col_size); } - Offset prev_src_offset = 0; + Offset64 prev_src_offset = 0; Offset prev_src_string_offset = 0; - Offset prev_res_offset = 0; + Offset64 prev_res_offset = 0; Offset prev_res_string_offset = 0; for (size_t i = 0; i < col_size; ++i) { @@ -450,7 +450,7 @@ ColumnPtr ColumnArray::filter_generic(const Filter& filt, ssize_t result_size_hi res->data = data->filter(nested_filt, nested_result_size_hint); - Offsets& res_offsets = res->get_offsets(); + auto& res_offsets = res->get_offsets(); if (result_size_hint) res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size); size_t current_offset = 0; @@ -566,18 +566,18 @@ ColumnPtr ColumnArray::replicate_number(const Offsets& replicate_offsets) const const typename ColumnVector::Container& src_data = typeid_cast&>(*data).get_data(); - const Offsets& src_offsets = get_offsets(); + const auto& src_offsets = get_offsets(); typename ColumnVector::Container& res_data = typeid_cast&>(res_arr.get_data()).get_data(); - Offsets& res_offsets = res_arr.get_offsets(); + auto& res_offsets = res_arr.get_offsets(); res_data.reserve(data->size() / col_size * replicate_offsets.back()); res_offsets.reserve(replicate_offsets.back()); Offset prev_replicate_offset = 0; - Offset prev_data_offset = 0; - Offset current_new_offset = 0; + Offset64 prev_data_offset = 0; + Offset64 current_new_offset = 0; for (size_t i = 0; i < col_size; ++i) { size_t size_to_replicate = replicate_offsets[i] - prev_replicate_offset; @@ -614,12 +614,12 @@ ColumnPtr ColumnArray::replicate_string(const Offsets& replicate_offsets) const const ColumnString& src_string = typeid_cast(*data); const ColumnString::Chars& src_chars = src_string.get_chars(); - const Offsets& src_string_offsets = src_string.get_offsets(); - const Offsets& src_offsets = get_offsets(); + const auto& src_string_offsets = src_string.get_offsets(); + const auto& src_offsets = get_offsets(); ColumnString::Chars& res_chars = typeid_cast(res_arr.get_data()).get_chars(); - Offsets& res_string_offsets = typeid_cast(res_arr.get_data()).get_offsets(); - Offsets& res_offsets = res_arr.get_offsets(); + auto& res_string_offsets = typeid_cast(res_arr.get_data()).get_offsets(); + auto& res_offsets = res_arr.get_offsets(); res_chars.reserve(src_chars.size() / col_size * replicate_offsets.back()); res_string_offsets.reserve(src_string_offsets.size() / col_size * replicate_offsets.back()); @@ -627,10 +627,10 @@ ColumnPtr ColumnArray::replicate_string(const Offsets& replicate_offsets) const Offset prev_replicate_offset = 0; - Offset prev_src_offset = 0; + Offset64 prev_src_offset = 0; Offset prev_src_string_offset = 0; - Offset current_res_offset = 0; + Offset64 current_res_offset = 0; Offset current_res_string_offset = 0; for (size_t i = 0; i < col_size; ++i) { @@ -682,15 +682,15 @@ ColumnPtr ColumnArray::replicate_const(const Offsets& replicate_offsets) const { if (0 == col_size) return clone_empty(); - const Offsets& src_offsets = get_offsets(); + const auto& src_offsets = get_offsets(); auto res_column_offsets = ColumnOffsets::create(); - Offsets& res_offsets = res_column_offsets->get_data(); + auto& res_offsets = res_column_offsets->get_data(); res_offsets.reserve(replicate_offsets.back()); Offset prev_replicate_offset = 0; - Offset prev_data_offset = 0; - Offset current_new_offset = 0; + Offset64 prev_data_offset = 0; + Offset64 current_new_offset = 0; for (size_t i = 0; i < col_size; ++i) { size_t size_to_replicate = replicate_offsets[i] - prev_replicate_offset; @@ -719,7 +719,7 @@ ColumnPtr ColumnArray::replicate_generic(const Offsets& replicate_offsets) const if (0 == col_size) return res; - IColumn::Offset prev_offset = 0; + Offset64 prev_offset = 0; for (size_t i = 0; i < col_size; ++i) { size_t size_to_replicate = replicate_offsets[i] - prev_offset; prev_offset = replicate_offsets[i]; diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 50f864fbb9..686089f4e9 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -67,7 +67,7 @@ public: } /** On the index i there is an offset to the beginning of the i + 1 -th element. */ - using ColumnOffsets = ColumnVector; + using ColumnOffsets = ColumnVector; std::string get_name() const override; const char* get_family_name() const override { return "Array"; } @@ -118,11 +118,11 @@ public: IColumn& get_offsets_column() { return *offsets; } const IColumn& get_offsets_column() const { return *offsets; } - Offsets& ALWAYS_INLINE get_offsets() { + Offsets64& ALWAYS_INLINE get_offsets() { return assert_cast(*offsets).get_data(); } - const Offsets& ALWAYS_INLINE get_offsets() const { + const Offsets64& ALWAYS_INLINE get_offsets() const { return assert_cast(*offsets).get_data(); } diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 3adf082ae0..c8b99e8ffa 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -111,7 +111,8 @@ ColumnPtr ColumnString::filter(const Filter& filt, ssize_t result_size_hint) con Chars& res_chars = res->chars; Offsets& res_offsets = res->offsets; - filter_arrays_impl(chars, offsets, res_chars, res_offsets, filt, result_size_hint); + filter_arrays_impl(chars, offsets, res_chars, res_offsets, filt, + result_size_hint); return res; } diff --git a/be/src/vec/columns/columns_common.cpp b/be/src/vec/columns/columns_common.cpp index 02183b9876..8ab45e112b 100644 --- a/be/src/vec/columns/columns_common.cpp +++ b/be/src/vec/columns/columns_common.cpp @@ -98,11 +98,12 @@ namespace { /// Implementation details of filterArraysImpl function, used as template parameter. /// Allow to build or not to build offsets array. +template struct ResultOffsetsBuilder { - IColumn::Offsets& res_offsets; - IColumn::Offset current_src_offset = 0; + PaddedPODArray& res_offsets; + OT current_src_offset = 0; - explicit ResultOffsetsBuilder(IColumn::Offsets* res_offsets_) : res_offsets(*res_offsets_) {} + explicit ResultOffsetsBuilder(PaddedPODArray* res_offsets_) : res_offsets(*res_offsets_) {} void reserve(ssize_t result_size_hint, size_t src_size) { res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size); @@ -114,12 +115,10 @@ struct ResultOffsetsBuilder { } template - void insert_chunk(const IColumn::Offset* src_offsets_pos, bool first, - IColumn::Offset chunk_offset, size_t chunk_size) { + void insert_chunk(const OT* src_offsets_pos, bool first, OT chunk_offset, size_t chunk_size) { const auto offsets_size_old = res_offsets.size(); res_offsets.resize_assume_reserved(offsets_size_old + SIMD_BYTES); - memcpy(&res_offsets[offsets_size_old], src_offsets_pos, - SIMD_BYTES * sizeof(IColumn::Offset)); + memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * sizeof(OT)); if (!first) { /// difference between current and actual offset @@ -138,19 +137,20 @@ struct ResultOffsetsBuilder { } }; +template struct NoResultOffsetsBuilder { - explicit NoResultOffsetsBuilder(IColumn::Offsets*) {} + explicit NoResultOffsetsBuilder(PaddedPODArray*) {} void reserve(ssize_t, size_t) {} void insert_one(size_t) {} template - void insert_chunk(const IColumn::Offset*, bool, IColumn::Offset, size_t) {} + void insert_chunk(const OT*, bool, OT, size_t) {} }; -template +template void filter_arrays_impl_generic(const PaddedPODArray& src_elems, - const IColumn::Offsets& src_offsets, PaddedPODArray& res_elems, - IColumn::Offsets* res_offsets, const IColumn::Filter& filt, + const PaddedPODArray& src_offsets, PaddedPODArray& res_elems, + PaddedPODArray* res_offsets, const IColumn::Filter& filt, ssize_t result_size_hint) { const size_t size = src_offsets.size(); if (size != filt.size()) { @@ -175,7 +175,7 @@ void filter_arrays_impl_generic(const PaddedPODArray& src_elems, const auto offsets_begin = offsets_pos; /// copy array ending at *end_offset_ptr - const auto copy_array = [&](const IColumn::Offset* offset_ptr) { + const auto copy_array = [&](const OT* offset_ptr) { const auto arr_offset = offset_ptr == offsets_begin ? 0 : offset_ptr[-1]; const auto arr_size = *offset_ptr - arr_offset; @@ -229,41 +229,52 @@ void filter_arrays_impl_generic(const PaddedPODArray& src_elems, } } // namespace -template -void filter_arrays_impl(const PaddedPODArray& src_elems, const IColumn::Offsets& src_offsets, - PaddedPODArray& res_elems, IColumn::Offsets& res_offsets, +template +void filter_arrays_impl(const PaddedPODArray& src_elems, const PaddedPODArray& src_offsets, + PaddedPODArray& res_elems, PaddedPODArray& res_offsets, const IColumn::Filter& filt, ssize_t result_size_hint) { - return filter_arrays_impl_generic( + return filter_arrays_impl_generic>( src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint); } -template +template void filter_arrays_impl_only_data(const PaddedPODArray& src_elems, - const IColumn::Offsets& src_offsets, PaddedPODArray& res_elems, - const IColumn::Filter& filt, ssize_t result_size_hint) { - return filter_arrays_impl_generic(src_elems, src_offsets, res_elems, - nullptr, filt, result_size_hint); + const PaddedPODArray& src_offsets, + PaddedPODArray& res_elems, const IColumn::Filter& filt, + ssize_t result_size_hint) { + return filter_arrays_impl_generic>( + src_elems, src_offsets, res_elems, nullptr, filt, result_size_hint); } /// Explicit instantiations - not to place the implementation of the function above in the header file. -#define INSTANTIATE(TYPE) \ - template void filter_arrays_impl(const PaddedPODArray&, const IColumn::Offsets&, \ - PaddedPODArray&, IColumn::Offsets&, \ - const IColumn::Filter&, ssize_t); \ - template void filter_arrays_impl_only_data( \ - const PaddedPODArray&, const IColumn::Offsets&, PaddedPODArray&, \ +#define INSTANTIATE(TYPE, OFFTYPE) \ + template void filter_arrays_impl( \ + const PaddedPODArray&, const PaddedPODArray&, PaddedPODArray&, \ + PaddedPODArray&, const IColumn::Filter&, ssize_t); \ + template void filter_arrays_impl_only_data( \ + const PaddedPODArray&, const PaddedPODArray&, PaddedPODArray&, \ const IColumn::Filter&, ssize_t); -INSTANTIATE(UInt8) -INSTANTIATE(UInt16) -INSTANTIATE(UInt32) -INSTANTIATE(UInt64) -INSTANTIATE(Int8) -INSTANTIATE(Int16) -INSTANTIATE(Int32) -INSTANTIATE(Int64) -INSTANTIATE(Float32) -INSTANTIATE(Float64) +INSTANTIATE(UInt8, IColumn::Offset) +INSTANTIATE(UInt8, IColumn::Offset64) +INSTANTIATE(UInt16, IColumn::Offset) +INSTANTIATE(UInt16, IColumn::Offset64) +INSTANTIATE(UInt32, IColumn::Offset) +INSTANTIATE(UInt32, IColumn::Offset64) +INSTANTIATE(UInt64, IColumn::Offset) +INSTANTIATE(UInt64, IColumn::Offset64) +INSTANTIATE(Int8, IColumn::Offset) +INSTANTIATE(Int8, IColumn::Offset64) +INSTANTIATE(Int16, IColumn::Offset) +INSTANTIATE(Int16, IColumn::Offset64) +INSTANTIATE(Int32, IColumn::Offset) +INSTANTIATE(Int32, IColumn::Offset64) +INSTANTIATE(Int64, IColumn::Offset) +INSTANTIATE(Int64, IColumn::Offset64) +INSTANTIATE(Float32, IColumn::Offset) +INSTANTIATE(Float32, IColumn::Offset64) +INSTANTIATE(Float64, IColumn::Offset) +INSTANTIATE(Float64, IColumn::Offset64) #undef INSTANTIATE diff --git a/be/src/vec/columns/columns_common.h b/be/src/vec/columns/columns_common.h index dc9116e343..7308816005 100644 --- a/be/src/vec/columns/columns_common.h +++ b/be/src/vec/columns/columns_common.h @@ -39,15 +39,16 @@ bool memory_is_zero(const void* data, size_t size); bool memory_is_byte(const void* data, size_t size, uint8_t byte); /// The general implementation of `filter` function for ColumnArray and ColumnString. -template -void filter_arrays_impl(const PaddedPODArray& src_elems, const IColumn::Offsets& src_offsets, - PaddedPODArray& res_elems, IColumn::Offsets& res_offsets, +template +void filter_arrays_impl(const PaddedPODArray& src_elems, const PaddedPODArray& src_offsets, + PaddedPODArray& res_elems, PaddedPODArray& res_offsets, const IColumn::Filter& filt, ssize_t result_size_hint); /// Same as above, but not fills res_offsets. -template +template void filter_arrays_impl_only_data(const PaddedPODArray& src_elems, - const IColumn::Offsets& src_offsets, PaddedPODArray& res_elems, - const IColumn::Filter& filt, ssize_t result_size_hint); + const PaddedPODArray& src_offsets, + PaddedPODArray& res_elems, const IColumn::Filter& filt, + ssize_t result_size_hint); } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_array.cpp b/be/src/vec/data_types/data_type_array.cpp index 2fb0805e49..7301e6e0e5 100644 --- a/be/src/vec/data_types/data_type_array.cpp +++ b/be/src/vec/data_types/data_type_array.cpp @@ -58,7 +58,7 @@ size_t DataTypeArray::get_number_of_dimensions() const { int64_t DataTypeArray::get_uncompressed_serialized_bytes(const IColumn& column) const { auto ptr = column.convert_to_full_column_if_const(); const auto& data_column = assert_cast(*ptr.get()); - return sizeof(IColumn::Offset) * (column.size() + 1) + + return sizeof(IColumn::Offset64) * (column.size() + 1) + get_nested_type()->get_uncompressed_serialized_bytes(data_column.get_data()); } @@ -67,11 +67,11 @@ char* DataTypeArray::serialize(const IColumn& column, char* buf) const { const auto& data_column = assert_cast(*ptr.get()); // row num - *reinterpret_cast(buf) = column.size(); - buf += sizeof(IColumn::Offset); + *reinterpret_cast(buf) = column.size(); + buf += sizeof(IColumn::Offset64); // offsets - memcpy(buf, data_column.get_offsets().data(), column.size() * sizeof(IColumn::Offset)); - buf += column.size() * sizeof(IColumn::Offset); + memcpy(buf, data_column.get_offsets().data(), column.size() * sizeof(IColumn::Offset64)); + buf += column.size() * sizeof(IColumn::Offset64); // children return get_nested_type()->serialize(data_column.get_data(), buf); } @@ -81,12 +81,12 @@ const char* DataTypeArray::deserialize(const char* buf, IColumn* column) const { auto& offsets = data_column->get_offsets(); // row num - IColumn::Offset row_num = *reinterpret_cast(buf); - buf += sizeof(IColumn::Offset); + IColumn::Offset64 row_num = *reinterpret_cast(buf); + buf += sizeof(IColumn::Offset64); // offsets offsets.resize(row_num); - memcpy(offsets.data(), buf, sizeof(IColumn::Offset) * row_num); - buf += sizeof(IColumn::Offset) * row_num; + memcpy(offsets.data(), buf, sizeof(IColumn::Offset64) * row_num); + buf += sizeof(IColumn::Offset64) * row_num; // children return get_nested_type()->deserialize(buf, data_column->get_data_ptr()->assume_mutable()); } diff --git a/be/src/vec/functions/array/function_array_aggregation.cpp b/be/src/vec/functions/array/function_array_aggregation.cpp index 42351fd7eb..f09a0a72fa 100644 --- a/be/src/vec/functions/array/function_array_aggregation.cpp +++ b/be/src/vec/functions/array/function_array_aggregation.cpp @@ -176,7 +176,7 @@ struct ArrayAggregateImpl { template static bool execute_type(ColumnPtr& res_ptr, const DataTypePtr& type, const IColumn* data, - const ColumnArray::Offsets& offsets) { + const ColumnArray::Offsets64& offsets) { using ColVecType = ColumnVectorOrDecimal; using ResultType = ArrayAggregateResult; using ColVecResultType = ColumnVectorOrDecimal; diff --git a/be/src/vec/functions/array/function_array_distinct.h b/be/src/vec/functions/array/function_array_distinct.h index 7d9c989c29..77e997aba6 100644 --- a/be/src/vec/functions/array/function_array_distinct.h +++ b/be/src/vec/functions/array/function_array_distinct.h @@ -71,7 +71,7 @@ public: auto dest_column_ptr = ColumnArray::create(nested_type->create_column(), ColumnArray::ColumnOffsets::create()); IColumn* dest_nested_column = &dest_column_ptr->get_data(); - ColumnArray::Offsets& dest_offsets = dest_column_ptr->get_offsets(); + auto& dest_offsets = dest_column_ptr->get_offsets(); DCHECK(dest_nested_column != nullptr); dest_nested_column->reserve(src_nested_column->size()); dest_offsets.reserve(input_rows_count); @@ -109,8 +109,8 @@ private: static constexpr size_t INITIAL_SIZE_DEGREE = 5; template - bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) { using NestType = typename ColumnType::value_type; using ElementNativeType = typename NativeType::Type; @@ -128,13 +128,13 @@ private: INITIAL_SIZE_DEGREE>; Set set; - ColumnArray::Offset prev_src_offset = 0; - ColumnArray::Offset res_offset = 0; + size_t prev_src_offset = 0; + size_t res_offset = 0; for (auto curr_src_offset : src_offsets) { set.clear(); size_t null_size = 0; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { DCHECK(dest_null_map != nullptr); (*dest_null_map).push_back(true); @@ -162,8 +162,8 @@ private: return true; } - bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) { const ColumnString* src_data_concrete = reinterpret_cast(&src_column); if (!src_data_concrete) { @@ -178,13 +178,13 @@ private: using Set = HashSetWithStackMemory, INITIAL_SIZE_DEGREE>; Set set; - ColumnArray::Offset prev_src_offset = 0; - ColumnArray::Offset res_offset = 0; + size_t prev_src_offset = 0; + size_t res_offset = 0; for (auto curr_src_offset : src_offsets) { set.clear(); size_t null_size = 0; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { DCHECK(dest_null_map != nullptr); // Note: here we need to update the offset of ColumnString @@ -221,8 +221,8 @@ private: return true; } - bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map, DataTypePtr& nested_type) { bool res = false; @@ -268,4 +268,4 @@ private: } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h index d04a1b605b..6722e09e9c 100644 --- a/be/src/vec/functions/array/function_array_element.h +++ b/be/src/vec/functions/array/function_array_element.h @@ -82,7 +82,7 @@ public: private: template - ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const IColumn& nested_column, + ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const UInt8* arr_null_map, const IColumn& indices, const UInt8* nested_null_map, UInt8* dst_null_map) { const auto& nested_data = reinterpret_cast(nested_column).get_data(); @@ -123,7 +123,7 @@ private: return dst_column; } - ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const IColumn& nested_column, + ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const UInt8* arr_null_map, const IColumn& indices, const UInt8* nested_null_map, UInt8* dst_null_map) { const auto& src_str_offs = diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index ab81490267..cd17feff8d 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -67,7 +67,7 @@ public: } private: - ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const UInt8* nested_null_map, + ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, const IColumn& nested_column, const IColumn& right_column) { // check array nested column type and get data const auto& str_offs = reinterpret_cast(nested_column).get_offsets(); @@ -110,7 +110,7 @@ private: } template - ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const UInt8* nested_null_map, + ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, const IColumn& nested_column, const IColumn& right_column) { // check array nested column type and get data const auto& nested_data = @@ -144,7 +144,7 @@ private: } template - ColumnPtr _execute_number_expanded(const ColumnArray::Offsets& offsets, + ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, const UInt8* nested_null_map, const IColumn& nested_column, const IColumn& right_column) { if (check_column(right_column)) { diff --git a/be/src/vec/functions/array/function_array_join.h b/be/src/vec/functions/array/function_array_join.h index 452ba0df0b..180e65c21f 100644 --- a/be/src/vec/functions/array/function_array_join.h +++ b/be/src/vec/functions/array/function_array_join.h @@ -117,7 +117,8 @@ private: } template - static bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, + static bool _execute_number(const IColumn& src_column, + const ColumnArray::Offsets64& src_offsets, const UInt8* src_null_map, const std::string& sep_str, const std::string& null_replace_str, DataTypePtr& nested_type, ColumnString* dest_column_ptr) { @@ -129,10 +130,10 @@ private: return false; } - ColumnArray::Offset prev_src_offset = 0; + size_t prev_src_offset = 0; for (auto curr_src_offset : src_offsets) { std::string result_str; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && src_null_map[j]) { if (null_replace_str.size() == 0) { continue; @@ -160,7 +161,8 @@ private: return true; } - static bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, + static bool _execute_string(const IColumn& src_column, + const ColumnArray::Offsets64& src_offsets, const UInt8* src_null_map, const std::string& sep_str, const std::string& null_replace_str, ColumnString* dest_column_ptr) { @@ -169,10 +171,10 @@ private: return false; } - ColumnArray::Offset prev_src_offset = 0; + size_t prev_src_offset = 0; for (auto curr_src_offset : src_offsets) { std::string result_str; - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && src_null_map[j]) { if (null_replace_str.size() == 0) { continue; @@ -193,7 +195,8 @@ private: return true; } - static bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, + static bool _execute_by_type(const IColumn& src_column, + const ColumnArray::Offsets64& src_offsets, const UInt8* src_null_map, const std::string& sep_str, const std::string& null_replace_str, DataTypePtr& nested_type, ColumnString* dest_column_ptr) { @@ -240,4 +243,4 @@ private: } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_remove.h b/be/src/vec/functions/array/function_array_remove.h index a291a53bc3..6565102fb7 100644 --- a/be/src/vec/functions/array/function_array_remove.h +++ b/be/src/vec/functions/array/function_array_remove.h @@ -65,7 +65,7 @@ public: private: template - ColumnPtr _execute_number(const ColumnArray::Offsets& offsets, const IColumn& nested_column, + ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const IColumn& right_column, const UInt8* nested_null_map) { // check array nested column type and get data const auto& src_data = reinterpret_cast(nested_column).get_data(); @@ -135,7 +135,7 @@ private: return dst; } - ColumnPtr _execute_string(const ColumnArray::Offsets& offsets, const IColumn& nested_column, + ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const IColumn& right_column, const UInt8* nested_null_map) { // check array nested column type and get data const auto& src_offs = reinterpret_cast(nested_column).get_offsets(); @@ -224,7 +224,7 @@ private: } template - ColumnPtr _execute_number_expanded(const ColumnArray::Offsets& offsets, + ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, const IColumn& nested_column, const IColumn& right_column, const UInt8* nested_null_map) { if (check_column(right_column)) { diff --git a/be/src/vec/functions/array/function_array_reverse.h b/be/src/vec/functions/array/function_array_reverse.h index bc6891a29b..0714542614 100644 --- a/be/src/vec/functions/array/function_array_reverse.h +++ b/be/src/vec/functions/array/function_array_reverse.h @@ -58,10 +58,10 @@ struct ArrayReverseImpl { } static bool _execute_internal(const IColumn& src_column, - const ColumnArray::Offsets& src_offsets, IColumn& dest_column, - ColumnArray::Offsets& dest_offsets, const UInt8* src_null_map, + const ColumnArray::Offsets64& src_offsets, IColumn& dest_column, + ColumnArray::Offsets64& dest_offsets, const UInt8* src_null_map, ColumnUInt8::Container* dest_null_map) { - ColumnArray::Offset prev_src_offset = 0; + size_t prev_src_offset = 0; for (auto curr_src_offset : src_offsets) { size_t array_size = curr_src_offset - prev_src_offset; @@ -89,4 +89,4 @@ struct ArrayReverseImpl { } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_sort.h b/be/src/vec/functions/array/function_array_sort.h index ccab8b8e40..87fa684b13 100644 --- a/be/src/vec/functions/array/function_array_sort.h +++ b/be/src/vec/functions/array/function_array_sort.h @@ -67,7 +67,7 @@ public: auto dest_column_ptr = ColumnArray::create(nested_type->create_column(), ColumnArray::ColumnOffsets::create()); IColumn* dest_nested_column = &dest_column_ptr->get_data(); - ColumnArray::Offsets& dest_offsets = dest_column_ptr->get_offsets(); + auto& dest_offsets = dest_column_ptr->get_offsets(); DCHECK(dest_nested_column != nullptr); dest_nested_column->reserve(src_nested_column->size()); dest_offsets.reserve(input_rows_count); @@ -103,15 +103,15 @@ public: private: // sort the non-null element according to the permutation template - void _sort_by_permutation(ColumnArray::Offset& prev_offset, - const ColumnArray::Offset& curr_offset, + void _sort_by_permutation(ColumnArray::Offset64& prev_offset, + const ColumnArray::Offset64& curr_offset, const SrcDataType* src_data_concrete, const IColumn& src_column, const NullMapType* src_null_map, IColumn::Permutation& permutation) { - for (ColumnArray::Offset j = prev_offset; j + 1 < curr_offset; ++j) { + for (size_t j = prev_offset; j + 1 < curr_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { continue; } - for (ColumnArray::Offset k = j + 1; k < curr_offset; ++k) { + for (size_t k = j + 1; k < curr_offset; ++k) { if (src_null_map && (*src_null_map)[k]) { continue; } @@ -128,8 +128,8 @@ private: } template - bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_number(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) { using NestType = typename ColumnType::value_type; const ColumnType* src_data_concrete = reinterpret_cast(&src_column); @@ -141,7 +141,7 @@ private: ColumnType& dest_data_concrete = reinterpret_cast(dest_column); PaddedPODArray& dest_datas = dest_data_concrete.get_data(); - ColumnArray::Offset prev_src_offset = 0; + ColumnArray::Offset64 prev_src_offset = 0; IColumn::Permutation permutation(src_column.size()); for (size_t i = 0; i < src_column.size(); ++i) { permutation[i] = i; @@ -149,7 +149,7 @@ private: for (auto curr_src_offset : src_offsets) { // filter and insert null element first - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { DCHECK(dest_null_map != nullptr); (*dest_null_map).push_back(true); @@ -161,7 +161,7 @@ private: src_column, src_null_map, permutation); // insert non-null element after sort by permutation - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { continue; } @@ -178,8 +178,8 @@ private: return true; } - bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) { const ColumnString* src_data_concrete = reinterpret_cast(&src_column); if (!src_data_concrete) { @@ -191,7 +191,7 @@ private: ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); column_string_chars.reserve(src_column.size()); - ColumnArray::Offset prev_src_offset = 0; + size_t prev_src_offset = 0; IColumn::Permutation permutation(src_column.size()); for (size_t i = 0; i < src_column.size(); ++i) { permutation[i] = i; @@ -199,7 +199,7 @@ private: for (auto curr_src_offset : src_offsets) { // filter and insert null element first - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { DCHECK(dest_null_map != nullptr); column_string_offsets.push_back(column_string_offsets.back()); @@ -211,7 +211,7 @@ private: src_column, src_null_map, permutation); // insert non-null element after sort by permutation - for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + for (size_t j = prev_src_offset; j < curr_src_offset; ++j) { if (src_null_map && (*src_null_map)[j]) { continue; } @@ -238,8 +238,8 @@ private: return true; } - bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets& src_offsets, - IColumn& dest_column, ColumnArray::Offsets& dest_offsets, + bool _execute_by_type(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, + IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map, DataTypePtr& nested_type) { bool res = false; @@ -285,4 +285,4 @@ private: } }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_utils.h b/be/src/vec/functions/array/function_array_utils.h index 0e0ebeb3b2..b4859f502b 100644 --- a/be/src/vec/functions/array/function_array_utils.h +++ b/be/src/vec/functions/array/function_array_utils.h @@ -35,7 +35,7 @@ public: public: const UInt8* array_nullmap_data = nullptr; const ColumnArray* array_col = nullptr; - const ColumnArray::Offsets* offsets_ptr = nullptr; + const ColumnArray::Offsets64* offsets_ptr = nullptr; const UInt8* nested_nullmap_data = nullptr; const IColumn* nested_col = nullptr; }; @@ -45,7 +45,7 @@ public: MutableColumnPtr array_nested_col = nullptr; ColumnUInt8::Container* nested_nullmap_data = nullptr; MutableColumnPtr offsets_col = nullptr; - ColumnArray::Offsets* offsets_ptr = nullptr; + ColumnArray::Offsets64* offsets_ptr = nullptr; IColumn* nested_col = nullptr; }; diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index e0f650d30c..c0cb214fcd 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -618,9 +618,9 @@ private: const auto& string_column = reinterpret_cast(*array_nested_column); const Chars& string_src_chars = string_column.get_chars(); - const Offsets& src_string_offsets = string_column.get_offsets(); - const Offsets& src_array_offsets = array_column.get_offsets(); - ColumnArray::Offset current_src_array_offset = 0; + const auto& src_string_offsets = string_column.get_offsets(); + const auto& src_array_offsets = array_column.get_offsets(); + size_t current_src_array_offset = 0; // Concat string in array for (size_t i = 0; i < input_rows_count; ++i) { diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 3188808b54..c970ec1092 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -196,10 +196,10 @@ void block_to_pb( } void fill_block_with_array_int(vectorized::Block& block) { - auto off_column = vectorized::ColumnVector::create(); + auto off_column = vectorized::ColumnVector::create(); auto data_column = vectorized::ColumnVector::create(); // init column array with [[1,2,3],[],[4],[5,6]] - std::vector offs = {0, 3, 3, 4, 6}; + std::vector offs = {0, 3, 3, 4, 6}; std::vector vals = {1, 2, 3, 4, 5, 6}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -218,10 +218,10 @@ void fill_block_with_array_int(vectorized::Block& block) { } void fill_block_with_array_string(vectorized::Block& block) { - auto off_column = vectorized::ColumnVector::create(); + auto off_column = vectorized::ColumnVector::create(); auto data_column = vectorized::ColumnString::create(); // init column array with [["abc","de"],["fg"],[], [""]]; - std::vector offs = {0, 2, 3, 3, 4}; + std::vector offs = {0, 2, 3, 3, 4}; std::vector vals = {"abc", "de", "fg", ""}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); diff --git a/be/test/vec/core/column_array_test.cpp b/be/test/vec/core/column_array_test.cpp index 60725501ab..7b71e0e4df 100644 --- a/be/test/vec/core/column_array_test.cpp +++ b/be/test/vec/core/column_array_test.cpp @@ -28,7 +28,7 @@ namespace doris::vectorized { -void check_array_offsets(const IColumn& arr, const std::vector& offs) { +void check_array_offsets(const IColumn& arr, const std::vector& offs) { auto arr_col = check_and_get_column(arr); ASSERT_EQ(arr_col->size(), offs.size()); for (size_t i = 0; i < arr_col->size(); ++i) { @@ -57,10 +57,10 @@ void check_array_data(const IColumn& arr, const std::vector& data) } TEST(ColumnArrayTest, IntArrayTest) { - auto off_column = ColumnVector::create(); + auto off_column = ColumnVector::create(); auto data_column = ColumnVector::create(); // init column array with [[1,2,3],[],[4]] - std::vector offs = {0, 3, 3, 4}; + std::vector offs = {0, 3, 3, 4}; std::vector vals = {1, 2, 3, 4}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -82,10 +82,10 @@ TEST(ColumnArrayTest, IntArrayTest) { } TEST(ColumnArrayTest, StringArrayTest) { - auto off_column = ColumnVector::create(); + auto off_column = ColumnVector::create(); auto data_column = ColumnString::create(); // init column array with [["abc","d"],["ef"],[], [""]]; - std::vector offs = {0, 2, 3, 3, 4}; + std::vector offs = {0, 2, 3, 3, 4}; std::vector vals = {"abc", "d", "ef", ""}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -107,10 +107,10 @@ TEST(ColumnArrayTest, StringArrayTest) { } TEST(ColumnArrayTest, IntArrayPermuteTest) { - auto off_column = ColumnVector::create(); + auto off_column = ColumnVector::create(); auto data_column = ColumnVector::create(); // init column array with [[1,2,3],[],[4],[5,6]] - std::vector offs = {0, 3, 3, 4, 6}; + std::vector offs = {0, 3, 3, 4, 6}; std::vector vals = {1, 2, 3, 4, 5, 6}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -133,10 +133,10 @@ TEST(ColumnArrayTest, IntArrayPermuteTest) { } TEST(ColumnArrayTest, StringArrayPermuteTest) { - auto off_column = ColumnVector::create(); + auto off_column = ColumnVector::create(); auto data_column = ColumnString::create(); // init column array with [["abc","d"],["ef"],[], [""]]; - std::vector offs = {0, 2, 3, 3, 4}; + std::vector offs = {0, 2, 3, 3, 4}; std::vector vals = {"abc", "d", "ef", ""}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -159,10 +159,10 @@ TEST(ColumnArrayTest, StringArrayPermuteTest) { } TEST(ColumnArrayTest, EmptyArrayPermuteTest) { - auto off_column = ColumnVector::create(); + auto off_column = ColumnVector::create(); auto data_column = ColumnVector::create(); // init column array with [[],[],[],[]] - std::vector offs = {0, 0, 0, 0, 0}; + std::vector offs = {0, 0, 0, 0, 0}; std::vector vals = {}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -185,10 +185,10 @@ TEST(ColumnArrayTest, EmptyArrayPermuteTest) { } TEST(ColumnArrayTest, IntArrayReplicateTest) { - auto off_column = ColumnVector::create(); + auto off_column = ColumnVector::create(); auto data_column = ColumnVector::create(); // init column array with [[1,2,3],[],[4],[5,6]] - std::vector offs = {0, 3, 3, 4, 6}; + std::vector offs = {0, 3, 3, 4, 6}; std::vector vals = {1, 2, 3, 4, 5, 6}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); @@ -209,10 +209,10 @@ TEST(ColumnArrayTest, IntArrayReplicateTest) { } TEST(ColumnArrayTest, StringArrayReplicateTest) { - auto off_column = ColumnVector::create(); + auto off_column = ColumnVector::create(); auto data_column = ColumnString::create(); // init column array with [["abc","d"],["ef"],[], [""]]; - std::vector offs = {0, 2, 3, 3, 4}; + std::vector offs = {0, 2, 3, 3, 4}; std::vector vals = {"abc", "d", "ef", ""}; for (size_t i = 1; i < offs.size(); ++i) { off_column->insert_data((const char*)(&offs[i]), 0); diff --git a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp index afa5bf7c6f..6cc32c05a3 100644 --- a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp +++ b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp @@ -613,7 +613,7 @@ TEST(ArrowColumnToDorisColumnTest, test_binary) { template static inline std::shared_ptr create_array_array( - std::vector& vec_offsets, std::vector& null_map, + std::vector& vec_offsets, std::vector& null_map, std::shared_ptr value_type, std::shared_ptr values, size_t& counter) { using offset_type = typename arrow::ListType::offset_type; @@ -646,7 +646,7 @@ static inline std::shared_ptr create_array_array( template void test_arrow_to_array_column(ColumnWithTypeAndName& column, - std::vector& vec_offsets, + std::vector& vec_offsets, std::vector& null_map, std::shared_ptr value_type, std::shared_ptr values, const std::string& value, @@ -698,7 +698,7 @@ void test_arrow_to_array_column(ColumnWithTypeAndName& column, template void test_array(const std::vector& test_cases, size_t num_elements, - std::vector& vec_offsets, std::vector& null_map, + std::vector& vec_offsets, std::vector& null_map, std::shared_ptr value_type) { TypeDescriptor type(TYPE_ARRAY); type.children.push_back(TYPE_VARCHAR); @@ -724,7 +724,7 @@ void test_array(const std::vector& test_cases, size_t num_elements, TEST(ArrowColumnToDorisColumnTest, test_array) { std::vector test_cases = {"1.2345678", "-12.34567890", "99999999999.99999999", "-99999999999.99999999"}; - std::vector vec_offsets = {0, 3, 3, 4, 6, 6, 64}; + std::vector vec_offsets = {0, 3, 3, 4, 6, 6, 64}; std::vector null_map = {false, true, false, false, false, false}; test_array(test_cases, 64, vec_offsets, null_map, arrow::list(arrow::binary()));