[Bug](join) fix columnstr64's offset overflow on serialize_value_into_arena #46461 (#46462)

pick from #46461
Author: Pxl
Date: 2025-01-07 19:59:35 +08:00
Committed by: GitHub
Parent: 6bf31a2a0d
Commit: 2f98a6216e
4 changed files with 76 additions and 9 deletions


@@ -356,8 +356,8 @@ ColumnPtr ColumnStr<T>::permute(const IColumn::Permutation& perm, size_t limit)
 template <typename T>
 StringRef ColumnStr<T>::serialize_value_into_arena(size_t n, Arena& arena,
                                                    char const*& begin) const {
-    uint32_t string_size(size_at(n));
-    uint32_t offset(offset_at(n));
+    auto string_size(size_at(n));
+    auto offset(offset_at(n));
 
     StringRef res;
     res.size = sizeof(string_size) + string_size;
@@ -389,7 +389,7 @@ size_t ColumnStr<T>::get_max_row_byte_size() const {
     size_t max_size = 0;
     size_t num_rows = offsets.size();
     for (size_t i = 0; i < num_rows; ++i) {
-        max_size = std::max(max_size, size_at(i));
+        max_size = std::max(max_size, size_t(size_at(i)));
     }
 
     return max_size + sizeof(uint32_t);
@@ -399,8 +399,8 @@ template <typename T>
 void ColumnStr<T>::serialize_vec(std::vector<StringRef>& keys, size_t num_rows,
                                  size_t max_row_byte_size) const {
     for (size_t i = 0; i < num_rows; ++i) {
-        uint32_t offset(offset_at(i));
-        uint32_t string_size(size_at(i));
+        auto offset(offset_at(i));
+        auto string_size(size_at(i));
 
         auto* ptr = const_cast<char*>(keys[i].data + keys[i].size);
         memcpy_fixed<uint32_t>(ptr, (char*)&string_size);
@@ -414,8 +414,8 @@ void ColumnStr<T>::serialize_vec_with_null_map(std::vector<StringRef>& keys, siz
                                                const uint8_t* null_map) const {
     for (size_t i = 0; i < num_rows; ++i) {
         if (null_map[i] == 0) {
-            uint32_t offset(offset_at(i));
-            uint32_t string_size(size_at(i));
+            auto offset(offset_at(i));
+            auto string_size(size_at(i));
 
             auto* ptr = const_cast<char*>(keys[i].data + keys[i].size);
             memcpy_fixed<uint32_t>(ptr, (char*)&string_size);
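
The narrowing these hunks remove is where the overflow happened: in the str64 variant of ColumnStr the offsets array holds 64-bit cumulative positions, so copying offset_at() into a uint32_t wraps modulo 2^32 once the column has accumulated more than 4 GiB of character data, and the bytes copied out of the column then come from the wrong position. A minimal standalone sketch of the wrap (illustrative values only, not Doris code):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        // Pretend these are str64 offsets after ~4.5 GiB of chars.
        std::vector<uint64_t> offsets = {4'500'000'000ULL, 4'500'000'100ULL};

        // Old pattern: uint32_t offset(offset_at(n)) -- silently wraps mod 2^32.
        uint32_t truncated = static_cast<uint32_t>(offsets[0]); // 205'032'704
        // Fixed pattern: auto keeps the full width that offset_at() returns.
        auto preserved = offsets[0];                            // 4'500'000'000

        std::printf("truncated=%u preserved=%llu\n", truncated,
                    static_cast<unsigned long long>(preserved));
        return 0;
    }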


@@ -87,8 +87,11 @@ private:
     size_t ALWAYS_INLINE offset_at(ssize_t i) const { return offsets[i - 1]; }
 
-    /// Size of i-th element, including terminating zero.
-    size_t ALWAYS_INLINE size_at(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
+    // Size of i-th element, including terminating zero.
+    // assume that the length of a single element is less than 32-bit
+    uint32_t ALWAYS_INLINE size_at(ssize_t i) const {
+        return uint32_t(offsets[i] - offsets[i - 1]);
+    }
 
     template <bool positive>
     struct less;
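
Why the header change narrows size_at() to uint32_t while offset_at() keeps returning size_t: the difference between two adjacent offsets is a single element's length, which the new comment assumes stays below 2^32, whereas the offsets themselves may not. A small standalone illustration of that assumption (values are made up):

    #include <cassert>
    #include <cstdint>

    int main() {
        // Adjacent 64-bit offsets deep inside a >4 GiB string column.
        uint64_t prev = 4'999'999'900ULL; // offsets[i - 1]
        uint64_t cur = 5'000'000'000ULL;  // offsets[i]

        // Narrowing the difference is safe while one element is < 4 GiB,
        // but narrowing either offset on its own would truncate it.
        auto size = static_cast<uint32_t>(cur - prev); // exactly 100
        assert(size == 100);
        return 0;
    }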


@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !test --
50000000
-- !test --
50000000


@@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("str64_serialize") {
    sql """ DROP TABLE IF EXISTS d_table; """
    sql """ DROP TABLE IF EXISTS d_table2; """

    sql """
        create table d_table (
            k1 int null,
            k2 int not null,
            k3 bigint null,
            k4 varchar(100) null
        )
        duplicate key (k1,k2,k3)
        distributed BY hash(k1) buckets 3
        properties("replication_num" = "1");
    """

    sql """
        create table d_table2 (
            k1 int null,
            k2 int not null,
            k3 bigint null,
            k4 varchar(100) null
        )
        duplicate key (k1,k2,k3)
        distributed BY hash(k1) buckets 3
        properties("replication_num" = "1");
    """

    sql """insert into d_table select 1,1,1,'1234567890abcdefghigalsdhaluihdicandejionxaoxwdeuhwenudzmwoedxneiowdxiowedjxneiowdjixoneiiexdnuiexef' from (select 1 k1) as t lateral view explode_numbers(50000000) tmp1 as e1;
    """

    sql """insert into d_table2 select 1,1,1,'1234567890abcdefghigalsdhaluihdicandejionxaoxwdeuhwenudzmwoedxneiowdxiowedjxneiowdjixoneiiexdnuiexef';
    """

    sql "set parallel_pipeline_task_num=1;"

    qt_test "select /*+ LEADING(a,b) */ count(*) from d_table as a, d_table2 as b where a.k4=b.k4 and a.k1=b.k1;"
    qt_test "select /*+ LEADING(b,a) */ count(*) from d_table as a, d_table2 as b where a.k4=b.k4 and a.k1=b.k1;"
}
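
For context on why this test reaches the str64 code path: 50,000,000 rows of a 100-character k4 value add up to roughly 5 * 10^9 bytes of character data, which exceeds 2^32 (about 4.29 * 10^9), so the column's cumulative offsets no longer fit in 32 bits. A back-of-the-envelope check (a sketch; the exact per-row overhead in Doris may differ):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint64_t rows = 50'000'000; // explode_numbers(50000000)
        const uint64_t value_len = 100;   // length of the k4 literal
        const uint64_t total_chars = rows * value_len; // 5'000'000'000
        const uint64_t limit32 = 1ULL << 32;           // 4'294'967'296

        std::printf("total=%llu, exceeds 2^32: %s\n",
                    static_cast<unsigned long long>(total_chars),
                    total_chars > limit32 ? "yes" : "no"); // prints "yes"
        return 0;
    }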