Files
doris/be/src/olap/selection_vector.h

166 lines
5.4 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>

#include "util/bitmap.h"
namespace doris {
// A bitmap with one bit per row of a row block. A set bit marks the row as
// selected (live); a cleared bit marks it as not selected.
//
// It is used when scanning data with column predicates.
class SelectionVector {
public:
    // Builds a vector able to describe 'row_capacity' rows. Every row starts
    // out unselected; call set_all_true() to start with every row selected.
    //
    // The byte size computed by BitmapSize(row_capacity) must be non-zero
    // (enforced with CHECK_GT).
    explicit SelectionVector(size_t row_capacity)
            : _n_rows(row_capacity),
              _n_bytes(BitmapSize(row_capacity)),
              _bitmap(new uint8_t[_n_bytes]) {
        CHECK_GT(_n_bytes, 0);
        set_all_false();
    }

    // Number of rows in this vector (the capacity given at construction).
    size_t nrows() const { return _n_rows; }

    // Read-only / writable access to the raw bitmap bytes.
    const uint8_t* bitmap() const { return _bitmap.get(); }
    uint8_t* mutable_bitmap() { return _bitmap.get(); }

    // Returns the number of selected rows. The count runs over all _n_bytes
    // bytes of the bitmap, including any trailing bits in the last byte.
    size_t count_selected() const { return Bits::Count(bitmap(), _n_bytes); }

    // Returns true if any row is selected, false otherwise.
    // Equivalent to (count_selected() > 0), but faster.
    bool any_selected() const;

    // Returns whether 'row' is selected. 'row' must be less than nrows().
    bool is_row_selected(size_t row) const {
        DCHECK_LT(row, _n_rows);
        return BitmapTest(bitmap(), row);
    }

    // Marks 'row' as selected. 'row' must be less than nrows().
    void set_row_selected(size_t row) {
        DCHECK_LT(row, _n_rows);
        BitmapSet(mutable_bitmap(), row);
    }

    // Marks 'row' as not selected. 'row' must be less than nrows().
    void clear_bit(size_t row) {
        DCHECK_LT(row, _n_rows);
        BitmapClear(mutable_bitmap(), row);
    }

    // Unselects every row.
    void set_all_false() { memset(mutable_bitmap(), 0, _n_bytes); }

    // Selects every row. The bulk fill also sets the bits past the last row
    // in the final byte, so those are cleared again afterwards.
    void set_all_true() {
        memset(mutable_bitmap(), 0xff, _n_bytes);
        pad_extra_bits_with_zeroes();
    }

    // Renders the first nrows() bits via BitmapToString().
    std::string to_string() const { return BitmapToString(bitmap(), _n_rows); }

private:
    // Zeroes the bits of the last byte that lie beyond the final row.
    //
    // For speed, count_selected() and any_selected() inspect the bitmap a
    // whole byte (or more) at a time, so they also see trailing bits that are
    // not part of the bitmap when the row count is not a multiple of 8.
    // Keeping those bits zero at construction time, and after every operation
    // that sets all bytes in bulk, keeps the results correct without slowing
    // those functions down.
    void pad_extra_bits_with_zeroes() {
        const size_t used_bits_in_tail = _n_rows % 8;
        if (used_bits_in_tail == 0) {
            // Row count is byte-aligned: there are no spare bits to clear.
            return;
        }
        BitmapChangeBits(mutable_bitmap(), _n_rows, 8 - used_bits_in_tail, false);
    }

    // Row capacity.
    size_t _n_rows;
    // Size of the bitmap buffer in bytes.
    size_t _n_bytes;
    // Backing storage for the selection bits.
    std::unique_ptr<uint8_t[]> _bitmap;

    DISALLOW_COPY_AND_ASSIGN(SelectionVector);
};
// Returns true if at least one bit in the bitmap is set.
//
// The bitmap is scanned one 32-bit word at a time, followed by a byte-wise
// scan of the remaining (fewer than four) bytes. Each word is loaded with
// memcpy instead of dereferencing a reinterpret_cast'ed uint32_t*: the buffer
// is an array of uint8_t, and reading it through a uint32_t lvalue is not
// permitted by the aliasing rules. A fixed-size memcpy like this is typically
// compiled down to a single load.
//
// All _n_bytes bytes are examined, including any bits in the last byte that
// lie beyond _n_rows, so the result is only meaningful while those trailing
// bits are kept at zero.
inline bool SelectionVector::any_selected() const {
    const uint8_t* cursor = _bitmap.get();
    size_t remaining = _n_bytes;

    // Word-at-a-time pass.
    while (remaining >= sizeof(uint32_t)) {
        uint32_t word;
        std::memcpy(&word, cursor, sizeof(word));
        if (word != 0) {
            return true;
        }
        cursor += sizeof(word);
        remaining -= sizeof(word);
    }

    // Tail pass over the bytes that did not fill a whole word.
    for (; remaining > 0; --remaining, ++cursor) {
        if (*cursor != 0) {
            return true;
        }
    }
    return false;
}
// A SelectionVectorView keeps track of where in the selection vector a given
// batch will start from. After processing a batch, Advance() should be called
// and the view will move forward by the appropriate amount. In this way, the
// underlying selection vector can easily be updated batch-by-batch.
class SelectionVectorView {
public:
// Constructs a new SelectionVectorView.
//
// The 'sel_vec' object must outlive this SelectionVectorView.
explicit SelectionVectorView(SelectionVector* sel_vec) : _sel_vec(sel_vec), _row_offset(0) {}
void advance(size_t skip) {
DCHECK_LE(skip, _sel_vec->nrows() - _row_offset);
_row_offset += skip;
}
void set_bit(size_t row_idx) {
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
BitmapSet(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
}
void clear_bit(size_t row_idx) {
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
BitmapClear(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
}
bool test_bit(size_t row_idx) {
DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
return BitmapTest(_sel_vec->bitmap(), _row_offset + row_idx);
}
void clear_bits(size_t nrows) {
DCHECK_LE(nrows, _sel_vec->nrows() - _row_offset);
BitmapChangeBits(_sel_vec->mutable_bitmap(), _row_offset, nrows, false);
}
private:
SelectionVector* _sel_vec = nullptr;
size_t _row_offset;
};
} // namespace doris