Files
doris/be/src/exec/partitioned_hash_table.inline.h
chenhao7253886 37b4cafe87 Change variable and namespace name in BE (#268)
Change 'palo' to 'doris'
2018-11-02 10:22:32 +08:00

380 lines
13 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef DORIS_BE_SRC_EXEC_PARTITIONED_HASH_TABLE_INLINE_H
#define DORIS_BE_SRC_EXEC_PARTITIONED_HASH_TABLE_INLINE_H
#include "exec/partitioned_hash_table.h"
namespace doris {
// Evaluates the build exprs over 'row' and computes its hash into '*hash'.
// Returns false (and leaves '*hash' untouched) when the row contains a NULL
// and the table does not store NULLs, i.e. the row can never be matched.
inline bool PartitionedHashTableCtx::eval_and_hash_build(TupleRow* row, uint32_t* hash) {
    const bool row_has_null = EvalBuildRow(row);
    if (row_has_null && !_stores_nulls) return false;
    *hash = HashCurrentRow();
    return true;
}
// Evaluates the probe exprs over 'row' and computes its hash into '*hash'.
// A row containing a NULL can only ever match when the table both stores
// NULLs and is configured to find them; otherwise probing is pointless and
// false is returned without hashing.
inline bool PartitionedHashTableCtx::eval_and_hash_probe(TupleRow* row, uint32_t* hash) {
    const bool row_has_null = EvalProbeRow(row);
    if (row_has_null && !(_stores_nulls && _finds_nulls)) return false;
    *hash = HashCurrentRow();
    return true;
}
// Open-addressing probe of 'buckets' (size 'num_buckets', assumed a power of
// two) for 'hash'. Returns the index of the first empty bucket encountered,
// or the index of a filled bucket whose row compares equal to the current
// probe row in 'ht_ctx' (setting '*found' = true in that case). When
// 'ht_ctx' is NULL only hashes are compared, never row contents. Returns
// Iterator::BUCKET_NOT_FOUND only after visiting num_buckets slots, which
// can happen only when the table is completely full.
inline int64_t PartitionedHashTable::probe(Bucket* buckets, int64_t num_buckets,
PartitionedHashTableCtx* ht_ctx, uint32_t hash, bool* found) {
DCHECK(buckets != NULL);
DCHECK_GT(num_buckets, 0);
*found = false;
// num_buckets is a power of two, so '& (num_buckets - 1)' is 'mod num_buckets'.
int64_t bucket_idx = hash & (num_buckets - 1);
// In case of linear probing it counts the total number of steps for statistics and
// for knowing when to exit the loop (e.g. by capping the total travel length). In case
// of quadratic probing it is also used for calculating the length of the next jump.
int64_t step = 0;
do {
Bucket* bucket = &buckets[bucket_idx];
// An empty bucket proves the key is absent; report this slot for insertion.
if (!bucket->filled) {
return bucket_idx;
}
if (hash == bucket->hash) {
// Cached hashes match; confirm with a full row comparison when possible.
if (ht_ctx != NULL && ht_ctx->equals(get_row(bucket, ht_ctx->_row))) {
*found = true;
return bucket_idx;
}
// Row equality failed, or not performed. This is a hash collision. Continue
// searching.
++_num_hash_collisions;
}
// Move to the next bucket.
++step;
++_travel_length;
if (_quadratic_probing) {
// The i-th probe location is idx = (hash + (step * (step + 1)) / 2) mod num_buckets.
// This gives num_buckets unique idxs (between 0 and N-1) when num_buckets is a power
// of 2.
bucket_idx = (bucket_idx + step) & (num_buckets - 1);
} else {
bucket_idx = (bucket_idx + 1) & (num_buckets - 1);
}
} while (LIKELY(step < num_buckets));
// All buckets were visited without a match or an empty slot: the table is
// full. Callers must resize before further inserts can succeed.
DCHECK_EQ(_num_filled_buckets, num_buckets) << "Probing of a non-full table "
<< "failed: " << _quadratic_probing << " " << hash;
return Iterator::BUCKET_NOT_FOUND;
}
// Finds (or claims) the slot for 'hash' and returns a pointer to its payload,
// which the caller fills in. Returns NULL only when a duplicate node was
// needed and memory for it could not be allocated.
inline PartitionedHashTable::HtData* PartitionedHashTable::insert_internal(
        PartitionedHashTableCtx* ht_ctx, uint32_t hash) {
    ++_num_probes;
    bool found = false;
    const int64_t bucket_idx = probe(_buckets, _num_buckets, ht_ctx, hash, &found);
    DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND);
    if (!found) {
        // Empty bucket: claim it for this row.
        prepare_bucket_for_insert(bucket_idx, hash);
        return &_buckets[bucket_idx].bucketData.htdata;
    }
    // An equal row already occupies the bucket: chain a duplicate node. This
    // may fail to allocate memory.
    DuplicateNode* node = insert_duplicate_node(bucket_idx);
    if (UNLIKELY(node == NULL)) return NULL;
    return &node->htdata;
}
// Inserts 'row' keyed by 'hash', recording its tuple-stream position 'idx'.
// Returns false on allocation failure. Tables that store tuples directly
// delegate to the Tuple* overload instead of storing a stream index.
inline bool PartitionedHashTable::insert(PartitionedHashTableCtx* ht_ctx,
        const BufferedTupleStream2::RowIdx& idx, TupleRow* row, uint32_t hash) {
    if (_stores_tuples) return insert(ht_ctx, row->get_tuple(0), hash);
    HtData* slot = insert_internal(ht_ctx, hash);
    if (UNLIKELY(slot == NULL)) return false;
    // Successful insert: record where the row lives in the tuple stream.
    slot->idx = idx;
    return true;
}
// Inserts 'tuple' keyed by 'hash'. Only valid for tables configured to store
// tuple pointers directly. Returns false on allocation failure.
inline bool PartitionedHashTable::insert(
        PartitionedHashTableCtx* ht_ctx, Tuple* tuple, uint32_t hash) {
    DCHECK(_stores_tuples);
    HtData* slot = insert_internal(ht_ctx, hash);
    if (UNLIKELY(slot == NULL)) return false;
    // Successful insert: stash the tuple pointer in the new entry.
    slot->tuple = tuple;
    return true;
}
// Looks up the current probe row of 'ht_ctx' under 'hash'. Returns an
// iterator positioned at the matching bucket, or End() if no row matched.
inline PartitionedHashTable::Iterator PartitionedHashTable::find(
        PartitionedHashTableCtx* ht_ctx, uint32_t hash) {
    ++_num_probes;
    bool found = false;
    const int64_t bucket_idx = probe(_buckets, _num_buckets, ht_ctx, hash, &found);
    if (!found) return End();
    return Iterator(this, ht_ctx->row(), bucket_idx,
            _buckets[bucket_idx].bucketData.duplicates);
}
// Like find(), but also returns an iterator at the bucket the row would
// occupy when no match exists (for insert-after-miss patterns). '*found'
// reports whether an equal row is already present.
inline PartitionedHashTable::Iterator PartitionedHashTable::find_bucket(
        PartitionedHashTableCtx* ht_ctx, uint32_t hash,
        bool* found) {
    ++_num_probes;
    const int64_t bucket_idx = probe(_buckets, _num_buckets, ht_ctx, hash, found);
    DuplicateNode* duplicates = NULL;
    if (LIKELY(bucket_idx != Iterator::BUCKET_NOT_FOUND)) {
        duplicates = _buckets[bucket_idx].bucketData.duplicates;
    }
    return Iterator(this, ht_ctx->row(), bucket_idx, duplicates);
}
// Returns an iterator at the first filled bucket, or an at-end iterator when
// the table is empty.
inline PartitionedHashTable::Iterator PartitionedHashTable::begin(PartitionedHashTableCtx* ctx) {
    // Start one position before bucket 0 and advance to the first filled slot.
    int64_t idx = Iterator::BUCKET_NOT_FOUND;
    DuplicateNode* node = NULL;
    next_filled_bucket(&idx, &node);
    return Iterator(this, ctx->row(), idx, node);
}
// Returns an iterator at the first entry (bucket or duplicate node) whose
// 'matched' flag is not set, or an at-end iterator when none exists.
inline PartitionedHashTable::Iterator PartitionedHashTable::first_unmatched(
        PartitionedHashTableCtx* ctx) {
    int64_t bucket_idx = Iterator::BUCKET_NOT_FOUND;
    DuplicateNode* node = NULL;
    next_filled_bucket(&bucket_idx, &node);
    Iterator it(this, ctx->row(), bucket_idx, node);
    // Bug fix: on an empty table next_filled_bucket() leaves 'bucket_idx' at
    // BUCKET_NOT_FOUND (-1); the old code then read _buckets[-1], an
    // out-of-bounds access. Return the (already at-end) iterator instead.
    if (bucket_idx == Iterator::BUCKET_NOT_FOUND) return it;
    // Check whether the bucket, or its first duplicate node, is matched. If it is not
    // matched, then return. Otherwise, move to the first unmatched entry (node or bucket).
    Bucket* bucket = &_buckets[bucket_idx];
    if ((!bucket->hasDuplicates && bucket->matched) ||
            (bucket->hasDuplicates && node->matched)) {
        it.next_unmatched();
    }
    return it;
}
// Advances '*bucket_idx' to the next filled bucket after its current value,
// setting '*node' to that bucket's duplicate-chain head (may be NULL). When
// no filled bucket remains, sets '*bucket_idx' to BUCKET_NOT_FOUND and
// '*node' to NULL.
inline void PartitionedHashTable::next_filled_bucket(int64_t* bucket_idx, DuplicateNode** node) {
    for (int64_t i = *bucket_idx + 1; i < _num_buckets; ++i) {
        if (!_buckets[i].filled) continue;
        *bucket_idx = i;
        *node = _buckets[i].bucketData.duplicates;
        return;
    }
    // Reached the end of the hash table.
    *bucket_idx = Iterator::BUCKET_NOT_FOUND;
    *node = NULL;
}
// Marks the (currently empty) bucket at 'bucket_idx' as filled and records
// 'hash' in it. The caller is responsible for storing the payload afterwards.
inline void PartitionedHashTable::prepare_bucket_for_insert(int64_t bucket_idx, uint32_t hash) {
    DCHECK_GE(bucket_idx, 0);
    DCHECK_LT(bucket_idx, _num_buckets);
    Bucket* bucket = &_buckets[bucket_idx];
    DCHECK(!bucket->filled);
    ++_num_filled_buckets;
    // Fresh bucket: no match yet, no duplicate chain, cache the hash.
    bucket->filled = true;
    bucket->matched = false;
    bucket->hasDuplicates = false;
    bucket->hash = hash;
}
// Consumes the next preallocated DuplicateNode, makes it the new head of
// 'bucket's duplicate chain, and returns it. The caller must ensure a node
// is available on the current page.
inline PartitionedHashTable::DuplicateNode* PartitionedHashTable::append_next_node(
        Bucket* bucket) {
    DCHECK_GT(_node_remaining_current_page, 0);
    DuplicateNode* node = _next_node++;
    bucket->bucketData.duplicates = node;
    ++_num_duplicate_nodes;
    --_node_remaining_current_page;
    return node;
}
// Adds a new duplicate node to the filled bucket at 'bucket_idx' and returns
// it, or NULL when node-page memory cannot be grown. When the bucket holds a
// single inline entry, that entry is first converted into the head of a
// duplicate chain (so two nodes may be consumed by one call).
inline PartitionedHashTable::DuplicateNode* PartitionedHashTable::insert_duplicate_node(
int64_t bucket_idx) {
DCHECK_GE(bucket_idx, 0);
DCHECK_LT(bucket_idx, _num_buckets);
Bucket* bucket = &_buckets[bucket_idx];
DCHECK(bucket->filled);
// Allocate one duplicate node for the new data and one for the preexisting data,
// if needed. ('1 + !hasDuplicates' is 2 on the first duplicate, 1 afterwards.)
while (_node_remaining_current_page < 1 + !bucket->hasDuplicates) {
if (UNLIKELY(!grow_node_array())) {
return NULL;
}
}
if (!bucket->hasDuplicates) {
// This is the first duplicate in this bucket. It means that we need to convert
// the current entry in the bucket to a node and link it from the bucket.
// NOTE(review): only the 'idx' member of the HtData union is copied here;
// presumably this moves the whole payload for tuple-storing tables too —
// TODO confirm the union layout in partitioned_hash_table.h.
_next_node->htdata.idx = bucket->bucketData.htdata.idx;
DCHECK(!bucket->matched);
_next_node->matched = false;
_next_node->next = NULL;
append_next_node(bucket);
bucket->hasDuplicates = true;
++_num_buckets_with_duplicates;
}
// Link a new node at the head of the chain; append_next_node() updates the
// bucket's chain pointer to it.
_next_node->next = bucket->bucketData.duplicates;
_next_node->matched = false;
return append_next_node(bucket);
}
// Returns the TupleRow stored in 'htdata'. For stream-backed tables the row
// is materialized into the caller-provided 'row'; for tuple-storing tables
// the stored Tuple* itself is reinterpreted as a single-tuple TupleRow.
inline TupleRow* PartitionedHashTable::get_row(HtData& htdata, TupleRow* row) const {
    if (!_stores_tuples) {
        _tuple_stream->get_tuple_row(htdata.idx, row);
        return row;
    }
    return reinterpret_cast<TupleRow*>(&htdata.tuple);
}
// Returns the row for 'bucket': its inline entry, or — when the bucket has
// duplicates — the entry of the chain's head node.
inline TupleRow* PartitionedHashTable::get_row(Bucket* bucket, TupleRow* row) const {
    DCHECK(bucket != NULL);
    if (LIKELY(!bucket->hasDuplicates)) {
        return get_row(bucket->bucketData.htdata, row);
    }
    DuplicateNode* head = bucket->bucketData.duplicates;
    DCHECK(head != NULL);
    return get_row(head->htdata, row);
}
// Returns the row at the iterator's current position: the current duplicate
// node's entry when the bucket has duplicates, otherwise the bucket's own.
inline TupleRow* PartitionedHashTable::Iterator::get_row() const {
    DCHECK(!at_end());
    DCHECK(_table != NULL);
    DCHECK(_row != NULL);
    Bucket* bucket = &_table->_buckets[_bucket_idx];
    if (LIKELY(!bucket->hasDuplicates)) {
        return _table->get_row(bucket->bucketData.htdata, _row);
    }
    DCHECK(_node != NULL);
    return _table->get_row(_node->htdata, _row);
}
// Returns the stored Tuple* at the current position. Only valid for
// tuple-storing tables.
inline Tuple* PartitionedHashTable::Iterator::get_tuple() const {
    DCHECK(!at_end());
    DCHECK(_table->_stores_tuples);
    Bucket* bucket = &_table->_buckets[_bucket_idx];
    // TODO: To avoid the hasDuplicates check, store the HtData* in the Iterator.
    if (LIKELY(!bucket->hasDuplicates)) {
        return bucket->bucketData.htdata.tuple;
    }
    DCHECK(_node != NULL);
    return _node->htdata.tuple;
}
// Claims the current (empty) bucket for 'hash' and stores 'tuple' in it.
// Only valid for tuple-storing tables.
inline void PartitionedHashTable::Iterator::set_tuple(Tuple* tuple, uint32_t hash) {
    DCHECK(!at_end());
    DCHECK(_table->_stores_tuples);
    _table->prepare_bucket_for_insert(_bucket_idx, hash);
    Bucket* bucket = &_table->_buckets[_bucket_idx];
    bucket->bucketData.htdata.tuple = tuple;
}
// Flags the current entry as matched. With duplicates the flag lives on the
// current node; otherwise on the bucket itself.
inline void PartitionedHashTable::Iterator::set_matched() {
    DCHECK(!at_end());
    Bucket* bucket = &_table->_buckets[_bucket_idx];
    if (!bucket->hasDuplicates) {
        bucket->matched = true;
    } else {
        _node->matched = true;
    }
    // Used for disabling spilling of hash tables in right and full-outer joins with
    // matches. See IMPALA-1488.
    _table->_has_matches = true;
}
// Returns whether the current entry has been flagged as matched (node flag
// when the bucket has duplicates, bucket flag otherwise).
inline bool PartitionedHashTable::Iterator::is_matched() const {
    DCHECK(!at_end());
    Bucket* bucket = &_table->_buckets[_bucket_idx];
    return bucket->hasDuplicates ? _node->matched : bucket->matched;
}
// Puts the iterator into its at-end state: BUCKET_NOT_FOUND index, no node.
inline void PartitionedHashTable::Iterator::set_at_end() {
    _node = NULL;
    _bucket_idx = BUCKET_NOT_FOUND;
}
inline void PartitionedHashTable::Iterator::next() {
DCHECK(!at_end());
if (_table->_buckets[_bucket_idx].hasDuplicates && _node->next != NULL) {
_node = _node->next;
} else {
_table->next_filled_bucket(&_bucket_idx, &_node);
}
}
inline void PartitionedHashTable::Iterator::next_duplicate() {
DCHECK(!at_end());
if (_table->_buckets[_bucket_idx].hasDuplicates && _node->next != NULL) {
_node = _node->next;
} else {
_bucket_idx = BUCKET_NOT_FOUND;
_node = NULL;
}
}
// Advances to the next entry whose 'matched' flag is unset: first the rest
// of the current bucket's duplicate chain, then subsequent filled buckets.
// Leaves the iterator at-end when no unmatched entry remains.
inline void PartitionedHashTable::Iterator::next_unmatched() {
DCHECK(!at_end());
Bucket* bucket = &_table->_buckets[_bucket_idx];
// Check if there is any remaining unmatched duplicate node in the current bucket.
if (bucket->hasDuplicates) {
while (_node->next != NULL) {
_node = _node->next;
if (!_node->matched) {
return;
}
}
}
// Move to the next filled bucket and return if this bucket is not matched or
// iterate to the first not matched duplicate node.
_table->next_filled_bucket(&_bucket_idx, &_node);
while (_bucket_idx != Iterator::BUCKET_NOT_FOUND) {
bucket = &_table->_buckets[_bucket_idx];
if (!bucket->hasDuplicates) {
// Single-entry bucket: usable iff its own flag is unset.
if (!bucket->matched) {
return;
}
} else {
// Scan this bucket's chain; stop at the first unmatched node (the loop
// also halts on the last node so the final check below covers it).
while (_node->matched && _node->next != NULL) {
_node = _node->next;
}
if (!_node->matched) {
return;
}
}
// Everything in this bucket was matched; try the next filled one.
_table->next_filled_bucket(&_bucket_idx, &_node);
}
}
// Selects which hash seed subsequent HashCurrentRow() calls use; 'level'
// must be a valid index into _seeds.
inline void PartitionedHashTableCtx::set_level(int level) {
    DCHECK_LT(level, _seeds.size());
    DCHECK_GE(level, 0);
    _level = level;
}
} // end namespace doris
#endif // DORIS_BE_SRC_EXEC_PARTITIONED_HASH_TABLE_INLINE_H