// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include #include #include #include #include #include #include "common/global_types.h" #include "common/status.h" #include "exprs/runtime_filter_slots.h" #include "util/runtime_profile.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" #include "vec/columns/columns_number.h" #include "vec/common/aggregation_common.h" #include "vec/common/arena.h" #include "vec/common/columns_hashing.h" #include "vec/common/hash_table/partitioned_hash_map.h" #include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/core/types.h" #include "vec/exec/join/join_op.h" // IWYU pragma: keep #include "vec/runtime/shared_hash_table_controller.h" #include "vjoin_node_base.h" template struct HashCRC32; namespace doris { class ObjectPool; class IRuntimeFilter; class DescriptorTbl; class RuntimeState; namespace vectorized { struct UInt128; struct UInt256; template struct ProcessHashTableProbe; template struct SerializedHashTableContext { using Mapped = RowRefListType; using HashTable = PartitionedHashMap; using State = ColumnsHashing::HashMethodSerialized; using Iter = typename HashTable::iterator; HashTable hash_table; Iter iter; bool inited = false; std::vector keys; size_t keys_memory_usage = 0; void serialize_keys(const ColumnRawPtrs& key_columns, size_t num_rows) { if (keys.size() < num_rows) { keys.resize(num_rows); } _arena.reset(new Arena()); keys_memory_usage = 0; size_t keys_size = key_columns.size(); for (size_t i = 0; i < num_rows; ++i) { keys[i] = serialize_keys_to_pool_contiguous(i, keys_size, key_columns, *_arena); } keys_memory_usage = _arena->size(); } void init_once() { if (!inited) { inited = true; iter = hash_table.begin(); } } private: std::unique_ptr _arena; }; // T should be UInt32 UInt64 UInt128 template struct PrimaryTypeHashTableContext { using Mapped = RowRefListType; using HashTable = PartitionedHashMap>; using State = ColumnsHashing::HashMethodOneNumber; using Iter = typename HashTable::iterator; HashTable hash_table; Iter iter; bool inited = false; void init_once() { if (!inited) { inited = true; iter = hash_table.begin(); } } }; // TODO: use FixedHashTable instead of HashTable template using I8HashTableContext = PrimaryTypeHashTableContext; template using I16HashTableContext = PrimaryTypeHashTableContext; template using I32HashTableContext = PrimaryTypeHashTableContext; template using I64HashTableContext = PrimaryTypeHashTableContext; template using I128HashTableContext = PrimaryTypeHashTableContext; template using I256HashTableContext = PrimaryTypeHashTableContext; template struct FixedKeyHashTableContext { using Mapped = RowRefListType; using HashTable = PartitionedHashMap>; using State = ColumnsHashing::HashMethodKeysFixed; using Iter = typename HashTable::iterator; HashTable hash_table; Iter iter; bool inited = false; void init_once() { if (!inited) { inited = true; iter = hash_table.begin(); } } }; template using I64FixedKeyHashTableContext = FixedKeyHashTableContext; template using I128FixedKeyHashTableContext = FixedKeyHashTableContext; template using I256FixedKeyHashTableContext = FixedKeyHashTableContext; using HashTableVariants = std::variant< std::monostate, SerializedHashTableContext, I8HashTableContext, I16HashTableContext, I32HashTableContext, I64HashTableContext, I128HashTableContext, I256HashTableContext, I64FixedKeyHashTableContext, I64FixedKeyHashTableContext, I128FixedKeyHashTableContext, I128FixedKeyHashTableContext, I256FixedKeyHashTableContext, I256FixedKeyHashTableContext, SerializedHashTableContext, I8HashTableContext, I16HashTableContext, I32HashTableContext, I64HashTableContext, I128HashTableContext, I256HashTableContext, I64FixedKeyHashTableContext, I64FixedKeyHashTableContext, I128FixedKeyHashTableContext, I128FixedKeyHashTableContext, I256FixedKeyHashTableContext, I256FixedKeyHashTableContext, SerializedHashTableContext, I8HashTableContext, I16HashTableContext, I32HashTableContext, I64HashTableContext, I128HashTableContext, I256HashTableContext, I64FixedKeyHashTableContext, I64FixedKeyHashTableContext, I128FixedKeyHashTableContext, I128FixedKeyHashTableContext, I256FixedKeyHashTableContext, I256FixedKeyHashTableContext>; class VExprContext; using HashTableCtxVariants = std::variant, ProcessHashTableProbe, ProcessHashTableProbe, ProcessHashTableProbe, ProcessHashTableProbe, ProcessHashTableProbe, ProcessHashTableProbe, ProcessHashTableProbe, ProcessHashTableProbe, ProcessHashTableProbe>; using HashTableIteratorVariants = std::variant, ForwardIterator, ForwardIterator>; class HashJoinNode final : public VJoinNodeBase { public: // TODO: Best prefetch step is decided by machine. We should also provide a // SQL hint to allow users to tune by hand. static constexpr int PREFETCH_STEP = 64; HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); ~HashJoinNode() override; Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; Status prepare(RuntimeState* state) override; Status open(RuntimeState* state) override; Status get_next(RuntimeState* state, Block* block, bool* eos) override; Status close(RuntimeState* state) override; void add_hash_buckets_info(const std::string& info); void add_hash_buckets_filled_info(const std::string& info); Status alloc_resource(RuntimeState* state) override; void release_resource(RuntimeState* state) override; Status sink(doris::RuntimeState* state, vectorized::Block* input_block, bool eos) override; bool need_more_input_data() const; Status pull(RuntimeState* state, vectorized::Block* output_block, bool* eos) override; Status push(RuntimeState* state, vectorized::Block* input_block, bool eos) override; void prepare_for_next() override; void debug_string(int indentation_level, std::stringstream* out) const override; bool can_sink_write() const { if (_should_build_hash_table) { return true; } return _shared_hash_table_context && _shared_hash_table_context->signaled; } bool should_build_hash_table() const { return _should_build_hash_table; } private: using VExprContexts = std::vector; // probe expr VExprContexts _probe_expr_ctxs; // build expr VExprContexts _build_expr_ctxs; // other expr std::unique_ptr _vother_join_conjunct_ptr; // mark the join column whether support null eq std::vector _is_null_safe_eq_join; // mark the build hash table whether it needs to store null value std::vector _store_null_in_hash_table; std::vector _probe_column_disguise_null; std::vector _probe_column_convert_to_null; DataTypes _right_table_data_types; DataTypes _left_table_data_types; RuntimeProfile::Counter* _build_table_timer; RuntimeProfile::Counter* _build_expr_call_timer; RuntimeProfile::Counter* _build_table_insert_timer; RuntimeProfile::Counter* _build_table_expanse_timer; RuntimeProfile::Counter* _build_table_convert_timer; RuntimeProfile::Counter* _probe_expr_call_timer; RuntimeProfile::Counter* _probe_next_timer; RuntimeProfile::Counter* _build_buckets_counter; RuntimeProfile::Counter* _build_buckets_fill_counter; RuntimeProfile::Counter* _push_down_timer; RuntimeProfile::Counter* _push_compute_timer; RuntimeProfile::Counter* _search_hashtable_timer; RuntimeProfile::Counter* _build_side_output_timer; RuntimeProfile::Counter* _probe_side_output_timer; RuntimeProfile::Counter* _build_side_compute_hash_timer; RuntimeProfile::Counter* _build_side_merge_block_timer; RuntimeProfile::Counter* _build_blocks_memory_usage; RuntimeProfile::Counter* _hash_table_memory_usage; RuntimeProfile::HighWaterMarkCounter* _build_arena_memory_usage; RuntimeProfile::HighWaterMarkCounter* _probe_arena_memory_usage; RuntimeProfile* _build_phase_profile; std::shared_ptr _arena; // maybe share hash table with other fragment instances std::shared_ptr _hash_table_variants; std::unique_ptr _process_hashtable_ctx_variants; // for full/right outer join HashTableIteratorVariants _outer_join_pull_visited_iter; HashTableIteratorVariants _probe_row_match_iter; std::shared_ptr> _build_blocks; Block _probe_block; ColumnRawPtrs _probe_columns; ColumnUInt8::MutablePtr _null_map_column; bool _need_null_map_for_probe = false; bool _has_set_need_null_map_for_probe = false; bool _has_set_need_null_map_for_build = false; bool _probe_ignore_null = false; int _probe_index = -1; bool _probe_eos = false; bool _build_side_ignore_null = false; Sizes _probe_key_sz; Sizes _build_key_sz; bool _is_broadcast_join = false; bool _should_build_hash_table = true; std::shared_ptr _shared_hashtable_controller = nullptr; VRuntimeFilterSlots* _runtime_filter_slots = nullptr; std::vector _hash_output_slot_ids; std::vector _left_output_slot_flags; std::vector _right_output_slot_flags; // for cases when a probe row matches more than batch size build rows. bool _is_any_probe_match_row_output = false; uint8_t _build_block_idx = 0; int64_t _build_side_mem_used = 0; int64_t _build_side_last_mem_used = 0; MutableBlock _build_side_mutable_block; SharedHashTableContextPtr _shared_hash_table_context = nullptr; Status _materialize_build_side(RuntimeState* state) override; Status _process_build_block(RuntimeState* state, Block& block, uint8_t offset); Status _do_evaluate(Block& block, std::vector& exprs, RuntimeProfile::Counter& expr_call_timer, std::vector& res_col_ids); template Status _extract_join_column(Block& block, ColumnUInt8::MutablePtr& null_map, ColumnRawPtrs& raw_ptrs, const std::vector& res_col_ids); bool _need_probe_null_map(Block& block, const std::vector& res_col_ids); void _set_build_ignore_flag(Block& block, const std::vector& res_col_ids); void _hash_table_init(RuntimeState* state); void _process_hashtable_ctx_variants_init(RuntimeState* state); static constexpr auto _MAX_BUILD_BLOCK_COUNT = 128; void _prepare_probe_block(); static std::vector _convert_block_to_null(Block& block); void _release_mem(); // add tuple is null flag column to Block for filter conjunct and output expr void _add_tuple_is_null_column(Block* block) override; template friend struct ProcessHashTableBuild; template friend struct ProcessHashTableProbe; template friend struct ProcessRuntimeFilterBuild; std::vector _runtime_filter_descs; std::unordered_map> _inserted_rows; std::vector _runtime_filters; size_t _build_bf_cardinality = 0; }; } // namespace vectorized } // namespace doris