Files
doris/be/src/vec/exec/vset_operation_node.h
Jerry Hu 9f8de89659 [refactor](exec) replace the single pointer with an array of 'conjuncts' in ExecNode (#19758)
Refactoring the filtering conditions in the current ExecNode from an expression tree to an array can simplify the process of adding runtime filters. It eliminates the need for complex merge operations and removes the requirement for the frontend to combine expressions into a single entity.

By representing the filtering conditions as an array, each condition can be treated individually, making it easier to add runtime filters without the need for complex merging logic. The array can store the individual conditions, and the runtime filter logic can iterate through the array to apply the filters as needed.

This refactoring simplifies the codebase, improves readability, and reduces the complexity associated with handling filtering conditions and adding runtime filters. It separates the conditions into discrete entities, enabling more straightforward manipulation and management within the execution node.
2023-05-29 11:47:31 +08:00

141 lines
4.9 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <functional>
#include <iosfwd>
#include <memory>
#include <unordered_map>
#include <variant>
#include <vector>
#include "common/status.h"
#include "exec/exec_node.h"
#include "util/runtime_profile.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/common/arena.h"
#include "vec/common/hash_table/hash_map.h"
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
#include "vec/exec/join/process_hash_table_probe.h"
#include "vec/exec/join/vhash_join_node.h"
namespace doris {
class DescriptorTbl;
class ObjectPool;
class RuntimeState;
class TPlanNode;
namespace vectorized {
class VExprContext;
struct RowRefListWithFlags;
// Execution node for set operations, specialised at compile time by `is_intersect`
// (the two instantiations are exposed as VIntersectNode and VExceptNode).
//
// NOTE(review): judging by the member names, one side is materialised into a hash table
// (`_build_*`) and the remaining children are probed against it (`_probe_*`) -- confirm
// against the implementation file.
//
// Scalar and raw-pointer members carry in-class default initializers so that they never
// hold indeterminate values; a constructor mem-initializer, where present, still wins.
template <bool is_intersect>
class VSetOperationNode final : public ExecNode {
public:
    VSetOperationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);

    // ExecNode overrides.
    Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override;
    Status prepare(RuntimeState* state) override;
    Status open(RuntimeState* state) override;
    Status get_next(RuntimeState* state, Block* output_block, bool* eos) override;
    Status close(RuntimeState* state) override;
    void debug_string(int indentation_level, std::stringstream* out) const override;

    Status alloc_resource(RuntimeState* state) override;
    void release_resource(RuntimeState* state) override;

    Status sink(RuntimeState* state, Block* block, bool eos) override;
    Status pull(RuntimeState* state, Block* output_block, bool* eos) override;

    // Probe-side entry points, addressed by the index of the child the data belongs to.
    // NOTE(review): `eos` presumably marks the last block of that child -- confirm.
    Status sink_probe(RuntimeState* state, int child_id, Block* block, bool eos);
    Status finalize_probe(RuntimeState* state, int child_id);

    // Reports whether the child with index `child_id` is finished.
    // NOTE(review): presumably backed by `_build_finished` / `_probe_finished_children_index`.
    bool is_child_finished(int child_id) const;

private:
    // TODO: the hash-table build process is the same as in the join node; the shared
    // methods should be factored out and provided to both.
    void hash_table_init();
    Status hash_table_build(RuntimeState* state);
    Status process_build_block(Block& block, uint8_t offset, RuntimeState* state);
    Status extract_build_column(Block& block, ColumnRawPtrs& raw_ptrs);
    Status extract_probe_column(Block& block, ColumnRawPtrs& raw_ptrs, int child_id);
    void refresh_hash_table();

    template <typename HashTableContext>
    Status get_data_in_hashtable(HashTableContext& hash_table_ctx, Block* output_block,
                                 const int batch_size, bool* eos);

    void add_result_columns(RowRefListWithFlags& value, int& block_size);
    void create_mutable_cols(Block* output_block);
    void release_mem();

    std::unique_ptr<HashTableVariants> _hash_table_variants;

    std::vector<size_t> _probe_key_sz;
    std::vector<size_t> _build_key_sz;
    std::vector<bool> _build_not_ignore_null;

    std::unique_ptr<Arena> _arena;
    // Number of elements recorded in the hash table.
    int64_t _valid_element_in_hash_tbl = 0;

    // The i-th result expr list refers to the i-th child.
    std::vector<VExprContextSPtrs> _child_expr_lists;
    // Data types of the build columns.
    DataTypes _left_table_data_types;
    // first: column id, which may point to the original column or to a cast column
    // second: index mapped to the column types
    std::unordered_map<int, int> _build_col_idx;
    // Memory recorded while running.
    int64_t _mem_used = 0;
    // Ids of the columns inserted during probe.
    std::vector<uint16_t> _probe_column_inserted_id;

    std::vector<Block> _build_blocks;
    Block _probe_block;
    ColumnRawPtrs _probe_columns;
    std::vector<MutableColumnPtr> _mutable_cols;
    int _build_block_index = 0;
    bool _build_finished = false;
    std::vector<bool> _probe_finished_children_index;
    MutableBlock _mutable_block;

    // Profile counters; null until assigned by the implementation.
    RuntimeProfile::Counter* _build_timer = nullptr; // time to build hash table
    RuntimeProfile::Counter* _probe_timer = nullptr; // time to probe
    RuntimeProfile::Counter* _pull_timer = nullptr;  // time to pull data

    // The build/probe helper structs are granted access to the private state above.
    template <class HashTableContext, bool is_intersected>
    friend struct HashTableBuild;
    template <class HashTableContext, bool is_intersected>
    friend struct HashTableProbe;
};
// Instantiation with is_intersect = true (the intersect flavor, going by the alias name).
using VIntersectNode = VSetOperationNode<true>;
// Instantiation with is_intersect = false (the except flavor, going by the alias name).
using VExceptNode = VSetOperationNode<false>;
} // namespace vectorized
} // namespace doris