// Files
// oceanbase/src/sql/engine/recursive_cte/ob_search_method_op.h
//
// 363 lines
// 13 KiB
// C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OB_DEPTH_FIRST_SEARCH_OP_H_
#define OB_DEPTH_FIRST_SEARCH_OP_H_
#include "sql/engine/aggregate/ob_exec_hash_struct.h"
#include "lib/allocator/ob_malloc.h"
#include "lib/list/ob_list.h"
#include "share/datum/ob_datum_funcs.h"
#include "sql/engine/basic/ob_chunk_datum_store.h"
#include "sql/engine/sort/ob_sort_basic_info.h"
#include "lib/rc/context.h"
namespace oceanbase
{
namespace sql
{
/**
 * Base class shared by the search strategies declared in this header
 * (depth-first, breadth-first and bulk breadth-first).
 *
 * It owns the array of input rows and keeps references to the sort
 * collations, the cycle-by column indexes and the left-branch output
 * expressions handed in at construction. It also declares the helper types
 * the strategies use: tree nodes, a row comparer and a hash key for rows.
 */
class ObSearchMethodOp
{
public:
  /**
   * Tree node used by the breadth-first strategies. Each node points to its
   * parent, to its children and to the stored row it represents.
   * NOTE(review): children_ presumably addresses child_num_ entries; confirm
   * against the implementation file.
   */
  typedef struct _BreadthFirstSearchTreeNode {
    _BreadthFirstSearchTreeNode() :
      child_num_(0),
      stored_row_(nullptr),
      children_(nullptr),
      parent_(nullptr)
    {}
    int64_t child_num_;
    ObChunkDatumStore::StoredRow* stored_row_;
    struct _BreadthFirstSearchTreeNode** children_;
    struct _BreadthFirstSearchTreeNode* parent_;
    TO_STRING_KV("row ", stored_row_, "child_num_", child_num_);
  } ObBFSTreeNode;

  /**
   * A row travelling through the search, annotated with its tree level, a
   * cycle flag and (optionally) the breadth-first tree node it belongs to.
   */
  typedef struct _TreeNode
  {
    _TreeNode() :
      is_cycle_(false),
      tree_level_(0),
      stored_row_(nullptr),
      in_bstree_node_(nullptr)
    {
    }
    bool is_cycle_;
    uint64_t tree_level_;
    ObChunkDatumStore::StoredRow* stored_row_;
    ObBFSTreeNode* in_bstree_node_;
    TO_STRING_KV("is tree level", tree_level_, "is cycle", is_cycle_, "row", stored_row_);
  } ObTreeNode;

  /**
   * Ordering predicate over stored rows, driven by a list of sort collations.
   *
   * Failures cannot be returned from a predicate, so they are written to
   * *err; once *err is not OB_SUCCESS every further comparison returns false
   * without doing any work. The caller must pass a valid, initialised `err`.
   */
  struct ObNodeComparer
  {
    explicit ObNodeComparer(
        const common::ObIArray<ObSortFieldCollation> &sort_collations,
        const common::ObIArray<ObExpr *> &exprs,
        int *err)
      : sort_collations_(sort_collations),
        exprs_(exprs),
        err_(err)
    {
    }
    // Compare two tree nodes by the rows they carry.
    bool operator()(const ObTreeNode &r1, const ObTreeNode &r2)
    {
      const ObChunkDatumStore::StoredRow *l = r1.stored_row_;
      const ObChunkDatumStore::StoredRow *r = r2.stored_row_;
      return cmp_stored_row(l, r);
    }
    // Compare two stored rows directly.
    bool operator()(const ObChunkDatumStore::StoredRow *l, const ObChunkDatumStore::StoredRow *r)
    {
      return cmp_stored_row(l, r);
    }
    /**
     * Returns true when `l` must be ordered before `r`.
     *
     * Sort keys are evaluated in order and the first key whose cells differ
     * decides the result, honouring that key's ascending flag and NULL
     * position. Rows that are equal on every key compare as false.
     *
     * Sets *err_ to OB_ERR_UNEXPECTED (and returns false) when a row is null,
     * the rows have different cell counts, a sort column index falls outside
     * the expression array or outside the row's cells, or the expression /
     * its basic function table is null. A failure of the datum compare
     * function is stored in *err_ as well.
     */
    inline bool cmp_stored_row(const ObChunkDatumStore::StoredRow *l,
                               const ObChunkDatumStore::StoredRow *r)
    {
      bool bret = false;
      if (OB_UNLIKELY(common::OB_SUCCESS != *err_)) {
        // do nothing if we already have an error,
        // so we can finish the sort process ASAP.
      } else if (OB_ISNULL(l)
                 || OB_ISNULL(r)
                 || (l->cnt_ != r->cnt_)) {
        *err_ = OB_ERR_UNEXPECTED;
        SQL_LOG_RET(WARN, *err_, "invalid parameter", KPC(l), KPC(r), K(*err_));
      } else {
        const ObDatum *lcells = l->cells();
        const ObDatum *rcells = r->cells();
        int cmp = 0;
        for (int64_t i = 0; OB_SUCCESS == *err_ && 0 == cmp && i < sort_collations_.count(); i++) {
          const int64_t idx = sort_collations_.at(i).field_idx_;
          if (idx < 0 || idx >= exprs_.count()) {
            *err_ = OB_ERR_UNEXPECTED;
            SQL_LOG_RET(WARN, *err_, "compare column id greater than exprs count", K(*err_),
                        K(idx), K(exprs_.count()));
          } else if (idx >= static_cast<int64_t>(l->cnt_)) {
            // Both rows have the same cnt_ (checked above), so one test guards
            // the reads of lcells[idx] and rcells[idx] below.
            *err_ = OB_ERR_UNEXPECTED;
            SQL_LOG_RET(WARN, *err_, "compare column id greater than row cell count", K(*err_),
                        K(idx), K(l->cnt_));
          } else if (OB_ISNULL(exprs_.at(idx)) || OB_ISNULL(exprs_.at(idx)->basic_funcs_)) {
            *err_ = OB_ERR_UNEXPECTED;
            SQL_LOG_RET(WARN, *err_, "compare expr or its basic funcs is null", K(*err_), K(idx));
          } else {
            bool null_first = (NULL_FIRST == sort_collations_.at(i).null_pos_);
            ObExprCmpFuncType cmp_func = null_first ? exprs_.at(idx)->basic_funcs_->null_first_cmp_
                                                    : exprs_.at(idx)->basic_funcs_->null_last_cmp_;
            *err_ = cmp_func(lcells[idx], rcells[idx], cmp);
            if (OB_SUCCESS != *err_) {
              SQL_LOG_RET(WARN, *err_, "cmp failed", K(idx), KPC(l), KPC(r), K(*err_));
            } else if (cmp < 0) {
              bret = sort_collations_.at(i).is_ascending_;
            } else if (cmp > 0) {
              bret = !sort_collations_.at(i).is_ascending_;
            }
          }
        }
      }
      return bret;
    }
  private:
    const common::ObIArray<ObSortFieldCollation> &sort_collations_;
    // Expressions used to obtain the comparison functions.
    const common::ObIArray<ObExpr *> &exprs_;
    // Out-of-band error slot written by cmp_stored_row(); not owned.
    int *err_;
  };

  /**
   * Hash-set key wrapping a stored row together with the column indexes and
   * expressions needed to hash and compare it. inner_hash() and operator==
   * are defined outside this header.
   */
  class ObCycleHash
  {
  public:
    ObCycleHash()
      : row_(NULL),
        hash_col_idx_(NULL),
        exprs_(NULL),
        hash_val_(0)
    {
    }
    ObCycleHash(const ObChunkDatumStore::StoredRow *row,
                const common::ObIArray<uint64_t> *hash_col_idx,
                const common::ObIArray<ObExpr *> *exprs)
      : row_(row),
        hash_col_idx_(hash_col_idx),
        exprs_(exprs),
        hash_val_(0)
    {
    }
    ~ObCycleHash()
    {}
    // Returns the hash, computing it on first use. 0 doubles as the
    // "not computed yet" marker, so a row whose hash is 0 is rehashed on
    // every call.
    uint64_t hash() const
    {
      if (hash_val_ == 0) {
        hash_val_ = inner_hash();
      }
      return hash_val_;
    }
    // Out-parameter flavour of hash(); always returns OB_SUCCESS.
    int hash(uint64_t &hash_val) const
    {
      hash_val = hash();
      return OB_SUCCESS;
    }
    uint64_t inner_hash() const;
    bool operator ==(const ObCycleHash &other) const;
  public:
    const ObChunkDatumStore::StoredRow *row_;
    const common::ObIArray<uint64_t> *hash_col_idx_;
    const common::ObIArray<ObExpr *> *exprs_;
    // Cached hash value; mutable so the const hash() can fill it in.
    mutable uint64_t hash_val_;
  };

  // Initial size for the input rows, starting at 128.
  static const int64_t INIT_ROW_COUNT = 1<<7l;
  // Size of the hash table used to detect cycles along the depth-first path;
  // a tree height of 32 is enough.
  static const int64_t CTE_SET_NUM = 1<<5l;
public:
  /**
   * @param allocator        allocator kept by reference for the lifetime of this object
   * @param left_output      output expressions of the left branch
   * @param sort_collations  sort keys, kept by reference
   * @param cycle_by_columns column indexes used for cycle detection, kept by reference
   *
   * last_node_level_ starts at UINT64_MAX.
   */
  explicit ObSearchMethodOp(common::ObIAllocator &allocator, const ExprFixedArray &left_output,
                            const common::ObIArray<ObSortFieldCollation> &sort_collations,
                            const common::ObIArray<uint64_t> &cycle_by_columns)
    : allocator_(allocator), input_rows_(),
      sort_collations_(sort_collations), cycle_by_columns_(cycle_by_columns),
      left_output_(left_output), last_node_level_(UINT64_MAX) {}
  virtual ~ObSearchMethodOp() = default;
  // Non-zero when the strategy has nothing left to process; each strategy
  // defines what that means.
  virtual int empty() = 0;
  virtual int reuse();
  virtual int add_row(const ObIArray<ObExpr *> &exprs, ObEvalCtx &eval_ctx);
  int sort_input_rows();
  int sort_rownodes(common::ObArray<ObTreeNode> &sort_array);
  // Compare by row content; if identical data is found the node is treated
  // as a cycle.
  int is_same_row(ObChunkDatumStore::StoredRow &row_1st, ObChunkDatumStore::StoredRow &row_2nd,
                  bool &is_cycle);
  // Number of rows currently held in input_rows_.
  int64_t count() { return input_rows_.count(); }
  virtual uint64_t get_last_node_level() { return last_node_level_; }
  const static int64_t ROW_EXTRA_SIZE = 0;
protected:
  // hard code seed, 24bit max prime number
  static const int64_t HASH_SEED = 16777213;
  common::ObIAllocator &allocator_;
  common::ObArray<ObChunkDatumStore::StoredRow *> input_rows_;
  const common::ObIArray<ObSortFieldCollation> &sort_collations_;
  const common::ObIArray<uint64_t> &cycle_by_columns_;
  common::ObArray<ObChunkDatumStore::StoredRow*> recycle_rows_;
  const ExprFixedArray &left_output_;
  // Records the level in the tree of the row currently being queried.
  uint64_t last_node_level_;
};
/**
 * Depth-first search strategy. It keeps a stack of pending nodes, the row
 * path currently being explored, and a hash set of rows (hash_filter_rows_).
 */
class ObDepthFirstSearchOp : public ObSearchMethodOp
{
  using RowMap = common::hash::ObHashSet<ObCycleHash, common::hash::NoPthreadDefendMode>;

public:
  ObDepthFirstSearchOp(common::ObIAllocator &allocator,
                       const ExprFixedArray &left_output,
                       const common::ObIArray<ObSortFieldCollation> &sort_collations,
                       const common::ObIArray<uint64_t> &cycle_by_columns)
    : ObSearchMethodOp(allocator, left_output, sort_collations, cycle_by_columns),
      hash_filter_rows_(),
      hash_col_idx_(),
      current_search_path_(),
      search_stack_(allocator_)
  {
  }

  // Tears down the hash set, but only if it was ever created.
  virtual ~ObDepthFirstSearchOp()
  {
    if (hash_filter_rows_.created()) {
      hash_filter_rows_.destroy();
    }
  }

  virtual void destroy();
  virtual int reuse() override;

  // Non-zero when both the search stack and the input rows are empty.
  virtual int empty() override
  {
    const bool stack_drained = search_stack_.empty();
    return stack_drained && input_rows_.empty();
  }

  int init();
  int finish_add_row(bool sort);
  int adjust_stack(ObTreeNode &node);
  int get_next_nocycle_node(common::ObList<ObTreeNode, common::ObIAllocator> &result_output,
                            ObTreeNode &nocycle_node);

private:
  // Check whether a node is a cycle node.
  int is_depth_cycle_node(ObTreeNode &node);

private:
  RowMap hash_filter_rows_;
  common::ObSEArray<uint64_t, 32> hash_col_idx_;
  /**
   *                                    level
   *              A                       0
   *        AA         AB                 1
   *    AAA   AAB   ABA   ABB             2
   *
   * Example: during one round of the search, AA is the node that was last
   * handed to the right branch. current_search_stack_ (presumably
   * current_search_path_ below; confirm) then holds A AA, so its
   * count = level + 1.
   * The new results are AAA and AAB, and over this round search_stack_ goes
   * from "A AB AA" to "A AB AAA AAB".
   */
  common::ObArray<ObChunkDatumStore::StoredRow *> current_search_path_;
  common::ObList<ObTreeNode, common::ObIAllocator> search_stack_;
};
/**
 * Breadth-first search strategy.
 *
 * Because cycles have to be detected, breadth-first search keeps the whole
 * tree in memory; prefer depth-first search whenever it can be used.
 */
class ObBreadthFirstSearchOp : public ObSearchMethodOp
{
public:
  // Starts with current_parent_node_ on the embedded root and
  // last_node_level_ reset to 0 (the base class leaves it at UINT64_MAX).
  ObBreadthFirstSearchOp(common::ObIAllocator &allocator,
                         const ExprFixedArray &left_output,
                         const common::ObIArray<ObSortFieldCollation> &sort_collations,
                         const common::ObIArray<uint64_t> &cycle_by_columns)
    : ObSearchMethodOp(allocator, left_output, sort_collations, cycle_by_columns),
      bst_root_(),
      current_parent_node_(&bst_root_),
      search_queue_(allocator),
      search_results_()
  {
    last_node_level_ = 0;
  }

  virtual ~ObBreadthFirstSearchOp() = default;

  virtual int reuse() override;
  virtual void destroy();

  // Non-zero only when the input rows, the search queue and the search
  // results are all empty.
  virtual int empty() override
  {
    int nothing_left = false;
    if (input_rows_.empty() && search_queue_.empty()) {
      nothing_left = search_results_.empty();
    }
    return nothing_left;
  }

  int add_result_rows();
  int finish_add_row(bool sort);
  int get_next_nocycle_node(common::ObList<ObTreeNode, common::ObIAllocator> &result_output,
                            ObTreeNode &nocycle_node);
  int update_parent_node(ObTreeNode &node);

private:
  int init_new_nodes(ObBFSTreeNode *last_bstnode, int64_t child_num);
  int is_breadth_cycle_node(ObTreeNode &node);
  int add_new_level();

private:
  // Root node of the breadth-first search tree.
  ObBFSTreeNode bst_root_;
  /**
   *              A
   *        AA         AB
   *    AAA   AAB   ABA   ABB
   *
   * Example: during one round of the search, current_parent_node_ points at
   * AA; search_queue_ holds AA AB (the level being queried) and
   * search_results_ holds AAA AAB (the level of results).
   */
  ObBFSTreeNode* current_parent_node_;
  common::ObList<ObTreeNode, common::ObIAllocator> search_queue_;
  common::ObArray<ObTreeNode> search_results_;
};
class ObBreadthFirstSearchBulkOp : public ObSearchMethodOp
{
public:
ObBreadthFirstSearchBulkOp(common::ObIAllocator &allocator,
const ExprFixedArray &left_output,
const common::ObIArray<ObSortFieldCollation> &sort_collations,
const common::ObIArray<uint64_t> &cycle_by_columns) :
ObSearchMethodOp(allocator, left_output, sort_collations, cycle_by_columns), bst_root_(),
search_results_(), cur_recursion_depth_(0), cur_iter_groups_(), last_iter_groups_(),
max_buffer_cnt_(0), result_output_buffer_(nullptr), mem_context_(nullptr),
malloc_allocator_(nullptr) {}
virtual ~ObBreadthFirstSearchBulkOp() = default;
virtual int reuse() override;
virtual void destroy();
virtual int empty() override { return input_rows_.empty() && search_results_.empty(); }
int add_result_rows(bool left_branch, int64_t identify_seq_offset);
int get_next_nocycle_bulk(ObList<ObTreeNode, common::ObIAllocator> &result_output,
ObArray<ObChunkDatumStore::StoredRow *> &fake_table_bulk_rows,
bool need_sort);
int update_search_depth(uint64_t max_recursive_depth);
int sort_result_output_nodes(int64_t rows_cnt);
int add_row(const ObIArray<ObExpr *> &exprs, ObEvalCtx &eval_ctx);
int init_mem_context();
void free_input_rows_mem();
void free_last_iter_mem();
private:
int is_breadth_cycle_node(ObBFSTreeNode* node, ObChunkDatumStore::StoredRow *row, bool &is_cycle);
int save_to_store_row(ObIAllocator &allocator, const ObIArray<ObExpr *> &exprs,
ObEvalCtx &eval_ctx, ObChunkDatumStore::StoredRow *&store_row);
private:
// breadth first search的root节点
ObBFSTreeNode bst_root_;
common::ObArray<ObTreeNode> search_results_;
uint64_t cur_recursion_depth_;
common::ObArray<ObBFSTreeNode *> cur_iter_groups_;
common::ObArray<ObBFSTreeNode *> last_iter_groups_;
int64_t max_buffer_cnt_;
ObTreeNode ** result_output_buffer_;
lib::MemoryContext mem_context_;
ObIAllocator *malloc_allocator_;
// for mysql mode, free last iter memory because not need check cycle
common::ObArray<ObChunkDatumStore::StoredRow *> last_iter_input_rows_;
};
}
}
#endif