[ExceptNode] Implement except node (#3056)

Implement the except node,
which supports statements like:

``` 
select a from t1 except select b from t2
```
This commit is contained in:
yangzhg
2020-03-17 10:54:40 +08:00
committed by GitHub
parent f6374fa9a5
commit 0959abc1dc
10 changed files with 297 additions and 123 deletions

View File

@ -44,6 +44,8 @@ Status IntersectNode::init(const TPlanNode& tnode, RuntimeState* state) {
Status IntersectNode::prepare(RuntimeState* state) {
RETURN_IF_ERROR(ExecNode::prepare(state));
_build_pool.reset(new MemPool(mem_tracker()));
_build_timer = ADD_TIMER(runtime_profile(), "BuildTime");
_probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime");
SCOPED_TIMER(_runtime_profile->total_time_counter());
for (size_t i = 0; i < _child_expr_lists.size(); ++i) {
RETURN_IF_ERROR(Expr::prepare(_child_expr_lists[i], state, child(i)->row_desc(),
@ -57,7 +59,7 @@ Status IntersectNode::prepare(RuntimeState* state) {
TupleDescriptor* build_tuple_desc = child(0)->row_desc().tuple_descriptors()[i];
_build_tuple_idx.push_back(_row_descriptor.get_tuple_idx(build_tuple_desc->id()));
}
_find_nulls = std::vector<bool>(_child_expr_lists.size(), true);
_find_nulls = std::vector<bool>(_build_tuple_size, true);
return Status::OK();
}
@ -108,13 +110,14 @@ Status IntersectNode::open(RuntimeState* state) {
RETURN_IF_ERROR(child(0)->open(state));
bool eos = false;
while (!eos) {
SCOPED_TIMER(_build_timer);
RETURN_IF_CANCELLED(state);
RETURN_IF_ERROR(child(0)->get_next(state, &build_batch, &eos));
// take ownership of tuple data of build_batch
_build_pool->acquire_data(build_batch.tuple_data_pool(), false);
RETURN_IF_LIMIT_EXCEEDED(state, " Intersect, while constructing the hash table.");
for (int i = 0; i < build_batch.num_rows(); ++i) {
_hash_tbl->insert(build_batch.get_row(i));
_hash_tbl->insert_unique(build_batch.get_row(i));
}
VLOG_ROW << "hash table content: " << _hash_tbl->debug_string(true, &child(0)->row_desc());
build_batch.reset();
@ -126,24 +129,8 @@ Status IntersectNode::open(RuntimeState* state) {
}
for (int i = 1; i < _children.size(); ++i) {
// probe
_probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker()));
RETURN_IF_ERROR(child(i)->open(state));
eos = false;
while (!eos) {
RETURN_IF_CANCELLED(state);
RETURN_IF_ERROR(child(i)->get_next(state, _probe_batch.get(), &eos));
RETURN_IF_LIMIT_EXCEEDED(state, " Intersect , while probing the hash table.");
for (int j = 0; j < _probe_batch->num_rows(); ++j) {
_hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j));
if (_hash_tbl_iterator != _hash_tbl->end()) {
_hash_tbl_iterator.set_matched();
}
}
_probe_batch->reset();
}
// rebuid hash table
if (i != _children.size() - 1) {
if (i > 1) {
SCOPED_TIMER(_build_timer);
std::unique_ptr<HashTable> temp_tbl(
new HashTable(_child_expr_lists[0], _child_expr_lists[i], _build_tuple_size,
true, _find_nulls, id(), mem_tracker(), 1024));
@ -163,6 +150,23 @@ Status IntersectNode::open(RuntimeState* state) {
break;
}
}
// probe
_probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker()));
ScopedTimer<MonotonicStopWatch> probe_timer(_probe_timer);
RETURN_IF_ERROR(child(i)->open(state));
eos = false;
while (!eos) {
RETURN_IF_CANCELLED(state);
RETURN_IF_ERROR(child(i)->get_next(state, _probe_batch.get(), &eos));
RETURN_IF_LIMIT_EXCEEDED(state, " Intersect , while probing the hash table.");
for (int j = 0; j < _probe_batch->num_rows(); ++j) {
_hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j));
if (_hash_tbl_iterator != _hash_tbl->end()) {
_hash_tbl_iterator.set_matched();
}
}
_probe_batch->reset();
}
}
_hash_tbl_iterator = _hash_tbl->begin();
return Status::OK();
@ -183,11 +187,12 @@ Status IntersectNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* e
uint8_t* out_ptr = reinterpret_cast<uint8_t*>(out_row);
memcpy(out_ptr, _hash_tbl_iterator.get_row(), _build_tuple_row_size);
out_batch->commit_last_row();
++_num_rows_returned;
}
_hash_tbl_iterator.next<false>();
*eos = !_hash_tbl_iterator.has_next();
if (out_batch->is_full() || out_batch->at_resource_limit()) {
*eos = !_hash_tbl_iterator.has_next() || reached_limit();
if (out_batch->is_full() || out_batch->at_resource_limit() || *eos) {
return Status::OK();
}
}