* Add the bitmap intersection and difference set for mixed calculation of udaf Co-authored-by: zhangbinbin05 <zhangbinbin05@baidu.com>
217 lines
7.4 KiB
C++
217 lines
7.4 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
#pragma once
|
|
#include <stack>
|
|
#include <string>
|
|
|
|
#include "util/bitmap_intersect.h"
|
|
|
|
namespace doris {
|
|
|
|
// Compute the intersection, union, difference and xor of two or more bitmaps
|
|
// Usage: orthogonal_bitmap_expr_calculate(bitmap_column, filter_column, input_string)
|
|
// Example: orthogonal_bitmap_expr_calculate(user_id, event, '(A|B)&(C-D)') computes the combined union/intersection/difference of user_id over the four bitmaps A, B, C and D
|
|
// Operation symbol:
|
|
// the operator '|' stands for union, the operator '&' stands for intersection, the operator '-' indicates the difference set, the operator '^' stands for xor
|
|
class BitmapExprCalculation : public BitmapIntersect<std::string> {
public:
    BitmapExprCalculation() = default;

    // Builds the object from a serialized buffer by delegating to deserialize().
    explicit BitmapExprCalculation(const char* src) { deserialize(src); }

    // Parses the infix expression `input_str`, stores its postfix (reverse
    // polish) form in _polish and registers every operand name found in it
    // through add_key().
    //
    // Operand names are runs of characters delimited by unescaped operators
    // ('&', '|', '^', '-') or blanks. A backslash escapes the character that
    // follows it, so that character becomes part of the name; the backslash
    // itself is not part of the name unless it is escaped too.
    void bitmap_calculation_init(const std::string& input_str) {
        _polish = reverse_polish(input_str);
        std::string bitmap_key;
        for (size_t i = 0; i < _polish.length(); ++i) {
            if (is_key_char(_polish, i)) {
                bitmap_key += _polish[i];
            } else if (_polish[i] == '\\') {
                // Unescaped backslash: only marks the next character as literal.
                continue;
            } else if (!bitmap_key.empty()) {
                // Unescaped separator or operator terminates the current name.
                add_key(bitmap_key);
                bitmap_key.clear();
            }
        }
        if (!bitmap_key.empty()) {
            add_key(bitmap_key);
        }
    }

    // Evaluates the postfix expression stored in _polish against _bitmaps and
    // returns the resulting bitmap.
    //
    // Operand names are tokenized with exactly the same rules as in
    // bitmap_calculation_init() (including escaped blanks), so the name that
    // was registered is the name that is looked up here. An operator that
    // finds fewer than two operands on the stack is skipped.
    BitmapValue bitmap_calculate() {
        std::stack<BitmapValue> values;
        std::string bitmap_key;
        for (size_t i = 0; i < _polish.length(); ++i) {
            const char c = _polish[i];
            if (is_key_char(_polish, i)) {
                bitmap_key += c;
                continue;
            }
            if (c == '\\') {
                continue;
            }
            // `c` is an unescaped separator or operator: the pending operand
            // name (if any) is complete, push its bitmap.
            if (!bitmap_key.empty()) {
                values.push(_bitmaps[bitmap_key]);
                bitmap_key.clear();
            }
            if (c == ' ') {
                continue;
            }
            if (values.size() >= 2) {
                // The top of the stack is the right-hand operand.
                BitmapValue op_a = values.top();
                values.pop();
                BitmapValue op_b = values.top();
                values.pop();
                BitmapValue cal_result;
                bitmap_calculate(op_a, op_b, c, cal_result);
                values.push(cal_result);
            }
        }
        BitmapValue result;
        if (!bitmap_key.empty()) {
            // The expression ended with an operand name (e.g. a single operand).
            result |= _bitmaps[bitmap_key];
        } else if (!values.empty()) {
            result |= values.top();
        }
        return result;
    }

    // Returns the cardinality of bitmap_calculate(), or 0 when _bitmaps is empty.
    int64_t bitmap_calculate_count() {
        if (_bitmaps.empty()) {
            return 0;
        }
        return bitmap_calculate().cardinality();
    }

private:
    // True for the four binary set operators understood by the expression parser.
    static constexpr bool is_operator(char c) {
        return c == '&' || c == '|' || c == '^' || c == '-';
    }

    // Operator precedence used by reverse_polish(). All four operators share
    // the same precedence, so an expression without parentheses is evaluated
    // strictly left to right. Any other character has precedence 0.
    static constexpr int priority(char c) {
        switch (c) {
        case '&':
        case '|':
        case '^':
        case '-':
            return 1;
        default:
            return 0;
        }
    }

    // Returns true when the character at `pos` of a postfix string belongs to
    // an operand name: either it is preceded by a backslash (escaped), or it
    // is neither an operator, a blank separator nor a backslash.
    static bool is_key_char(const std::string& expr, size_t pos) {
        if (pos != 0 && expr[pos - 1] == '\\') {
            return true;
        }
        const char c = expr[pos];
        return !is_operator(c) && c != ' ' && c != '\\';
    }

    // Converts the infix expression `input_str` to postfix notation.
    //
    // Each operand name is emitted with a leading ' ' separator; backslashes
    // are kept in the output so that later passes can still recognise escaped
    // characters. Blanks and tabs between tokens are dropped. Unbalanced
    // parentheses are tolerated: a ')' without a matching '(' and a '(' that
    // is never closed are both ignored instead of leaking into the output.
    static std::string reverse_polish(const std::string& input_str) {
        std::string polish;
        polish.reserve(input_str.length() * 2);
        std::stack<char> op_stack;
        bool last_is_char = false;
        for (size_t i = 0; i < input_str.length(); ++i) {
            const char cur_char = input_str[i];
            const bool is_special = is_operator(cur_char) || cur_char == '(' || cur_char == ')' ||
                                    cur_char == ' ' || cur_char == '\t';
            if (!is_special) {
                // Ordinary operand character; start a new token if needed.
                if (!last_is_char) {
                    polish += ' ';
                }
                polish += cur_char;
                last_is_char = true;
                continue;
            }
            if (i != 0 && input_str[i - 1] == '\\') {
                // Escaped special character: part of the current operand name.
                polish += cur_char;
                last_is_char = true;
                continue;
            }

            last_is_char = false;
            if (cur_char == ' ' || cur_char == '\t') {
                continue;
            }
            if (cur_char == '(') {
                op_stack.push(cur_char);
                continue;
            }
            if (cur_char == ')') {
                while (!op_stack.empty() && op_stack.top() != '(') {
                    polish += op_stack.top();
                    op_stack.pop();
                }
                // Discard the matching '(' if there is one; popping an empty
                // std::stack is undefined behaviour.
                if (!op_stack.empty()) {
                    op_stack.pop();
                }
                continue;
            }

            // Binary operator: flush operators of greater or equal precedence
            // back to the nearest '(' before pushing it.
            while (!op_stack.empty() && op_stack.top() != '(' &&
                   priority(cur_char) <= priority(op_stack.top())) {
                polish += op_stack.top();
                op_stack.pop();
            }
            op_stack.push(cur_char);
        }

        while (!op_stack.empty()) {
            // A leftover '(' means the input was unbalanced; it is not an operator.
            if (op_stack.top() != '(') {
                polish += op_stack.top();
            }
            op_stack.pop();
        }
        return polish;
    }

    // Computes `op_b <op> op_a` and merges it into `result`: `result` is first
    // OR-ed with the left operand `op_b`, then combined with the right operand
    // `op_a` using the operator `op`. Unknown operators leave `result` as the
    // union of its previous content and `op_b`.
    void bitmap_calculate(BitmapValue& op_a, BitmapValue& op_b, char op, BitmapValue& result) {
        result |= op_b;
        switch (op) {
        case '&':
            result &= op_a;
            break;
        case '|':
            result |= op_a;
            break;
        case '-':
            result -= op_a;
            break;
        case '^':
            result ^= op_a;
            break;
        default:
            break;
        }
    }

    // Postfix form of the expression passed to bitmap_calculation_init().
    std::string _polish;
};
|
|
} // namespace doris
|