// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include "util/bitmap_intersect.h" namespace doris { // Compute the intersection union difference set of two or more bitmaps // Usage: orthogonal_bitmap_parse_calculate(bitmap_column, filter_column, input_string) // Example: orthogonal_bitmap_expr_calculate(user_id, event, '(A|B)&(C-D)'), meaning find the intersection union difference set of user_id in all A/B/C/D 4 bitmaps // Operation symbol: // the operator '|' stands for union, the operator '&' stands for intersection, the operator '-' indicates the difference set, the operator '^' stands for xor class BitmapExprCalculation : public BitmapIntersect { public: BitmapExprCalculation() = default; explicit BitmapExprCalculation(const char* src) { deserialize(src); } void bitmap_calculation_init(std::string& input_str) { _polish = reverse_polish(input_str); std::string bitmap_key; for (int i = 0; i < _polish.length(); i++) { char c = _polish.at(i); if (c != '&' && c != '|' && c != '^' && c != '-' && c != ' ' && c != '\\') { bitmap_key += c; } else if (i != 0 && _polish.at(i - 1) == '\\') { bitmap_key += c; } else if (c == '\\') { continue; } else { if (bitmap_key.length() > 0) { add_key(bitmap_key); bitmap_key.clear(); } } } if (bitmap_key.length() > 0) { add_key(bitmap_key); bitmap_key.clear(); } } BitmapValue bitmap_calculate() { std::stack values; std::string bitmap_key; for (int i = 0; i < _polish.length(); i++) { char c = _polish.at(i); if (c == ' ') { if (bitmap_key.length() > 0) { values.push(_bitmaps[bitmap_key]); bitmap_key.clear(); } } else if (c != '&' && c != '|' && c != '^' && c != '-' && c != '\\') { bitmap_key += c; } else if (i != 0 && _polish.at(i - 1) == '\\') { bitmap_key += c; } else if (c == '\\') { continue; } else { if (bitmap_key.length() > 0) { values.push(_bitmaps[bitmap_key]); bitmap_key.clear(); } if (values.size() >= 2) { BitmapValue op_a = values.top(); values.pop(); BitmapValue op_b = values.top(); values.pop(); BitmapValue cal_result; bitmap_calculate(op_a, op_b, c, cal_result); values.push(cal_result); } } } BitmapValue result; if (bitmap_key.length() > 0) { result |= _bitmaps[bitmap_key]; } else if (!values.empty()) { result |= values.top(); } return result; } // calculate the bitmap value by expr bitmap calculate int64_t bitmap_calculate_count() { if (_bitmaps.empty()) { return 0; } return bitmap_calculate().cardinality(); } private: constexpr int priority(char c) { switch (c) { case '&': return 1; case '|': return 1; case '^': return 1; case '-': return 1; default: return 0; } } template std::string print_stack(std::stack& stack) { std::string result; while (!stack.empty()) { result = stack.top() + result; stack.pop(); } return result; } std::string reverse_polish(const std::string& input_str) { std::stack polish; std::stack op_stack; bool last_is_char = false; for (int i = 0; i < input_str.length(); i++) { char cur_char = input_str.at(i); if (cur_char != '&' && cur_char != '|' && cur_char != '^' && cur_char != '-' && cur_char != '(' && cur_char != ')' && cur_char != ' ' && cur_char != '\t') { if (!last_is_char) { polish.push(' '); } polish.push(cur_char); last_is_char = true; continue; } else if (i != 0 && input_str.at(i - 1) == '\\') { polish.push(cur_char); last_is_char = true; continue; } else if (cur_char == ' ' || cur_char == '\t') { last_is_char = false; continue; } else if (cur_char == '(') { op_stack.push(cur_char); } else if (!op_stack.empty() && cur_char == ')') { while (!op_stack.empty() && op_stack.top() != '(') { polish.push(op_stack.top()); op_stack.pop(); } op_stack.pop(); } else { if (!op_stack.empty() && op_stack.top() == '(') { op_stack.push(cur_char); } else { if (!op_stack.empty() && priority(cur_char) > priority(op_stack.top())) { op_stack.push(cur_char); } else { while (!op_stack.empty()) { if (op_stack.top() == '(') { break; } if (priority(cur_char) <= priority(op_stack.top())) { polish.push(op_stack.top()); op_stack.pop(); } else { break; } } op_stack.push(cur_char); } } } last_is_char = false; } while (!op_stack.empty()) { polish.push(op_stack.top()); op_stack.pop(); } return print_stack(polish); } void bitmap_calculate(BitmapValue& op_a, BitmapValue& op_b, char op, BitmapValue& result) { result |= op_b; switch (op) { case '&': result &= op_a; break; case '|': result |= op_a; break; case '-': result -= op_a; break; case '^': result ^= op_a; break; } } std::string _polish; }; } // namespace doris