// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "olap/delete_handler.h" #include #include #include #include #include #include #include #include #include "gen_cpp/olap_file.pb.h" #include "olap/olap_common.h" #include "olap/olap_cond.h" #include "olap/reader.h" #include "olap/utils.h" using apache::thrift::ThriftDebugString; using std::numeric_limits; using std::vector; using std::string; using std::stringstream; using std::regex; using std::regex_error; using std::regex_match; using std::smatch; using google::protobuf::RepeatedPtrField; namespace doris { Status DeleteConditionHandler::generate_delete_predicate(const TabletSchema& schema, const std::vector& conditions, DeletePredicatePB* del_pred) { if (conditions.empty()) { LOG(WARNING) << "invalid parameters for store_cond." << " condition_size=" << conditions.size(); return Status::OLAPInternalError(OLAP_ERR_DELETE_INVALID_PARAMETERS); } // Check whether the delete condition meets the requirements for (const TCondition& condition : conditions) { if (check_condition_valid(schema, condition) != Status::OK()) { LOG(WARNING) << "invalid condition. condition=" << ThriftDebugString(condition); return Status::OLAPInternalError(OLAP_ERR_DELETE_INVALID_CONDITION); } } // Store delete condition for (const TCondition& condition : conditions) { if (condition.condition_values.size() > 1) { InPredicatePB* in_pred = del_pred->add_in_predicates(); in_pred->set_column_name(condition.column_name); bool is_not_in = condition.condition_op == "!*="; in_pred->set_is_not_in(is_not_in); for (const auto& condition_value : condition.condition_values) { in_pred->add_values(condition_value); } LOG(INFO) << "store one sub-delete condition. condition name=" << in_pred->column_name() << "condition size=" << in_pred->values().size(); } else { string condition_str = construct_sub_predicates(condition); del_pred->add_sub_predicates(condition_str); LOG(INFO) << "store one sub-delete condition. condition=" << condition_str; } } del_pred->set_version(-1); return Status::OK(); } std::string DeleteConditionHandler::construct_sub_predicates(const TCondition& condition) { string op = condition.condition_op; if (op == "<") { op += "<"; } else if (op == ">") { op += ">"; } string condition_str = ""; if ("IS" == op) { condition_str = condition.column_name + " " + op + " " + condition.condition_values[0]; } else { if (op == "*=") { op = "="; } else if (op == "!*=") { op = "!="; } condition_str = condition.column_name + op + condition.condition_values[0]; } return condition_str; } bool DeleteConditionHandler::is_condition_value_valid(const TabletColumn& column, const std::string& condition_op, const string& value_str) { if ("IS" == condition_op && ("NULL" == value_str || "NOT NULL" == value_str)) { return true; } FieldType field_type = column.type(); switch (field_type) { case OLAP_FIELD_TYPE_TINYINT: return valid_signed_number(value_str); case OLAP_FIELD_TYPE_SMALLINT: return valid_signed_number(value_str); case OLAP_FIELD_TYPE_INT: return valid_signed_number(value_str); case OLAP_FIELD_TYPE_BIGINT: return valid_signed_number(value_str); case OLAP_FIELD_TYPE_LARGEINT: return valid_signed_number(value_str); case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: return valid_unsigned_number(value_str); case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: return valid_unsigned_number(value_str); case OLAP_FIELD_TYPE_UNSIGNED_INT: return valid_unsigned_number(value_str); case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: return valid_unsigned_number(value_str); case OLAP_FIELD_TYPE_DECIMAL: return valid_decimal(value_str, column.precision(), column.frac()); case OLAP_FIELD_TYPE_CHAR: case OLAP_FIELD_TYPE_VARCHAR: return value_str.size() <= column.length(); case OLAP_FIELD_TYPE_STRING: return value_str.size() <= config::string_type_length_soft_limit_bytes; case OLAP_FIELD_TYPE_DATE: case OLAP_FIELD_TYPE_DATETIME: return valid_datetime(value_str); case OLAP_FIELD_TYPE_BOOL: return valid_bool(value_str); default: LOG(WARNING) << "unknown field type. [type=" << field_type << "]"; } return false; } Status DeleteConditionHandler::check_condition_valid(const TabletSchema& schema, const TCondition& cond) { // Check whether the column exists int32_t field_index = schema.field_index(cond.column_name); if (field_index < 0) { LOG(WARNING) << "field is not existent. [field_index=" << field_index << "]"; return Status::OLAPInternalError(OLAP_ERR_DELETE_INVALID_CONDITION); } // Delete condition should only applied on key columns or duplicate key table, and // the condition column type should not be float or double. const TabletColumn& column = schema.column(field_index); if ((!column.is_key() && schema.keys_type() != KeysType::DUP_KEYS) || column.type() == OLAP_FIELD_TYPE_DOUBLE || column.type() == OLAP_FIELD_TYPE_FLOAT) { LOG(WARNING) << "field is not key column, or storage model is not duplicate, or data type " "is float or double."; return Status::OLAPInternalError(OLAP_ERR_DELETE_INVALID_CONDITION); } // Check operator and operands size are matched. if ("*=" != cond.condition_op && "!*=" != cond.condition_op && cond.condition_values.size() != 1) { LOG(WARNING) << "invalid condition value size. [size=" << cond.condition_values.size() << "]"; return Status::OLAPInternalError(OLAP_ERR_DELETE_INVALID_CONDITION); } // Check each operand is valid for (const auto& condition_value : cond.condition_values) { if (!is_condition_value_valid(column, cond.condition_op, condition_value)) { LOG(WARNING) << "invalid condition value. [value=" << condition_value << "]"; return Status::OLAPInternalError(OLAP_ERR_DELETE_INVALID_CONDITION); } } return Status::OK(); } bool DeleteHandler::_parse_condition(const std::string& condition_str, TCondition* condition) { bool matched = true; smatch what; try { // Condition string format, the format is (column_name)(op)(value) // eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n" // group1: (\w+) matches "c1" // group2: ((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS)) matches "=" // group3: ((?:[\s\S]+)?) matches "1597751948193618247 and length(source)<1;\n;\n" const char* const CONDITION_STR_PATTERN = R"((\w+)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))\s*((?:[\s\S]+)?))"; regex ex(CONDITION_STR_PATTERN); if (regex_match(condition_str, what, ex)) { if (condition_str.size() != what[0].str().size()) { matched = false; } } else { matched = false; } } catch (regex_error& e) { VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what() << "]"; matched = false; } if (!matched) { return false; } condition->column_name = what[1].str(); condition->condition_op = what[2].str(); condition->condition_values.push_back(what[3].str()); return true; } Status DeleteHandler::init(const TabletSchema& schema, const DelPredicateArray& delete_conditions, int64_t version, const TabletReader* reader) { DCHECK(!_is_inited) << "reinitialize delete handler."; DCHECK(version >= 0) << "invalid parameters. version=" << version; for (const auto& delete_condition : delete_conditions) { // Skip the delete condition with large version if (delete_condition.version() > version) { continue; } DeleteConditions temp; temp.filter_version = delete_condition.version(); temp.del_cond = new (std::nothrow) Conditions(); if (temp.del_cond == nullptr) { LOG(FATAL) << "fail to malloc Conditions. size=" << sizeof(Conditions); return Status::OLAPInternalError(OLAP_ERR_MALLOC_ERROR); } temp.del_cond->set_tablet_schema(&schema); for (const auto& sub_predicate : delete_condition.sub_predicates()) { TCondition condition; if (!_parse_condition(sub_predicate, &condition)) { LOG(WARNING) << "fail to parse condition. [condition=" << sub_predicate << "]"; return Status::OLAPInternalError(OLAP_ERR_DELETE_INVALID_PARAMETERS); } Status res = temp.del_cond->append_condition(condition); if (!res.ok()) { LOG(WARNING) << "fail to append condition.res = " << res; return res; } if (reader != nullptr) { auto predicate = reader->_parse_to_predicate(condition, true); if (predicate != nullptr) { temp.column_predicate_vec.push_back(predicate); } } } for (const auto& in_predicate : delete_condition.in_predicates()) { TCondition condition; condition.__set_column_name(in_predicate.column_name()); if (in_predicate.is_not_in()) { condition.__set_condition_op("!*="); } else { condition.__set_condition_op("*="); } for (const auto& value : in_predicate.values()) { condition.condition_values.push_back(value); } Status res = temp.del_cond->append_condition(condition); if (!res.ok()) { LOG(WARNING) << "fail to append condition.res = " << res; return res; } if (reader != nullptr) { temp.column_predicate_vec.push_back(reader->_parse_to_predicate(condition, true)); } } _del_conds.emplace_back(std::move(temp)); } _is_inited = true; return Status::OK(); } bool DeleteHandler::is_filter_data(const int64_t data_version, const RowCursor& row) const { // According to semantics, the delete condition stored in _del_conds should be an OR relationship, // so as long as the data matches one of the _del_conds, it will return true. for (const auto& del_cond : _del_conds) { if (data_version <= del_cond.filter_version && del_cond.del_cond->delete_conditions_eval(row)) { return true; } } return false; } std::vector DeleteHandler::get_conds_version() { std::vector conds_version; for (const auto& cond : _del_conds) { conds_version.push_back(cond.filter_version); } return conds_version; } void DeleteHandler::finalize() { if (!_is_inited) { return; } for (auto& cond : _del_conds) { cond.del_cond->finalize(); delete cond.del_cond; for (auto pred : cond.column_predicate_vec) { delete pred; } } _del_conds.clear(); _is_inited = false; } void DeleteHandler::get_delete_conditions_after_version( int64_t version, std::vector* delete_conditions, AndBlockColumnPredicate* and_block_column_predicate_ptr) const { for (auto& del_cond : _del_conds) { if (del_cond.filter_version > version) { delete_conditions->emplace_back(del_cond.del_cond); // now, only query support delete column predicate operator if (!del_cond.column_predicate_vec.empty()) { if (del_cond.column_predicate_vec.size() == 1) { auto single_column_block_predicate = new SingleColumnBlockPredicate(del_cond.column_predicate_vec[0]); and_block_column_predicate_ptr->add_column_predicate( single_column_block_predicate); } else { auto or_column_predicate = new OrBlockColumnPredicate(); // build or_column_predicate std::for_each(del_cond.column_predicate_vec.cbegin(), del_cond.column_predicate_vec.cend(), [&or_column_predicate](const ColumnPredicate* predicate) { or_column_predicate->add_column_predicate( new SingleColumnBlockPredicate(predicate)); }); and_block_column_predicate_ptr->add_column_predicate(or_column_predicate); } } } } } } // namespace doris