From fe65a623c10403902fc381b4383ee4b321c02a5f Mon Sep 17 00:00:00 2001 From: Zhengguo Yang Date: Tue, 29 Jun 2021 09:47:42 +0800 Subject: [PATCH] Fix timeout error when delete condition contains invalid datetime format (#6030) * add date time format check in delete statment --- be/src/olap/delete_handler.cpp | 108 +++++++++--------- be/src/olap/utils.cpp | 26 ++--- be/test/olap/delete_handler_test.cpp | 7 +- .../org/apache/doris/load/DeleteHandler.java | 15 ++- 4 files changed, 85 insertions(+), 71 deletions(-) diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 577802179f..2f169c916c 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -16,14 +16,13 @@ // under the License. #include "olap/delete_handler.h" -#include "olap/reader.h" #include #include #include -#include #include +#include #include #include #include @@ -31,6 +30,7 @@ #include "gen_cpp/olap_file.pb.h" #include "olap/olap_common.h" #include "olap/olap_cond.h" +#include "olap/reader.h" #include "olap/utils.h" using apache::thrift::ThriftDebugString; @@ -39,10 +39,10 @@ using std::vector; using std::string; using std::stringstream; -using boost::regex; -using boost::regex_error; -using boost::regex_match; -using boost::smatch; +using std::regex; +using std::regex_error; +using std::regex_match; +using std::smatch; using google::protobuf::RepeatedPtrField; @@ -118,37 +118,37 @@ bool DeleteConditionHandler::is_condition_value_valid(const TabletColumn& column } FieldType field_type = column.type(); - switch(field_type) { - case OLAP_FIELD_TYPE_TINYINT: - return valid_signed_number(value_str); - case OLAP_FIELD_TYPE_SMALLINT: - return valid_signed_number(value_str); - case OLAP_FIELD_TYPE_INT: - return valid_signed_number(value_str); - case OLAP_FIELD_TYPE_BIGINT: - return valid_signed_number(value_str); - case OLAP_FIELD_TYPE_LARGEINT: - return valid_signed_number(value_str); - case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: - return valid_unsigned_number(value_str); - case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: - return valid_unsigned_number(value_str); - case OLAP_FIELD_TYPE_UNSIGNED_INT: - return valid_unsigned_number(value_str); - case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: - return valid_unsigned_number(value_str); - case OLAP_FIELD_TYPE_DECIMAL: - return valid_decimal(value_str, column.precision(), column.frac()); - case OLAP_FIELD_TYPE_CHAR: - case OLAP_FIELD_TYPE_VARCHAR: - return value_str.size() <= column.length(); - case OLAP_FIELD_TYPE_DATE: - case OLAP_FIELD_TYPE_DATETIME: - return valid_datetime(value_str); - case OLAP_FIELD_TYPE_BOOL: - return valid_bool(value_str); - default: - OLAP_LOG_WARNING("unknown field type. [type=%d]", field_type); + switch (field_type) { + case OLAP_FIELD_TYPE_TINYINT: + return valid_signed_number(value_str); + case OLAP_FIELD_TYPE_SMALLINT: + return valid_signed_number(value_str); + case OLAP_FIELD_TYPE_INT: + return valid_signed_number(value_str); + case OLAP_FIELD_TYPE_BIGINT: + return valid_signed_number(value_str); + case OLAP_FIELD_TYPE_LARGEINT: + return valid_signed_number(value_str); + case OLAP_FIELD_TYPE_UNSIGNED_TINYINT: + return valid_unsigned_number(value_str); + case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: + return valid_unsigned_number(value_str); + case OLAP_FIELD_TYPE_UNSIGNED_INT: + return valid_unsigned_number(value_str); + case OLAP_FIELD_TYPE_UNSIGNED_BIGINT: + return valid_unsigned_number(value_str); + case OLAP_FIELD_TYPE_DECIMAL: + return valid_decimal(value_str, column.precision(), column.frac()); + case OLAP_FIELD_TYPE_CHAR: + case OLAP_FIELD_TYPE_VARCHAR: + return value_str.size() <= column.length(); + case OLAP_FIELD_TYPE_DATE: + case OLAP_FIELD_TYPE_DATETIME: + return valid_datetime(value_str); + case OLAP_FIELD_TYPE_BOOL: + return valid_bool(value_str); + default: + OLAP_LOG_WARNING("unknown field type. [type=%d]", field_type); } return false; } @@ -212,7 +212,8 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio matched = false; } } catch (regex_error& e) { - VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what() << "]"; + VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what() + << "]"; matched = false; } @@ -227,7 +228,8 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio } OLAPStatus DeleteHandler::init(const TabletSchema& schema, - const DelPredicateArray& delete_conditions, int64_t version, const Reader* reader) { + const DelPredicateArray& delete_conditions, int64_t version, + const Reader* reader) { DCHECK(!_is_inited) << "reinitialize delete handler."; DCHECK(version >= 0) << "invalid parameters. version=" << version; @@ -288,7 +290,6 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema, if (reader != nullptr) { temp.column_predicate_vec.push_back(reader->_parse_to_predicate(condition, true)); } - } _del_conds.emplace_back(std::move(temp)); @@ -303,7 +304,8 @@ bool DeleteHandler::is_filter_data(const int64_t data_version, const RowCursor& // 根据语义,存储在_del_conds的删除条件应该是OR关系 // 因此,只要数据符合其中一条过滤条件,则返回true for (const auto& del_cond : _del_conds) { - if (data_version <= del_cond.filter_version && del_cond.del_cond->delete_conditions_eval(row)) { + if (data_version <= del_cond.filter_version && + del_cond.del_cond->delete_conditions_eval(row)) { return true; } } @@ -338,9 +340,9 @@ void DeleteHandler::finalize() { _is_inited = false; } -void DeleteHandler::get_delete_conditions_after_version(int64_t version, - std::vector* delete_conditions, - AndBlockColumnPredicate* and_block_column_predicate_ptr) const { +void DeleteHandler::get_delete_conditions_after_version( + int64_t version, std::vector* delete_conditions, + AndBlockColumnPredicate* and_block_column_predicate_ptr) const { for (auto& del_cond : _del_conds) { if (del_cond.filter_version > version) { delete_conditions->emplace_back(del_cond.del_cond); @@ -348,18 +350,20 @@ void DeleteHandler::get_delete_conditions_after_version(int64_t version, // now, only query support delete column predicate operator if (!del_cond.column_predicate_vec.empty()) { if (del_cond.column_predicate_vec.size() == 1) { - auto single_column_block_predicate = new SingleColumnBlockPredicate( - del_cond.column_predicate_vec[0]); - and_block_column_predicate_ptr->add_column_predicate(single_column_block_predicate); + auto single_column_block_predicate = + new SingleColumnBlockPredicate(del_cond.column_predicate_vec[0]); + and_block_column_predicate_ptr->add_column_predicate( + single_column_block_predicate); } else { auto or_column_predicate = new OrBlockColumnPredicate(); // build or_column_predicate - std::for_each(del_cond.column_predicate_vec.cbegin(), del_cond.column_predicate_vec.cend(), \ - [&or_column_predicate](const ColumnPredicate *predicate) { - or_column_predicate->add_column_predicate(new SingleColumnBlockPredicate(predicate)); - } - ); + std::for_each(del_cond.column_predicate_vec.cbegin(), + del_cond.column_predicate_vec.cend(), + [&or_column_predicate](const ColumnPredicate* predicate) { + or_column_predicate->add_column_predicate( + new SingleColumnBlockPredicate(predicate)); + }); and_block_column_predicate_ptr->add_column_predicate(or_column_predicate); } } diff --git a/be/src/olap/utils.cpp b/be/src/olap/utils.cpp index 3f51e5e939..70f9ffa5a5 100644 --- a/be/src/olap/utils.cpp +++ b/be/src/olap/utils.cpp @@ -26,10 +26,10 @@ #include #include -#include -#include #include #include +#include +#include #include #include @@ -86,7 +86,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si return OLAP_ERR_COMPRESS_ERROR; } else if (*written_len > dest_len) { VLOG_NOTICE << "buffer overflow when compressing. " - << "dest_len=" << dest_len << ", written_len=" << *written_len; + << "dest_len=" << dest_len << ", written_len=" << *written_len; return OLAP_ERR_BUFFER_OVERFLOW; } @@ -107,7 +107,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si return OLAP_ERR_COMPRESS_ERROR; } else if (*written_len > dest_len) { VLOG_NOTICE << "buffer overflow when compressing. " - << ", dest_len=" << dest_len << ", written_len=" << *written_len; + << ", dest_len=" << dest_len << ", written_len=" << *written_len; return OLAP_ERR_BUFFER_OVERFLOW; } @@ -121,7 +121,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si *written_len = lz4_res; if (0 == lz4_res) { VLOG_TRACE << "compress failed. src_len=" << src_len << ", dest_len=" << dest_len - << ", written_len=" << *written_len << ", lz4_res=" << lz4_res; + << ", written_len=" << *written_len << ", lz4_res=" << lz4_res; return OLAP_ERR_BUFFER_OVERFLOW; } break; @@ -777,8 +777,8 @@ OLAPStatus read_write_test_file(const string& test_file_path) { } if (remove(test_file_path.c_str()) != 0) { char errmsg[64]; - VLOG_NOTICE << "fail to delete test file. [err='" << strerror_r(errno, errmsg, 64) << "' path='" - << test_file_path << "']"; + VLOG_NOTICE << "fail to delete test file. [err='" << strerror_r(errno, errmsg, 64) + << "' path='" << test_file_path << "']"; return OLAP_ERR_IO_ERROR; } return res; @@ -913,9 +913,9 @@ bool valid_signed_number(const std::string& value_str) { bool valid_decimal(const string& value_str, const uint32_t precision, const uint32_t frac) { const char* decimal_pattern = "-?\\d+(.\\d+)?"; - boost::regex e(decimal_pattern); - boost::smatch what; - if (!boost::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) { + std::regex e(decimal_pattern); + std::smatch what; + if (!std::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) { LOG(WARNING) << "invalid decimal value. [value=" << value_str << "]"; return false; } @@ -947,10 +947,10 @@ bool valid_datetime(const string& value_str) { const char* datetime_pattern = "((?:\\d){4})-((?:\\d){2})-((?:\\d){2})[ ]*" "(((?:\\d){2}):((?:\\d){2}):((?:\\d){2}))?"; - boost::regex e(datetime_pattern); - boost::smatch what; + std::regex e(datetime_pattern); + std::smatch what; - if (boost::regex_match(value_str, what, e)) { + if (std::regex_match(value_str, what, e)) { if (what[0].str().size() != value_str.size()) { OLAP_LOG_WARNING("datetime str does not fully match. [value_str=%s match=%s]", value_str.c_str(), what[0].str().c_str()); diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp index b6bb0cfe8e..e78e835109 100644 --- a/be/test/olap/delete_handler_test.cpp +++ b/be/test/olap/delete_handler_test.cpp @@ -22,8 +22,8 @@ #include #include -#include #include +#include #include #include @@ -32,13 +32,12 @@ #include "olap/push_handler.h" #include "olap/storage_engine.h" #include "olap/utils.h" +#include "util/cpu_info.h" #include "util/file_utils.h" #include "util/logging.h" -#include "util/cpu_info.h" using namespace std; using namespace doris; -using namespace boost::assign; using google::protobuf::RepeatedPtrField; namespace doris { @@ -1117,7 +1116,7 @@ TEST_F(TestDeleteHandler, FilterDataVersion) { _delete_handler.finalize(); } -} // namespace doris +} // namespace doris int main(int argc, char** argv) { doris::init_glog("be-test"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java index b583c7b517..1c80d72079 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java @@ -18,6 +18,7 @@ package org.apache.doris.load; import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.DateLiteral; import org.apache.doris.analysis.DeleteStmt; import org.apache.doris.analysis.InPredicate; import org.apache.doris.analysis.IsNullPredicate; @@ -526,12 +527,16 @@ public class DeleteHandler implements Writable { // if a bool cond passed to be, be's zone_map cannot handle bool correctly, // change it to a tinyint type here; value = ((LiteralExpr) binaryPredicate.getChild(1)).getStringValue(); - if (column.getDataType() == PrimitiveType.BOOLEAN ) { + if (column.getDataType() == PrimitiveType.BOOLEAN) { if (value.toLowerCase().equals("true")) { binaryPredicate.setChild(1, LiteralExpr.create("1", Type.TINYINT)); } else if (value.toLowerCase().equals("false")) { binaryPredicate.setChild(1, LiteralExpr.create("0", Type.TINYINT)); } + } else if (column.getDataType() == PrimitiveType.DATE || column.getDataType() == PrimitiveType.DATETIME) { + DateLiteral dateLiteral = new DateLiteral(value, Type.fromPrimitiveType(column.getDataType())); + value = dateLiteral.getStringValue(); + binaryPredicate.setChild(1, LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType()))); } LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())); } catch (AnalysisException e) { @@ -544,7 +549,13 @@ public class DeleteHandler implements Writable { InPredicate inPredicate = (InPredicate) condition; for (int i = 1; i <= inPredicate.getInElementNum(); i++) { value = ((LiteralExpr) inPredicate.getChild(i)).getStringValue(); - LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())); + if (column.getDataType() == PrimitiveType.DATE || column.getDataType() == PrimitiveType.DATETIME) { + DateLiteral dateLiteral = new DateLiteral(value, Type.fromPrimitiveType(column.getDataType())); + value = dateLiteral.getStringValue(); + inPredicate.setChild(i, LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType()))); + } else { + LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())); + } } } catch (AnalysisException e) { throw new DdlException("Invalid column value[" + value + "] for column " + columnName);