Fix timeout error when delete condition contains invalid datetime format (#6030)

* add date time format check in delete statment
This commit is contained in:
Zhengguo Yang
2021-06-29 09:47:42 +08:00
committed by GitHub
parent dd455af844
commit fe65a623c1
4 changed files with 85 additions and 71 deletions

View File

@ -16,14 +16,13 @@
// under the License.
#include "olap/delete_handler.h"
#include "olap/reader.h"
#include <errno.h>
#include <json2pb/pb_to_json.h>
#include <thrift/protocol/TDebugProtocol.h>
#include <boost/regex.hpp>
#include <limits>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
@ -31,6 +30,7 @@
#include "gen_cpp/olap_file.pb.h"
#include "olap/olap_common.h"
#include "olap/olap_cond.h"
#include "olap/reader.h"
#include "olap/utils.h"
using apache::thrift::ThriftDebugString;
@ -39,10 +39,10 @@ using std::vector;
using std::string;
using std::stringstream;
using boost::regex;
using boost::regex_error;
using boost::regex_match;
using boost::smatch;
using std::regex;
using std::regex_error;
using std::regex_match;
using std::smatch;
using google::protobuf::RepeatedPtrField;
@ -118,37 +118,37 @@ bool DeleteConditionHandler::is_condition_value_valid(const TabletColumn& column
}
FieldType field_type = column.type();
switch(field_type) {
case OLAP_FIELD_TYPE_TINYINT:
return valid_signed_number<int8_t>(value_str);
case OLAP_FIELD_TYPE_SMALLINT:
return valid_signed_number<int16_t>(value_str);
case OLAP_FIELD_TYPE_INT:
return valid_signed_number<int32_t>(value_str);
case OLAP_FIELD_TYPE_BIGINT:
return valid_signed_number<int64_t>(value_str);
case OLAP_FIELD_TYPE_LARGEINT:
return valid_signed_number<int128_t>(value_str);
case OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
return valid_unsigned_number<uint8_t>(value_str);
case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
return valid_unsigned_number<uint16_t>(value_str);
case OLAP_FIELD_TYPE_UNSIGNED_INT:
return valid_unsigned_number<uint32_t>(value_str);
case OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
return valid_unsigned_number<uint64_t>(value_str);
case OLAP_FIELD_TYPE_DECIMAL:
return valid_decimal(value_str, column.precision(), column.frac());
case OLAP_FIELD_TYPE_CHAR:
case OLAP_FIELD_TYPE_VARCHAR:
return value_str.size() <= column.length();
case OLAP_FIELD_TYPE_DATE:
case OLAP_FIELD_TYPE_DATETIME:
return valid_datetime(value_str);
case OLAP_FIELD_TYPE_BOOL:
return valid_bool(value_str);
default:
OLAP_LOG_WARNING("unknown field type. [type=%d]", field_type);
switch (field_type) {
case OLAP_FIELD_TYPE_TINYINT:
return valid_signed_number<int8_t>(value_str);
case OLAP_FIELD_TYPE_SMALLINT:
return valid_signed_number<int16_t>(value_str);
case OLAP_FIELD_TYPE_INT:
return valid_signed_number<int32_t>(value_str);
case OLAP_FIELD_TYPE_BIGINT:
return valid_signed_number<int64_t>(value_str);
case OLAP_FIELD_TYPE_LARGEINT:
return valid_signed_number<int128_t>(value_str);
case OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
return valid_unsigned_number<uint8_t>(value_str);
case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
return valid_unsigned_number<uint16_t>(value_str);
case OLAP_FIELD_TYPE_UNSIGNED_INT:
return valid_unsigned_number<uint32_t>(value_str);
case OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
return valid_unsigned_number<uint64_t>(value_str);
case OLAP_FIELD_TYPE_DECIMAL:
return valid_decimal(value_str, column.precision(), column.frac());
case OLAP_FIELD_TYPE_CHAR:
case OLAP_FIELD_TYPE_VARCHAR:
return value_str.size() <= column.length();
case OLAP_FIELD_TYPE_DATE:
case OLAP_FIELD_TYPE_DATETIME:
return valid_datetime(value_str);
case OLAP_FIELD_TYPE_BOOL:
return valid_bool(value_str);
default:
OLAP_LOG_WARNING("unknown field type. [type=%d]", field_type);
}
return false;
}
@ -212,7 +212,8 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio
matched = false;
}
} catch (regex_error& e) {
VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what() << "]";
VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what()
<< "]";
matched = false;
}
@ -227,7 +228,8 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio
}
OLAPStatus DeleteHandler::init(const TabletSchema& schema,
const DelPredicateArray& delete_conditions, int64_t version, const Reader* reader) {
const DelPredicateArray& delete_conditions, int64_t version,
const Reader* reader) {
DCHECK(!_is_inited) << "reinitialize delete handler.";
DCHECK(version >= 0) << "invalid parameters. version=" << version;
@ -288,7 +290,6 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema,
if (reader != nullptr) {
temp.column_predicate_vec.push_back(reader->_parse_to_predicate(condition, true));
}
}
_del_conds.emplace_back(std::move(temp));
@ -303,7 +304,8 @@ bool DeleteHandler::is_filter_data(const int64_t data_version, const RowCursor&
// 根据语义,存储在_del_conds的删除条件应该是OR关系
// 因此,只要数据符合其中一条过滤条件,则返回true
for (const auto& del_cond : _del_conds) {
if (data_version <= del_cond.filter_version && del_cond.del_cond->delete_conditions_eval(row)) {
if (data_version <= del_cond.filter_version &&
del_cond.del_cond->delete_conditions_eval(row)) {
return true;
}
}
@ -338,9 +340,9 @@ void DeleteHandler::finalize() {
_is_inited = false;
}
void DeleteHandler::get_delete_conditions_after_version(int64_t version,
std::vector<const Conditions *>* delete_conditions,
AndBlockColumnPredicate* and_block_column_predicate_ptr) const {
void DeleteHandler::get_delete_conditions_after_version(
int64_t version, std::vector<const Conditions*>* delete_conditions,
AndBlockColumnPredicate* and_block_column_predicate_ptr) const {
for (auto& del_cond : _del_conds) {
if (del_cond.filter_version > version) {
delete_conditions->emplace_back(del_cond.del_cond);
@ -348,18 +350,20 @@ void DeleteHandler::get_delete_conditions_after_version(int64_t version,
// now, only query support delete column predicate operator
if (!del_cond.column_predicate_vec.empty()) {
if (del_cond.column_predicate_vec.size() == 1) {
auto single_column_block_predicate = new SingleColumnBlockPredicate(
del_cond.column_predicate_vec[0]);
and_block_column_predicate_ptr->add_column_predicate(single_column_block_predicate);
auto single_column_block_predicate =
new SingleColumnBlockPredicate(del_cond.column_predicate_vec[0]);
and_block_column_predicate_ptr->add_column_predicate(
single_column_block_predicate);
} else {
auto or_column_predicate = new OrBlockColumnPredicate();
// build or_column_predicate
std::for_each(del_cond.column_predicate_vec.cbegin(), del_cond.column_predicate_vec.cend(), \
[&or_column_predicate](const ColumnPredicate *predicate) {
or_column_predicate->add_column_predicate(new SingleColumnBlockPredicate(predicate));
}
);
std::for_each(del_cond.column_predicate_vec.cbegin(),
del_cond.column_predicate_vec.cend(),
[&or_column_predicate](const ColumnPredicate* predicate) {
or_column_predicate->add_column_predicate(
new SingleColumnBlockPredicate(predicate));
});
and_block_column_predicate_ptr->add_column_predicate(or_column_predicate);
}
}

View File

@ -26,10 +26,10 @@
#include <time.h>
#include <unistd.h>
#include <filesystem>
#include <boost/regex.hpp>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <regex>
#include <string>
#include <vector>
@ -86,7 +86,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si
return OLAP_ERR_COMPRESS_ERROR;
} else if (*written_len > dest_len) {
VLOG_NOTICE << "buffer overflow when compressing. "
<< "dest_len=" << dest_len << ", written_len=" << *written_len;
<< "dest_len=" << dest_len << ", written_len=" << *written_len;
return OLAP_ERR_BUFFER_OVERFLOW;
}
@ -107,7 +107,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si
return OLAP_ERR_COMPRESS_ERROR;
} else if (*written_len > dest_len) {
VLOG_NOTICE << "buffer overflow when compressing. "
<< ", dest_len=" << dest_len << ", written_len=" << *written_len;
<< ", dest_len=" << dest_len << ", written_len=" << *written_len;
return OLAP_ERR_BUFFER_OVERFLOW;
}
@ -121,7 +121,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si
*written_len = lz4_res;
if (0 == lz4_res) {
VLOG_TRACE << "compress failed. src_len=" << src_len << ", dest_len=" << dest_len
<< ", written_len=" << *written_len << ", lz4_res=" << lz4_res;
<< ", written_len=" << *written_len << ", lz4_res=" << lz4_res;
return OLAP_ERR_BUFFER_OVERFLOW;
}
break;
@ -777,8 +777,8 @@ OLAPStatus read_write_test_file(const string& test_file_path) {
}
if (remove(test_file_path.c_str()) != 0) {
char errmsg[64];
VLOG_NOTICE << "fail to delete test file. [err='" << strerror_r(errno, errmsg, 64) << "' path='"
<< test_file_path << "']";
VLOG_NOTICE << "fail to delete test file. [err='" << strerror_r(errno, errmsg, 64)
<< "' path='" << test_file_path << "']";
return OLAP_ERR_IO_ERROR;
}
return res;
@ -913,9 +913,9 @@ bool valid_signed_number<int128_t>(const std::string& value_str) {
bool valid_decimal(const string& value_str, const uint32_t precision, const uint32_t frac) {
const char* decimal_pattern = "-?\\d+(.\\d+)?";
boost::regex e(decimal_pattern);
boost::smatch what;
if (!boost::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) {
std::regex e(decimal_pattern);
std::smatch what;
if (!std::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) {
LOG(WARNING) << "invalid decimal value. [value=" << value_str << "]";
return false;
}
@ -947,10 +947,10 @@ bool valid_datetime(const string& value_str) {
const char* datetime_pattern =
"((?:\\d){4})-((?:\\d){2})-((?:\\d){2})[ ]*"
"(((?:\\d){2}):((?:\\d){2}):((?:\\d){2}))?";
boost::regex e(datetime_pattern);
boost::smatch what;
std::regex e(datetime_pattern);
std::smatch what;
if (boost::regex_match(value_str, what, e)) {
if (std::regex_match(value_str, what, e)) {
if (what[0].str().size() != value_str.size()) {
OLAP_LOG_WARNING("datetime str does not fully match. [value_str=%s match=%s]",
value_str.c_str(), what[0].str().c_str());

View File

@ -22,8 +22,8 @@
#include <algorithm>
#include <boost/assign.hpp>
#include <boost/regex.hpp>
#include <iostream>
#include <regex>
#include <string>
#include <vector>
@ -32,13 +32,12 @@
#include "olap/push_handler.h"
#include "olap/storage_engine.h"
#include "olap/utils.h"
#include "util/cpu_info.h"
#include "util/file_utils.h"
#include "util/logging.h"
#include "util/cpu_info.h"
using namespace std;
using namespace doris;
using namespace boost::assign;
using google::protobuf::RepeatedPtrField;
namespace doris {
@ -1117,7 +1116,7 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
_delete_handler.finalize();
}
} // namespace doris
} // namespace doris
int main(int argc, char** argv) {
doris::init_glog("be-test");

View File

@ -18,6 +18,7 @@
package org.apache.doris.load;
import org.apache.doris.analysis.BinaryPredicate;
import org.apache.doris.analysis.DateLiteral;
import org.apache.doris.analysis.DeleteStmt;
import org.apache.doris.analysis.InPredicate;
import org.apache.doris.analysis.IsNullPredicate;
@ -526,12 +527,16 @@ public class DeleteHandler implements Writable {
// if a bool cond passed to be, be's zone_map cannot handle bool correctly,
// change it to a tinyint type here;
value = ((LiteralExpr) binaryPredicate.getChild(1)).getStringValue();
if (column.getDataType() == PrimitiveType.BOOLEAN ) {
if (column.getDataType() == PrimitiveType.BOOLEAN) {
if (value.toLowerCase().equals("true")) {
binaryPredicate.setChild(1, LiteralExpr.create("1", Type.TINYINT));
} else if (value.toLowerCase().equals("false")) {
binaryPredicate.setChild(1, LiteralExpr.create("0", Type.TINYINT));
}
} else if (column.getDataType() == PrimitiveType.DATE || column.getDataType() == PrimitiveType.DATETIME) {
DateLiteral dateLiteral = new DateLiteral(value, Type.fromPrimitiveType(column.getDataType()));
value = dateLiteral.getStringValue();
binaryPredicate.setChild(1, LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())));
}
LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType()));
} catch (AnalysisException e) {
@ -544,7 +549,13 @@ public class DeleteHandler implements Writable {
InPredicate inPredicate = (InPredicate) condition;
for (int i = 1; i <= inPredicate.getInElementNum(); i++) {
value = ((LiteralExpr) inPredicate.getChild(i)).getStringValue();
LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType()));
if (column.getDataType() == PrimitiveType.DATE || column.getDataType() == PrimitiveType.DATETIME) {
DateLiteral dateLiteral = new DateLiteral(value, Type.fromPrimitiveType(column.getDataType()));
value = dateLiteral.getStringValue();
inPredicate.setChild(i, LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())));
} else {
LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType()));
}
}
} catch (AnalysisException e) {
throw new DdlException("Invalid column value[" + value + "] for column " + columnName);