Fix timeout error when delete condition contains invalid datetime format (#6030)
* add date time format check in delete statment
This commit is contained in:
@ -16,14 +16,13 @@
|
||||
// under the License.
|
||||
|
||||
#include "olap/delete_handler.h"
|
||||
#include "olap/reader.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <json2pb/pb_to_json.h>
|
||||
#include <thrift/protocol/TDebugProtocol.h>
|
||||
|
||||
#include <boost/regex.hpp>
|
||||
#include <limits>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -31,6 +30,7 @@
|
||||
#include "gen_cpp/olap_file.pb.h"
|
||||
#include "olap/olap_common.h"
|
||||
#include "olap/olap_cond.h"
|
||||
#include "olap/reader.h"
|
||||
#include "olap/utils.h"
|
||||
|
||||
using apache::thrift::ThriftDebugString;
|
||||
@ -39,10 +39,10 @@ using std::vector;
|
||||
using std::string;
|
||||
using std::stringstream;
|
||||
|
||||
using boost::regex;
|
||||
using boost::regex_error;
|
||||
using boost::regex_match;
|
||||
using boost::smatch;
|
||||
using std::regex;
|
||||
using std::regex_error;
|
||||
using std::regex_match;
|
||||
using std::smatch;
|
||||
|
||||
using google::protobuf::RepeatedPtrField;
|
||||
|
||||
@ -118,37 +118,37 @@ bool DeleteConditionHandler::is_condition_value_valid(const TabletColumn& column
|
||||
}
|
||||
|
||||
FieldType field_type = column.type();
|
||||
switch(field_type) {
|
||||
case OLAP_FIELD_TYPE_TINYINT:
|
||||
return valid_signed_number<int8_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_SMALLINT:
|
||||
return valid_signed_number<int16_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_INT:
|
||||
return valid_signed_number<int32_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_BIGINT:
|
||||
return valid_signed_number<int64_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_LARGEINT:
|
||||
return valid_signed_number<int128_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
|
||||
return valid_unsigned_number<uint8_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
|
||||
return valid_unsigned_number<uint16_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_UNSIGNED_INT:
|
||||
return valid_unsigned_number<uint32_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
|
||||
return valid_unsigned_number<uint64_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_DECIMAL:
|
||||
return valid_decimal(value_str, column.precision(), column.frac());
|
||||
case OLAP_FIELD_TYPE_CHAR:
|
||||
case OLAP_FIELD_TYPE_VARCHAR:
|
||||
return value_str.size() <= column.length();
|
||||
case OLAP_FIELD_TYPE_DATE:
|
||||
case OLAP_FIELD_TYPE_DATETIME:
|
||||
return valid_datetime(value_str);
|
||||
case OLAP_FIELD_TYPE_BOOL:
|
||||
return valid_bool(value_str);
|
||||
default:
|
||||
OLAP_LOG_WARNING("unknown field type. [type=%d]", field_type);
|
||||
switch (field_type) {
|
||||
case OLAP_FIELD_TYPE_TINYINT:
|
||||
return valid_signed_number<int8_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_SMALLINT:
|
||||
return valid_signed_number<int16_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_INT:
|
||||
return valid_signed_number<int32_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_BIGINT:
|
||||
return valid_signed_number<int64_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_LARGEINT:
|
||||
return valid_signed_number<int128_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
|
||||
return valid_unsigned_number<uint8_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
|
||||
return valid_unsigned_number<uint16_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_UNSIGNED_INT:
|
||||
return valid_unsigned_number<uint32_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_UNSIGNED_BIGINT:
|
||||
return valid_unsigned_number<uint64_t>(value_str);
|
||||
case OLAP_FIELD_TYPE_DECIMAL:
|
||||
return valid_decimal(value_str, column.precision(), column.frac());
|
||||
case OLAP_FIELD_TYPE_CHAR:
|
||||
case OLAP_FIELD_TYPE_VARCHAR:
|
||||
return value_str.size() <= column.length();
|
||||
case OLAP_FIELD_TYPE_DATE:
|
||||
case OLAP_FIELD_TYPE_DATETIME:
|
||||
return valid_datetime(value_str);
|
||||
case OLAP_FIELD_TYPE_BOOL:
|
||||
return valid_bool(value_str);
|
||||
default:
|
||||
OLAP_LOG_WARNING("unknown field type. [type=%d]", field_type);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -212,7 +212,8 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio
|
||||
matched = false;
|
||||
}
|
||||
} catch (regex_error& e) {
|
||||
VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what() << "]";
|
||||
VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what()
|
||||
<< "]";
|
||||
matched = false;
|
||||
}
|
||||
|
||||
@ -227,7 +228,8 @@ bool DeleteHandler::_parse_condition(const std::string& condition_str, TConditio
|
||||
}
|
||||
|
||||
OLAPStatus DeleteHandler::init(const TabletSchema& schema,
|
||||
const DelPredicateArray& delete_conditions, int64_t version, const Reader* reader) {
|
||||
const DelPredicateArray& delete_conditions, int64_t version,
|
||||
const Reader* reader) {
|
||||
DCHECK(!_is_inited) << "reinitialize delete handler.";
|
||||
DCHECK(version >= 0) << "invalid parameters. version=" << version;
|
||||
|
||||
@ -288,7 +290,6 @@ OLAPStatus DeleteHandler::init(const TabletSchema& schema,
|
||||
if (reader != nullptr) {
|
||||
temp.column_predicate_vec.push_back(reader->_parse_to_predicate(condition, true));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
_del_conds.emplace_back(std::move(temp));
|
||||
@ -303,7 +304,8 @@ bool DeleteHandler::is_filter_data(const int64_t data_version, const RowCursor&
|
||||
// 根据语义,存储在_del_conds的删除条件应该是OR关系
|
||||
// 因此,只要数据符合其中一条过滤条件,则返回true
|
||||
for (const auto& del_cond : _del_conds) {
|
||||
if (data_version <= del_cond.filter_version && del_cond.del_cond->delete_conditions_eval(row)) {
|
||||
if (data_version <= del_cond.filter_version &&
|
||||
del_cond.del_cond->delete_conditions_eval(row)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -338,9 +340,9 @@ void DeleteHandler::finalize() {
|
||||
_is_inited = false;
|
||||
}
|
||||
|
||||
void DeleteHandler::get_delete_conditions_after_version(int64_t version,
|
||||
std::vector<const Conditions *>* delete_conditions,
|
||||
AndBlockColumnPredicate* and_block_column_predicate_ptr) const {
|
||||
void DeleteHandler::get_delete_conditions_after_version(
|
||||
int64_t version, std::vector<const Conditions*>* delete_conditions,
|
||||
AndBlockColumnPredicate* and_block_column_predicate_ptr) const {
|
||||
for (auto& del_cond : _del_conds) {
|
||||
if (del_cond.filter_version > version) {
|
||||
delete_conditions->emplace_back(del_cond.del_cond);
|
||||
@ -348,18 +350,20 @@ void DeleteHandler::get_delete_conditions_after_version(int64_t version,
|
||||
// now, only query support delete column predicate operator
|
||||
if (!del_cond.column_predicate_vec.empty()) {
|
||||
if (del_cond.column_predicate_vec.size() == 1) {
|
||||
auto single_column_block_predicate = new SingleColumnBlockPredicate(
|
||||
del_cond.column_predicate_vec[0]);
|
||||
and_block_column_predicate_ptr->add_column_predicate(single_column_block_predicate);
|
||||
auto single_column_block_predicate =
|
||||
new SingleColumnBlockPredicate(del_cond.column_predicate_vec[0]);
|
||||
and_block_column_predicate_ptr->add_column_predicate(
|
||||
single_column_block_predicate);
|
||||
} else {
|
||||
auto or_column_predicate = new OrBlockColumnPredicate();
|
||||
|
||||
// build or_column_predicate
|
||||
std::for_each(del_cond.column_predicate_vec.cbegin(), del_cond.column_predicate_vec.cend(), \
|
||||
[&or_column_predicate](const ColumnPredicate *predicate) {
|
||||
or_column_predicate->add_column_predicate(new SingleColumnBlockPredicate(predicate));
|
||||
}
|
||||
);
|
||||
std::for_each(del_cond.column_predicate_vec.cbegin(),
|
||||
del_cond.column_predicate_vec.cend(),
|
||||
[&or_column_predicate](const ColumnPredicate* predicate) {
|
||||
or_column_predicate->add_column_predicate(
|
||||
new SingleColumnBlockPredicate(predicate));
|
||||
});
|
||||
and_block_column_predicate_ptr->add_column_predicate(or_column_predicate);
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,10 +26,10 @@
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <filesystem>
|
||||
#include <boost/regex.hpp>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -86,7 +86,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si
|
||||
return OLAP_ERR_COMPRESS_ERROR;
|
||||
} else if (*written_len > dest_len) {
|
||||
VLOG_NOTICE << "buffer overflow when compressing. "
|
||||
<< "dest_len=" << dest_len << ", written_len=" << *written_len;
|
||||
<< "dest_len=" << dest_len << ", written_len=" << *written_len;
|
||||
|
||||
return OLAP_ERR_BUFFER_OVERFLOW;
|
||||
}
|
||||
@ -107,7 +107,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si
|
||||
return OLAP_ERR_COMPRESS_ERROR;
|
||||
} else if (*written_len > dest_len) {
|
||||
VLOG_NOTICE << "buffer overflow when compressing. "
|
||||
<< ", dest_len=" << dest_len << ", written_len=" << *written_len;
|
||||
<< ", dest_len=" << dest_len << ", written_len=" << *written_len;
|
||||
|
||||
return OLAP_ERR_BUFFER_OVERFLOW;
|
||||
}
|
||||
@ -121,7 +121,7 @@ OLAPStatus olap_compress(const char* src_buf, size_t src_len, char* dest_buf, si
|
||||
*written_len = lz4_res;
|
||||
if (0 == lz4_res) {
|
||||
VLOG_TRACE << "compress failed. src_len=" << src_len << ", dest_len=" << dest_len
|
||||
<< ", written_len=" << *written_len << ", lz4_res=" << lz4_res;
|
||||
<< ", written_len=" << *written_len << ", lz4_res=" << lz4_res;
|
||||
return OLAP_ERR_BUFFER_OVERFLOW;
|
||||
}
|
||||
break;
|
||||
@ -777,8 +777,8 @@ OLAPStatus read_write_test_file(const string& test_file_path) {
|
||||
}
|
||||
if (remove(test_file_path.c_str()) != 0) {
|
||||
char errmsg[64];
|
||||
VLOG_NOTICE << "fail to delete test file. [err='" << strerror_r(errno, errmsg, 64) << "' path='"
|
||||
<< test_file_path << "']";
|
||||
VLOG_NOTICE << "fail to delete test file. [err='" << strerror_r(errno, errmsg, 64)
|
||||
<< "' path='" << test_file_path << "']";
|
||||
return OLAP_ERR_IO_ERROR;
|
||||
}
|
||||
return res;
|
||||
@ -913,9 +913,9 @@ bool valid_signed_number<int128_t>(const std::string& value_str) {
|
||||
|
||||
bool valid_decimal(const string& value_str, const uint32_t precision, const uint32_t frac) {
|
||||
const char* decimal_pattern = "-?\\d+(.\\d+)?";
|
||||
boost::regex e(decimal_pattern);
|
||||
boost::smatch what;
|
||||
if (!boost::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) {
|
||||
std::regex e(decimal_pattern);
|
||||
std::smatch what;
|
||||
if (!std::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) {
|
||||
LOG(WARNING) << "invalid decimal value. [value=" << value_str << "]";
|
||||
return false;
|
||||
}
|
||||
@ -947,10 +947,10 @@ bool valid_datetime(const string& value_str) {
|
||||
const char* datetime_pattern =
|
||||
"((?:\\d){4})-((?:\\d){2})-((?:\\d){2})[ ]*"
|
||||
"(((?:\\d){2}):((?:\\d){2}):((?:\\d){2}))?";
|
||||
boost::regex e(datetime_pattern);
|
||||
boost::smatch what;
|
||||
std::regex e(datetime_pattern);
|
||||
std::smatch what;
|
||||
|
||||
if (boost::regex_match(value_str, what, e)) {
|
||||
if (std::regex_match(value_str, what, e)) {
|
||||
if (what[0].str().size() != value_str.size()) {
|
||||
OLAP_LOG_WARNING("datetime str does not fully match. [value_str=%s match=%s]",
|
||||
value_str.c_str(), what[0].str().c_str());
|
||||
|
||||
@ -22,8 +22,8 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/assign.hpp>
|
||||
#include <boost/regex.hpp>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -32,13 +32,12 @@
|
||||
#include "olap/push_handler.h"
|
||||
#include "olap/storage_engine.h"
|
||||
#include "olap/utils.h"
|
||||
#include "util/cpu_info.h"
|
||||
#include "util/file_utils.h"
|
||||
#include "util/logging.h"
|
||||
#include "util/cpu_info.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace doris;
|
||||
using namespace boost::assign;
|
||||
using google::protobuf::RepeatedPtrField;
|
||||
|
||||
namespace doris {
|
||||
@ -1117,7 +1116,7 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
|
||||
_delete_handler.finalize();
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
} // namespace doris
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
doris::init_glog("be-test");
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
package org.apache.doris.load;
|
||||
|
||||
import org.apache.doris.analysis.BinaryPredicate;
|
||||
import org.apache.doris.analysis.DateLiteral;
|
||||
import org.apache.doris.analysis.DeleteStmt;
|
||||
import org.apache.doris.analysis.InPredicate;
|
||||
import org.apache.doris.analysis.IsNullPredicate;
|
||||
@ -526,12 +527,16 @@ public class DeleteHandler implements Writable {
|
||||
// if a bool cond passed to be, be's zone_map cannot handle bool correctly,
|
||||
// change it to a tinyint type here;
|
||||
value = ((LiteralExpr) binaryPredicate.getChild(1)).getStringValue();
|
||||
if (column.getDataType() == PrimitiveType.BOOLEAN ) {
|
||||
if (column.getDataType() == PrimitiveType.BOOLEAN) {
|
||||
if (value.toLowerCase().equals("true")) {
|
||||
binaryPredicate.setChild(1, LiteralExpr.create("1", Type.TINYINT));
|
||||
} else if (value.toLowerCase().equals("false")) {
|
||||
binaryPredicate.setChild(1, LiteralExpr.create("0", Type.TINYINT));
|
||||
}
|
||||
} else if (column.getDataType() == PrimitiveType.DATE || column.getDataType() == PrimitiveType.DATETIME) {
|
||||
DateLiteral dateLiteral = new DateLiteral(value, Type.fromPrimitiveType(column.getDataType()));
|
||||
value = dateLiteral.getStringValue();
|
||||
binaryPredicate.setChild(1, LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())));
|
||||
}
|
||||
LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType()));
|
||||
} catch (AnalysisException e) {
|
||||
@ -544,7 +549,13 @@ public class DeleteHandler implements Writable {
|
||||
InPredicate inPredicate = (InPredicate) condition;
|
||||
for (int i = 1; i <= inPredicate.getInElementNum(); i++) {
|
||||
value = ((LiteralExpr) inPredicate.getChild(i)).getStringValue();
|
||||
LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType()));
|
||||
if (column.getDataType() == PrimitiveType.DATE || column.getDataType() == PrimitiveType.DATETIME) {
|
||||
DateLiteral dateLiteral = new DateLiteral(value, Type.fromPrimitiveType(column.getDataType()));
|
||||
value = dateLiteral.getStringValue();
|
||||
inPredicate.setChild(i, LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType())));
|
||||
} else {
|
||||
LiteralExpr.create(value, Type.fromPrimitiveType(column.getDataType()));
|
||||
}
|
||||
}
|
||||
} catch (AnalysisException e) {
|
||||
throw new DdlException("Invalid column value[" + value + "] for column " + columnName);
|
||||
|
||||
Reference in New Issue
Block a user