[Fix](DOE) Fix BE core dump when parsing ES epoch_millis date format (#17100)

This commit is contained in:
huangzhaowei
2023-02-28 20:09:35 +08:00
committed by GitHub
parent 94cea0ea6d
commit 9bcc3ae283
13 changed files with 273 additions and 124 deletions

View File

@ -138,6 +138,14 @@ static const std::string INVALID_NULL_VALUE =
return Status::RuntimeError(ss.str()); \
} while (false)
// Parse rapidjson `col` into the date value `dt_val` for column `type`.
// is_date_str == true  -> parse from a date string (GetString is only evaluated
//                         in this branch thanks to `&&` short-circuiting, which
//                         avoids rapidjson's type assertion on numeric values);
// is_date_str == false -> parse from an epoch_millis integer, divided by 1000
//                         because from_unixtime takes seconds.
// NOTE(review): the timezone is hardcoded to "+08:00" — confirm this matches the
// cluster timezone expectation. Also, unlike the macros above, this expands to a
// bare `if` (no do { } while (false) wrapper), so beware dangling-else at call sites.
#define PARSE_DATE(dt_val, col, type, is_date_str) \
if ((is_date_str && \
!dt_val.from_date_str(static_cast<const std::string>(col.GetString()).c_str(), \
col.GetStringLength())) || \
(!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) { \
RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); \
}
template <typename T>
static Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
bool pure_doc_value) {
@ -172,46 +180,59 @@ static Status get_int_value(const rapidjson::Value& col, PrimitiveType type, voi
}
template <typename T, typename RT>
static RT get_date_value_int(const rapidjson::Value& col, PrimitiveType type, bool is_date_str) {
vectorized::DateV2Value<T> dt_slot;
if ((is_date_str &&
!dt_slot.from_date_str(static_cast<const std::string>(col.GetString()).c_str(),
col.GetStringLength())) ||
(!is_date_str && !dt_slot.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {
RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
static Status get_date_value_int(const rapidjson::Value& col, PrimitiveType type, bool is_date_str,
RT* slot) {
constexpr bool is_datetime_v1 = std::is_same_v<T, vectorized::VecDateTimeValue>;
T dt_val;
PARSE_DATE(dt_val, col, type, is_date_str)
if constexpr (is_datetime_v1) {
if (type == TYPE_DATE) {
dt_val.cast_to_date();
} else {
dt_val.to_datetime();
}
}
return binary_cast<doris::vectorized::DateV2Value<T>, RT>(
*reinterpret_cast<vectorized::DateV2Value<T>*>(&dt_slot));
*reinterpret_cast<RT*>(slot) = binary_cast<T, RT>(*reinterpret_cast<T*>(&dt_val));
return Status::OK();
}
template <typename T, typename RT>
static RT get_date_int(const rapidjson::Value& sub_col, PrimitiveType sub_type,
bool pure_doc_value) {
static Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
RT* slot) {
// This happens only when `enable_docvalue_scan = false` and the field holds a timestamp-format date from _source
if (sub_col.IsNumber()) {
if (col.IsNumber()) {
// ES process date/datetime field would use millisecond timestamp for index or docvalue
// processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
// Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
return get_date_value_int<T, RT>(sub_col, sub_type, false);
} else if (sub_col.IsArray() && pure_doc_value) {
return get_date_value_int<T, RT>(col, type, false, slot);
} else if (col.IsArray() && pure_doc_value) {
// This happens only when `enable_docvalue_scan = true`.
// ES adds a default format for every field after ES 6.4; if we do not provide a format for a `date` field, ES imposes
// a standard date format on it, such as `2020-06-16T00:00:00.000Z`.
// At present we only process this string date format. After some PRs are merged into Doris, we will impose `epoch_millis` for
// the date field's docvalue.
if (sub_col[0].IsString()) {
return get_date_value_int<T, RT>(sub_col, sub_type, true);
if (col[0].IsString()) {
return get_date_value_int<T, RT>(col[0], type, true, slot);
}
// ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
return get_date_value_int<T, RT>(sub_col, sub_type, false);
return get_date_value_int<T, RT>(col[0], type, false, slot);
} else {
// This happens only when `enable_docvalue_scan = false` and the field holds a string-format date from _source
RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type);
RETURN_ERROR_IF_COL_IS_NOT_STRING(sub_col, sub_type);
return get_date_value_int<T, RT>(sub_col, sub_type, true);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
return get_date_value_int<T, RT>(col, type, true, slot);
}
}
// Parse a date/datetime value from `col` and append it to `col_ptr`.
// T  - the internal date value type (e.g. vectorized::VecDateTimeValue or a
//      vectorized::DateV2Value specialization) used while parsing.
// RT - the packed integer type stored in the column (int64_t / uint32_t / uint64_t).
// The parsed value is produced by get_date_int<T, RT>() into `data` and inserted
// as raw bytes; the size argument 0 is what insert_data expects for fixed-width
// column data.
template <typename T, typename RT>
static Status fill_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
vectorized::IColumn* col_ptr) {
RT data;
RETURN_IF_ERROR((get_date_int<T, RT>(col, type, pure_doc_value, &data)));
col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
return Status::OK();
}
template <typename T>
static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
@ -535,32 +556,17 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
case TYPE_DATE:
case TYPE_DATETIME:
RETURN_IF_ERROR((fill_date_int<vectorized::VecDateTimeValue, int64_t>(
col, type, pure_doc_value, col_ptr)));
break;
case TYPE_DATEV2:
RETURN_IF_ERROR(
(fill_date_int<vectorized::DateV2Value<vectorized::DateV2ValueType>, uint32_t>(
col, type, pure_doc_value, col_ptr)));
break;
case TYPE_DATETIMEV2: {
// This happens only when `enable_docvalue_scan = false` and the field holds a timestamp-format date from _source
if (col.IsNumber()) {
// ES process date/datetime field would use millisecond timestamp for index or docvalue
// processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
// Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, false));
} else if (col.IsArray() && pure_doc_value) {
// this would happened just only when `enable_docvalue_scan = true`
// ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
// a standard date-format for date field as `2020-06-16T00:00:00.000Z`
// At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
// date field's docvalue
if (col[0].IsString()) {
RETURN_IF_ERROR(fill_date_col(col_ptr, col[0], type, true));
break;
}
// ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, false));
} else {
// this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, true));
}
RETURN_IF_ERROR((fill_date_int<vectorized::DateV2Value<vectorized::DateTimeV2ValueType>,
uint64_t>(col, type, pure_doc_value, col_ptr)));
break;
}
case TYPE_ARRAY: {
@ -623,7 +629,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
break;
}
case TYPE_FLOAT: {
float val;
float val {};
RETURN_IF_ERROR(
get_float_value<float>(sub_col, sub_type, &val, pure_doc_value));
array.push_back(val);
@ -671,13 +677,19 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
// see https://github.com/apache/doris/pull/16304
// No need to support date and datetime types.
case TYPE_DATEV2: {
array.push_back(get_date_int<vectorized::DateV2ValueType, uint32_t>(
sub_col, sub_type, pure_doc_value));
uint32_t data;
RETURN_IF_ERROR(
(get_date_int<vectorized::DateV2Value<vectorized::DateV2ValueType>,
uint32_t>(sub_col, sub_type, pure_doc_value, &data)));
array.push_back(data);
break;
}
case TYPE_DATETIMEV2: {
array.push_back(get_date_int<vectorized::DateTimeV2ValueType, uint64_t>(
sub_col, sub_type, pure_doc_value));
uint64_t data;
RETURN_IF_ERROR(
(get_date_int<vectorized::DateV2Value<vectorized::DateTimeV2ValueType>,
uint64_t>(sub_col, sub_type, pure_doc_value, &data)));
array.push_back(data);
break;
}
default: {
@ -701,56 +713,4 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
return Status::OK();
}
// Parse `col` into the date/datetime representation selected by `type` and
// append the packed value to `col_ptr`.
// is_date_str == true  -> `col` holds a date string (e.g. "2020-06-16 00:00:00");
// is_date_str == false -> `col` holds an epoch_millis integer (divided by 1000
//                         because from_unixtime takes seconds; timezone "+08:00").
// Returns a RuntimeError Status on parse failure or unsupported `type`.
Status ScrollParser::fill_date_col(vectorized::IColumn* col_ptr, const rapidjson::Value& col,
                                   PrimitiveType type, bool is_date_str) {
    // BUGFIX: GetString()/GetStringLength() assert that the rapidjson value is a
    // string. The old code called them unconditionally, so a numeric epoch_millis
    // value crashed the BE with a core dump. Only touch the string accessors when
    // the caller says the value is a string.
    const char* val = nullptr;
    size_t val_size = 0;
    if (is_date_str) {
        val = col.GetString();
        val_size = col.GetStringLength();
    }
    if (type == TYPE_DATE || type == TYPE_DATETIME) {
        vectorized::VecDateTimeValue dt_val;
        if ((is_date_str && !dt_val.from_date_str(val, val_size)) ||
            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
        }
        // VecDateTimeValue carries both date and datetime; normalize to `type`.
        if (type == TYPE_DATE) {
            dt_val.cast_to_date();
        } else {
            dt_val.to_datetime();
        }
        auto date_packed_int = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>(
                *reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val));
        col_ptr->insert_data(reinterpret_cast<const char*>(&date_packed_int), 0);
        return Status::OK();
    } else if (type == TYPE_DATEV2) {
        vectorized::DateV2Value<doris::vectorized::DateV2ValueType> dt_val;
        if ((is_date_str && !dt_val.from_date_str(val, val_size)) ||
            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
        }
        auto date_packed_int = binary_cast<
                doris::vectorized::DateV2Value<doris::vectorized::DateV2ValueType>, uint32_t>(
                *reinterpret_cast<vectorized::DateV2Value<doris::vectorized::DateV2ValueType>*>(
                        &dt_val));
        col_ptr->insert_data(reinterpret_cast<const char*>(&date_packed_int), 0);
        return Status::OK();
    } else if (type == TYPE_DATETIMEV2) {
        vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType> dt_val;
        if ((is_date_str && !dt_val.from_date_str(val, val_size)) ||
            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
        }
        auto date_packed_int = binary_cast<
                vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>, uint64_t>(
                *reinterpret_cast<vectorized::DateV2Value<doris::vectorized::DateTimeV2ValueType>*>(
                        &dt_val));
        col_ptr->insert_data(reinterpret_cast<const char*>(&date_packed_int), 0);
        return Status::OK();
    } else {
        return Status::InternalError("Unsupported datetime type: " + type_to_string(type));
    }
}
} // namespace doris

View File

@ -40,13 +40,6 @@ public:
const std::string& get_scroll_id();
int get_size() const;
private:
// helper method for processing date/datetime cols with rapidjson::Value
// type is used for distinguish date and datetime
// is_date_str indicate parse datetime from string, otherwise from epoch_millis
Status fill_date_col(vectorized::IColumn* col_ptr, const rapidjson::Value& col,
PrimitiveType type, bool is_date_str);
private:
std::string _scroll_id;
int _size;

View File

@ -3,6 +3,9 @@
"test2": "text#1",
"test3": 3.14,
"test4": "2022-08-08",
"test5": "2022-08-08 12:10:10",
"test6": 1659931810000,
"test7": 1659931810000,
"c_bool": [true, false, true, true],
"c_byte": [1, -2, -3, 4],
"c_short": [128, 129, -129, -130],

View File

@ -0,0 +1,25 @@
{
"test1": "string1",
"test2": "text#1",
"test3": 3.14,
"test4": "2022-08-08",
"c_bool": [true, false, true, true],
"c_byte": [1, -2, -3, 4],
"c_short": [128, 129, -129, -130],
"c_integer": [32768, 32769, -32769, -32770],
"c_long": [-1, 0, 1, 2],
"c_unsigned_long": [0, 1, 2, 3],
"c_float": [1.0, 1.1, 1.2, 1.3],
"c_half_float": [1, 2, 3, 4],
"c_double": [1, 2, 3, 4],
"c_scaled_float": [1, 2, 3, 4],
"c_date": ["2020-01-01", "2020-01-02"],
"c_datetime": ["2020-01-01 12:00:00", "2020-01-02 13:01:01"],
"c_keyword": ["a", "b", "c"],
"c_text": ["d", "e", "f"],
"c_ip": ["192.168.0.1", "127.0.0.1"],
"c_person": [
{"name": "Andy", "age": 18},
{"name": "Tim", "age": 28}
]
}

View File

@ -3,6 +3,9 @@
"test2": "text2",
"test3": 4,
"test4": "2022-08-08",
"test5": "2022-08-09 12:10:10",
"test6": 1660018210000,
"test7": "2022-08-09 12:10:10",
"c_bool": [true, false, true, true],
"c_byte": [1, -2, -3, 4],
"c_short": [128, 129, -129, -130],

View File

@ -0,0 +1,25 @@
{
"test1": "string2",
"test2": "text2",
"test3": 4,
"test4": "2022-08-08",
"c_bool": [true, false, true, true],
"c_byte": [1, -2, -3, 4],
"c_short": [128, 129, -129, -130],
"c_integer": [32768, 32769, -32769, -32770],
"c_long": [-1, 0, 1, 2],
"c_unsigned_long": [0, 1, 2, 3],
"c_float": [1.0, 1.1, 1.2, 1.3],
"c_half_float": [1, 2, 3, 4],
"c_double": [1, 2, 3, 4],
"c_scaled_float": [1, 2, 3, 4],
"c_date": ["2020-01-01", "2020-01-02"],
"c_datetime": ["2020-01-01 12:00:00", "2020-01-02 13:01:01"],
"c_keyword": ["a", "b", "c"],
"c_text": ["d", "e", "f"],
"c_ip": ["192.168.0.1", "127.0.0.1"],
"c_person": [
{"name": "Andy", "age": 18},
{"name": "Tim", "age": 28}
]
}

View File

@ -3,6 +3,9 @@
"test2": "text3_4*5",
"test3": 5.0,
"test4": "2022-08-08",
"test5": "2022-08-10 12:10:10",
"test6": 1660018210000,
"test7": "2022-08-10 12:10:10",
"c_bool": [true, false, true, true],
"c_byte": [1, -2, -3, 4],
"c_short": [128, 129, -129, -130],

View File

@ -0,0 +1,25 @@
{
"test1": "string3",
"test2": "text3_4*5",
"test3": 5.0,
"test4": "2022-08-08",
"c_bool": [true, false, true, true],
"c_byte": [1, -2, -3, 4],
"c_short": [128, 129, -129, -130],
"c_integer": [32768, 32769, -32769, -32770],
"c_long": [-1, 0, 1, 2],
"c_unsigned_long": [0, 1, 2, 3],
"c_float": [1.0, 1.1, 1.2, 1.3],
"c_half_float": [1, 2, 3, 4],
"c_double": [1, 2, 3, 4],
"c_scaled_float": [1, 2, 3, 4],
"c_date": ["2020-01-01", "2020-01-02"],
"c_datetime": ["2020-01-01 12:00:00", "2020-01-02 13:01:01"],
"c_keyword": ["a", "b", "c"],
"c_text": ["d", "e", "f"],
"c_ip": ["192.168.0.1", "127.0.0.1"],
"c_person": [
{"name": "Andy", "age": 18},
{"name": "Tim", "age": 28}
]
}

View File

@ -23,12 +23,12 @@ curl "http://${ES_6_HOST}:9200/test1" -H "Content-Type:application/json" -X PUT
# create index test2
curl "http://${ES_6_HOST}:9200/test2_20220808" -H "Content-Type:application/json" -X PUT -d '@/mnt/scripts/index/es6_test2.json'
# put data
curl "http://${ES_6_HOST}:9200/test1/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json'
curl "http://${ES_6_HOST}:9200/test1/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2.json'
curl "http://${ES_6_HOST}:9200/test1/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3.json'
curl "http://${ES_6_HOST}:9200/test2_20220808/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json'
curl "http://${ES_6_HOST}:9200/test2_20220808/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2.json'
curl "http://${ES_6_HOST}:9200/test2_20220808/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3.json'
curl "http://${ES_6_HOST}:9200/test1/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1_es6.json'
curl "http://${ES_6_HOST}:9200/test1/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2_es6.json'
curl "http://${ES_6_HOST}:9200/test1/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3_es6.json'
curl "http://${ES_6_HOST}:9200/test2_20220808/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1_es6.json'
curl "http://${ES_6_HOST}:9200/test2_20220808/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2_es6.json'
curl "http://${ES_6_HOST}:9200/test2_20220808/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3_es6.json'
# put _meta for array
curl "http://${ES_6_HOST}:9200/test1/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"
curl "http://${ES_6_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"

View File

@ -23,6 +23,18 @@
"test4": {
"type": "date"
},
"test5": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"test6": {
"type": "date",
"format": "epoch_millis"
},
"test7": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss || epoch_millis"
},
"c_bool": {
"type": "boolean"
},

View File

@ -26,6 +26,18 @@
"test4": {
"type": "date"
},
"test5": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"test6": {
"type": "date",
"format": "epoch_millis"
},
"test7": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss || epoch_millis"
},
"c_bool": {
"type": "boolean"
},

View File

@ -1,4 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql52 --
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08T12:10:10 2022-08-08T12:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
-- !sql51 --
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08T12:10:10 2022-08-08T12:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
-- !sql62 --
['2020-01-01 12:00:00', '2020-01-02 13:01:01'] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
@ -21,15 +27,15 @@ true 1 128 32768 -1 0 1.0 1 1 1 2020-01-01 2020-01-01 12:00:00 a d 192.168.0.1 {
true 1 128 32768 -1 0 1.0 1 1 1 2020-01-01 2020-01-01 12:00:00 a d 192.168.0.1 {"name":"Andy","age":18}
-- !sql72 --
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08 2022-08-08 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
-- !sql73 --
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string3 [1, 2, 3, 4] 2022-08-08 text3_4*5 [2020-01-01, 2020-01-02] 5.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08 2022-08-08 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 2022-08-09 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-09 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string3 [1, 2, 3, 4] 2022-08-08 2022-08-10 text3_4*5 [2020-01-01, 2020-01-02] 5.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
-- !sql74 --
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 2022-08-09 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-09 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
-- !sql75 --
true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
@ -42,12 +48,12 @@ true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"na
true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
-- !sql81 --
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08 2022-08-08 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
-- !sql82 --
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string3 [1, 2, 3, 4] 2022-08-08 text3_4*5 [2020-01-01, 2020-01-02] 5.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08 2022-08-08 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 2022-08-09 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-09 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string3 [1, 2, 3, 4] 2022-08-08 2022-08-10 text3_4*5 [2020-01-01, 2020-01-02] 5.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
-- !sql83 --
true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
@ -57,4 +63,5 @@ true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"na
-- !sql84 --
true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}

View File

@ -26,6 +26,11 @@ suite("test_es_query", "p0") {
sql """drop catalog if exists es6;"""
sql """drop catalog if exists es7;"""
sql """drop catalog if exists es8;"""
sql """drop resource if exists es6_resource;"""
sql """drop resource if exists es7_resource;"""
sql """drop resource if exists es8_resource;"""
sql """drop table if exists test_v1;"""
sql """drop table if exists test_v2;"""
// test old create-catalog syntax for compatibility
sql """
@ -55,6 +60,82 @@ suite("test_es_query", "p0") {
);
"""
// test external table for datetime
sql """
CREATE TABLE `test_v1` (
`c_datetime` array<datev2> NULL,
`c_long` array<bigint(20)> NULL,
`c_unsigned_long` array<largeint(40)> NULL,
`c_text` array<text> NULL,
`c_short` array<smallint(6)> NULL,
`c_ip` array<text> NULL,
`test1` text NULL,
`c_half_float` array<float> NULL,
`test4` date NULL,
`test5` datetime NULL,
`test2` text NULL,
`c_date` array<datev2> NULL,
`test3` double NULL,
`c_scaled_float` array<double> NULL,
`c_float` array<float> NULL,
`c_double` array<double> NULL,
`c_keyword` array<text> NULL,
`c_person` array<text> NULL,
`test6` datetime NULL,
`test7` datetime NULL,
`c_byte` array<tinyint(4)> NULL,
`c_bool` array<boolean> NULL,
`c_integer` array<int(11)> NULL
) ENGINE=ELASTICSEARCH
COMMENT 'ELASTICSEARCH'
PROPERTIES (
"hosts" = "http://127.0.0.1:$es_8_port",
"index" = "test1",
"nodes_discovery"="false",
"enable_keyword_sniff"="true",
"http_ssl_enabled"="false"
);
"""
order_qt_sql52 """select * from test_v1 where test2='text#1'"""
sql """
CREATE TABLE `test_v2` (
`c_datetime` array<datev2> NULL,
`c_long` array<bigint(20)> NULL,
`c_unsigned_long` array<largeint(40)> NULL,
`c_text` array<text> NULL,
`c_short` array<smallint(6)> NULL,
`c_ip` array<text> NULL,
`test1` text NULL,
`c_half_float` array<float> NULL,
`test4` datev2 NULL,
`test5` datetimev2 NULL,
`test2` text NULL,
`c_date` array<datev2> NULL,
`test3` double NULL,
`c_scaled_float` array<double> NULL,
`c_float` array<float> NULL,
`c_double` array<double> NULL,
`c_keyword` array<text> NULL,
`c_person` array<text> NULL,
`test6` datetimev2 NULL,
`test7` datetimev2 NULL,
`c_byte` array<tinyint(4)> NULL,
`c_bool` array<boolean> NULL,
`c_integer` array<int(11)> NULL
) ENGINE=ELASTICSEARCH
COMMENT 'ELASTICSEARCH'
PROPERTIES (
"hosts" = "http://127.0.0.1:$es_8_port",
"index" = "test1",
"nodes_discovery"="false",
"enable_keyword_sniff"="true",
"http_ssl_enabled"="false"
);
"""
order_qt_sql51 """select * from test_v2 where test2='text#1'"""
sql """create catalog if not exists es6 with resource es6_resource;"""
sql """create catalog if not exists es7 with resource es7_resource;"""
sql """create catalog if not exists es8 with resource es8_resource;"""