From 9bcc3ae2838ccbccda2e425db8632bd60c96348c Mon Sep 17 00:00:00 2001
From: huangzhaowei
Date: Tue, 28 Feb 2023 20:09:35 +0800
Subject: [PATCH] [Fix](DOE)Fix be core dump when parse es epoch_millis date format (#17100)

---
 be/src/exec/es/es_scroll_parser.cpp           | 162 +++++++-----------
 be/src/exec/es/es_scroll_parser.h             |   7 -
 .../elasticsearch/scripts/data/data1.json     |   3 +
 .../elasticsearch/scripts/data/data1_es6.json |  25 +++
 .../elasticsearch/scripts/data/data2.json     |   3 +
 .../elasticsearch/scripts/data/data2_es6.json |  25 +++
 .../elasticsearch/scripts/data/data3.json     |   3 +
 .../elasticsearch/scripts/data/data3_es6.json |  25 +++
 .../elasticsearch/scripts/es_init.sh          |  12 +-
 .../scripts/index/es7_test1.json              |  12 ++
 .../scripts/index/es7_test2.json              |  12 ++
 regression-test/data/es_p0/test_es_query.out  |  27 +--
 .../suites/es_p0/test_es_query.groovy         |  81 +++++++++
 13 files changed, 273 insertions(+), 124 deletions(-)
 create mode 100755 docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1_es6.json
 create mode 100755 docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2_es6.json
 create mode 100755 docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3_es6.json

diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp
index 369871b779..e8e81c40f9 100644
--- a/be/src/exec/es/es_scroll_parser.cpp
+++ b/be/src/exec/es/es_scroll_parser.cpp
@@ -138,6 +138,14 @@ static const std::string INVALID_NULL_VALUE =
         return Status::RuntimeError(ss.str());                                \
     } while (false)
 
+#define PARSE_DATE(dt_val, col, type, is_date_str)                                      \
+    if ((is_date_str &&                                                                 \
+         !dt_val.from_date_str(static_cast<std::string>(col.GetString()).c_str(),       \
+                               col.GetStringLength())) ||                               \
+        (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {     \
+        RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);                                   \
+    }
+
 template <typename T>
 static Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
                             bool pure_doc_value) {
@@ -172,46 +180,59 @@ static Status get_int_value(const rapidjson::Value& col, PrimitiveType type, voi
 }
 
 template <typename T, typename RT>
-static RT get_date_value_int(const rapidjson::Value& col, PrimitiveType type, bool is_date_str) {
-    vectorized::DateV2Value<T> dt_slot;
-    if ((is_date_str &&
-         !dt_slot.from_date_str(static_cast<std::string>(col.GetString()).c_str(),
-                                col.GetStringLength())) ||
-        (!is_date_str && !dt_slot.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {
-        RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
+static Status get_date_value_int(const rapidjson::Value& col, PrimitiveType type, bool is_date_str,
+                                 RT* slot) {
+    constexpr bool is_datetime_v1 = std::is_same_v<T, vectorized::VecDateTimeValue>;
+    T dt_val;
+    PARSE_DATE(dt_val, col, type, is_date_str)
+
+    if constexpr (is_datetime_v1) {
+        if (type == TYPE_DATE) {
+            dt_val.cast_to_date();
+        } else {
+            dt_val.to_datetime();
+        }
     }
-    return binary_cast<vectorized::DateV2Value<T>, RT>(
-            *reinterpret_cast<vectorized::DateV2Value<T>*>(&dt_slot));
+    *reinterpret_cast<RT*>(slot) = binary_cast<T, RT>(*reinterpret_cast<T*>(&dt_val));
+    return Status::OK();
 }
 
 template <typename T, typename RT>
-static RT get_date_int(const rapidjson::Value& sub_col, PrimitiveType sub_type,
-                       bool pure_doc_value) {
+static Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
+                           RT* slot) {
     // this happens only when `enable_docvalue_scan = false` and the field has a timestamp-format date from _source
-    if (sub_col.IsNumber()) {
+    if (col.IsNumber()) {
        // ES indexes date/datetime fields as millisecond timestamps, for both the index and docvalues;
        // when a number is encountered while processing a date-type field, Doris On ES treats it as milliseconds.
        // Doris On ES must stay consistent with ES, so divide by 1000 because the unit for from_unixtime is seconds
-        return get_date_value_int<T, RT>(sub_col, sub_type, false);
-    } else if (sub_col.IsArray() && pure_doc_value) {
+        return get_date_value_int<T, RT>(col, type, false, slot);
+    } else if (col.IsArray() && pure_doc_value) {
        // this happens only when `enable_docvalue_scan = true`
        // since ES 6.4, ES adds a default format for every field; if we do not provide a format for a `date` field,
        // ES imposes the standard date format `2020-06-16T00:00:00.000Z`
        // At present we only handle this string-format date. Once the related PRs are merged into Doris, we will
        // impose `epoch_millis` for the date field's docvalue
-        if (sub_col[0].IsString()) {
-            return get_date_value_int<T, RT>(sub_col, sub_type, true);
+        if (col[0].IsString()) {
+            return get_date_value_int<T, RT>(col[0], type, true, slot);
         }
        // ES returns a millisecond timestamp for the date field; divide by 1000 because the unit for from_unixtime is seconds
-        return get_date_value_int<T, RT>(sub_col, sub_type, false);
+        return get_date_value_int<T, RT>(col[0], type, false, slot);
     } else {
        // this happens only when `enable_docvalue_scan = false` and the field has a string-format date from _source
-        RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type);
-        RETURN_ERROR_IF_COL_IS_NOT_STRING(sub_col, sub_type);
-        return get_date_value_int<T, RT>(sub_col, sub_type, true);
+        RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
+        return get_date_value_int<T, RT>(col, type, true, slot);
     }
 }
 
+template <typename T, typename RT>
+static Status fill_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
+                            vectorized::IColumn* col_ptr) {
+    RT data;
+    RETURN_IF_ERROR((get_date_int<T, RT>(col, type, pure_doc_value, &data)));
+    col_ptr->insert_data(const_cast<char*>(reinterpret_cast<const char*>(&data)), 0);
+    return Status::OK();
+}
+
 template <typename T>
 static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
@@ -535,32 +556,17 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
 
     case TYPE_DATE:
     case TYPE_DATETIME:
+        RETURN_IF_ERROR((fill_date_int<vectorized::VecDateTimeValue, int64_t>(
+                col, type, pure_doc_value, col_ptr)));
+        break;
     case TYPE_DATEV2:
+        RETURN_IF_ERROR(
+                (fill_date_int<vectorized::DateV2Value<vectorized::DateV2ValueType>, uint32_t>(
+                        col, type, pure_doc_value, col_ptr)));
+        break;
     case TYPE_DATETIMEV2: {
-        // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
-        if (col.IsNumber()) {
-            // ES process date/datetime field would use millisecond timestamp for index or docvalue
-            // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
-            // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
-            RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, false));
-        } else if (col.IsArray() && pure_doc_value) {
-            // this would happened just only when `enable_docvalue_scan = true`
-            // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
-            // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
-            // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
-            // date field's docvalue
-            if (col[0].IsString()) {
-                RETURN_IF_ERROR(fill_date_col(col_ptr, col[0], type, true));
-                break;
-            }
-            // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
-            RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, false));
-        } else {
-            // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
-            RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
-            RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
-            RETURN_IF_ERROR(fill_date_col(col_ptr, col, type, true));
-        }
+        RETURN_IF_ERROR((fill_date_int<vectorized::DateV2Value<vectorized::DateTimeV2ValueType>,
+                                       uint64_t>(col, type, pure_doc_value, col_ptr)));
         break;
     }
     case TYPE_ARRAY: {
@@ -623,7 +629,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
                 break;
             }
             case TYPE_FLOAT: {
-                float val;
+                float val {};
                 RETURN_IF_ERROR(
                         get_float_value<float>(sub_col, sub_type, &val, pure_doc_value));
                 array.push_back(val);
                 break;
             }
@@ -671,13 +677,19 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
             // see https://github.com/apache/doris/pull/16304
             // No need to support date and datetime types.
             case TYPE_DATEV2: {
-                array.push_back(get_date_int<vectorized::DateV2Value<vectorized::DateV2ValueType>, uint32_t>(
-                        sub_col, sub_type, pure_doc_value));
+                uint32_t data;
+                RETURN_IF_ERROR(
+                        (get_date_int<vectorized::DateV2Value<vectorized::DateV2ValueType>,
+                                      uint32_t>(sub_col, sub_type, pure_doc_value, &data)));
+                array.push_back(data);
                 break;
             }
             case TYPE_DATETIMEV2: {
-                array.push_back(get_date_int<vectorized::DateV2Value<vectorized::DateTimeV2ValueType>, uint64_t>(
-                        sub_col, sub_type, pure_doc_value));
+                uint64_t data;
+                RETURN_IF_ERROR(
+                        (get_date_int<vectorized::DateV2Value<vectorized::DateTimeV2ValueType>,
+                                      uint64_t>(sub_col, sub_type, pure_doc_value, &data)));
+                array.push_back(data);
                 break;
             }
             default: {
@@ -701,56 +713,4 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
     return Status::OK();
 }
 
-Status ScrollParser::fill_date_col(vectorized::IColumn* col_ptr, const rapidjson::Value& col,
-                                   PrimitiveType type, bool is_date_str) {
-    const std::string& val = col.GetString();
-    size_t val_size = col.GetStringLength();
-
-    if (type == TYPE_DATE || type == TYPE_DATETIME) {
-        vectorized::VecDateTimeValue dt_val;
-        if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) ||
-            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {
-            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
-        }
-        if (type == TYPE_DATE) {
-            dt_val.cast_to_date();
-        } else {
-            dt_val.to_datetime();
-        }
-
-        auto date_packed_int = binary_cast<vectorized::VecDateTimeValue, int64_t>(
-                *reinterpret_cast<vectorized::VecDateTimeValue*>(&dt_val));
-        col_ptr->insert_data(const_cast<char*>(reinterpret_cast<const char*>(&date_packed_int)), 0);
-        return Status::OK();
-    } else if (type == TYPE_DATEV2) {
-        vectorized::DateV2Value<vectorized::DateV2ValueType> dt_val;
-        if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) ||
-            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {
-            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
-        }
-        auto date_packed_int = binary_cast<
-                doris::vectorized::DateV2Value<vectorized::DateV2ValueType>, uint32_t>(
-                *reinterpret_cast<vectorized::DateV2Value<vectorized::DateV2ValueType>*>(
-                        &dt_val));
-        col_ptr->insert_data(const_cast<char*>(reinterpret_cast<const char*>(&date_packed_int)), 0);
-        return Status::OK();
-
-    } else if (type == TYPE_DATETIMEV2) {
-        vectorized::DateV2Value<vectorized::DateTimeV2ValueType> dt_val;
-        if ((is_date_str && !dt_val.from_date_str(val.c_str(), val_size)) ||
-            (!is_date_str && !dt_val.from_unixtime(col.GetInt64() / 1000, "+08:00"))) {
-            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
-        }
-        auto date_packed_int = binary_cast<
-                vectorized::DateV2Value<vectorized::DateTimeV2ValueType>, uint64_t>(
-                *reinterpret_cast<vectorized::DateV2Value<vectorized::DateTimeV2ValueType>*>(
-                        &dt_val));
-        col_ptr->insert_data(const_cast<char*>(reinterpret_cast<const char*>(&date_packed_int)), 0);
-        return
Status::OK(); - - } else { - return Status::InternalError("Unsupported datetime type: " + type_to_string(type)); - } -} - } // namespace doris diff --git a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h index 267811cb63..2a40bde974 100644 --- a/be/src/exec/es/es_scroll_parser.h +++ b/be/src/exec/es/es_scroll_parser.h @@ -40,13 +40,6 @@ public: const std::string& get_scroll_id(); int get_size() const; -private: - // helper method for processing date/datetime cols with rapidjson::Value - // type is used for distinguish date and datetime - // is_date_str indicate parse datetime from string, otherwise from epoch_millis - Status fill_date_col(vectorized::IColumn* col_ptr, const rapidjson::Value& col, - PrimitiveType type, bool is_date_str); - private: std::string _scroll_id; int _size; diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json index 30e91abe62..12bcbb0d5c 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json @@ -3,6 +3,9 @@ "test2": "text#1", "test3": 3.14, "test4": "2022-08-08", + "test5": "2022-08-08 12:10:10", + "test6": 1659931810000, + "test7": 1659931810000, "c_bool": [true, false, true, true], "c_byte": [1, -2, -3, 4], "c_short": [128, 129, -129, -130], diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1_es6.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1_es6.json new file mode 100755 index 0000000000..30e91abe62 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1_es6.json @@ -0,0 +1,25 @@ +{ + "test1": "string1", + "test2": "text#1", + "test3": 3.14, + "test4": "2022-08-08", + "c_bool": [true, false, true, true], + "c_byte": [1, -2, -3, 4], + "c_short": [128, 129, -129, -130], + "c_integer": [32768, 32769, -32769, -32770], + "c_long": [-1, 0, 1, 2], + "c_unsigned_long": [0, 1, 2, 3], + "c_float": [1.0, 1.1, 1.2, 1.3], + "c_half_float": [1, 2, 3, 4], + "c_double": [1, 2, 3, 4], + "c_scaled_float": [1, 2, 3, 4], + "c_date": ["2020-01-01", "2020-01-02"], + "c_datetime": ["2020-01-01 12:00:00", "2020-01-02 13:01:01"], + "c_keyword": ["a", "b", "c"], + "c_text": ["d", "e", "f"], + "c_ip": ["192.168.0.1", "127.0.0.1"], + "c_person": [ + {"name": "Andy", "age": 18}, + {"name": "Tim", "age": 28} + ] +} \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json index 1481200c46..3b9ebdc6f2 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json @@ -3,6 +3,9 @@ "test2": "text2", "test3": 4, "test4": "2022-08-08", + "test5": "2022-08-09 12:10:10", + "test6": 1660018210000, + "test7": "2022-08-09 12:10:10", "c_bool": [true, false, true, true], "c_byte": [1, -2, -3, 4], "c_short": [128, 129, -129, -130], diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2_es6.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2_es6.json new file mode 100755 index 0000000000..1481200c46 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2_es6.json @@ -0,0 +1,25 @@ +{ + "test1": "string2", + "test2": "text2", + "test3": 4, + "test4": "2022-08-08", + "c_bool": 
[true, false, true, true], + "c_byte": [1, -2, -3, 4], + "c_short": [128, 129, -129, -130], + "c_integer": [32768, 32769, -32769, -32770], + "c_long": [-1, 0, 1, 2], + "c_unsigned_long": [0, 1, 2, 3], + "c_float": [1.0, 1.1, 1.2, 1.3], + "c_half_float": [1, 2, 3, 4], + "c_double": [1, 2, 3, 4], + "c_scaled_float": [1, 2, 3, 4], + "c_date": ["2020-01-01", "2020-01-02"], + "c_datetime": ["2020-01-01 12:00:00", "2020-01-02 13:01:01"], + "c_keyword": ["a", "b", "c"], + "c_text": ["d", "e", "f"], + "c_ip": ["192.168.0.1", "127.0.0.1"], + "c_person": [ + {"name": "Andy", "age": 18}, + {"name": "Tim", "age": 28} + ] +} \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3.json index 90f7d636fa..9c10c2cf2a 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3.json +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3.json @@ -3,6 +3,9 @@ "test2": "text3_4*5", "test3": 5.0, "test4": "2022-08-08", + "test5": "2022-08-10 12:10:10", + "test6": 1660018210000, + "test7": "2022-08-10 12:10:10", "c_bool": [true, false, true, true], "c_byte": [1, -2, -3, 4], "c_short": [128, 129, -129, -130], diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3_es6.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3_es6.json new file mode 100755 index 0000000000..90f7d636fa --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data3_es6.json @@ -0,0 +1,25 @@ +{ + "test1": "string3", + "test2": "text3_4*5", + "test3": 5.0, + "test4": "2022-08-08", + "c_bool": [true, false, true, true], + "c_byte": [1, -2, -3, 4], + "c_short": [128, 129, -129, -130], + "c_integer": [32768, 32769, -32769, -32770], + "c_long": [-1, 0, 1, 2], + "c_unsigned_long": [0, 1, 2, 3], + "c_float": [1.0, 1.1, 1.2, 1.3], + "c_half_float": [1, 2, 3, 4], + "c_double": [1, 2, 3, 4], + "c_scaled_float": [1, 2, 3, 4], + "c_date": ["2020-01-01", "2020-01-02"], + "c_datetime": ["2020-01-01 12:00:00", "2020-01-02 13:01:01"], + "c_keyword": ["a", "b", "c"], + "c_text": ["d", "e", "f"], + "c_ip": ["192.168.0.1", "127.0.0.1"], + "c_person": [ + {"name": "Andy", "age": 18}, + {"name": "Tim", "age": 28} + ] +} \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh index 38ca5a010a..13947a064c 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh @@ -23,12 +23,12 @@ curl "http://${ES_6_HOST}:9200/test1" -H "Content-Type:application/json" -X PUT # create index test2 curl "http://${ES_6_HOST}:9200/test2_20220808" -H "Content-Type:application/json" -X PUT -d '@/mnt/scripts/index/es6_test2.json' # put data -curl "http://${ES_6_HOST}:9200/test1/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json' -curl "http://${ES_6_HOST}:9200/test1/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2.json' -curl "http://${ES_6_HOST}:9200/test1/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3.json' -curl "http://${ES_6_HOST}:9200/test2_20220808/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1.json' -curl "http://${ES_6_HOST}:9200/test2_20220808/doc/2" -H "Content-Type:application/json" -X POST -d 
'@/mnt/scripts/data/data2.json'
-curl "http://${ES_6_HOST}:9200/test2_20220808/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3.json'
+curl "http://${ES_6_HOST}:9200/test1/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1_es6.json'
+curl "http://${ES_6_HOST}:9200/test1/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2_es6.json'
+curl "http://${ES_6_HOST}:9200/test1/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3_es6.json'
+curl "http://${ES_6_HOST}:9200/test2_20220808/doc/1" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data1_es6.json'
+curl "http://${ES_6_HOST}:9200/test2_20220808/doc/2" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data2_es6.json'
+curl "http://${ES_6_HOST}:9200/test2_20220808/doc/3" -H "Content-Type:application/json" -X POST -d '@/mnt/scripts/data/data3_es6.json'
 # put _meta for array
 curl "http://${ES_6_HOST}:9200/test1/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"
 curl "http://${ES_6_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"
diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test1.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test1.json
index 0bfa30f011..ebc4227a20 100755
--- a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test1.json
+++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test1.json
@@ -23,6 +23,18 @@
       "test4": {
         "type": "date"
       },
+      "test5": {
+        "type": "date",
+        "format": "yyyy-MM-dd HH:mm:ss"
+      },
+      "test6": {
+        "type": "date",
+        "format": "epoch_millis"
+      },
+      "test7": {
+        "type": "date",
+        "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"
+      },
       "c_bool": {
         "type": "boolean"
       },
diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test2.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test2.json
index 7334dfd23b..00143131d0 100755
--- a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test2.json
+++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_test2.json
@@ -26,6 +26,18 @@
       "test4": {
         "type": "date"
       },
+      "test5": {
+        "type": "date",
+        "format": "yyyy-MM-dd HH:mm:ss"
+      },
+      "test6": {
+        "type": "date",
+        "format": "epoch_millis"
+      },
+      "test7": {
+        "type": "date",
+        "format": "yyyy-MM-dd HH:mm:ss||epoch_millis"
+      },
       "c_bool": {
         "type": "boolean"
       },
diff --git a/regression-test/data/es_p0/test_es_query.out b/regression-test/data/es_p0/test_es_query.out
index 8613453400..72c21f8acf 100644
--- a/regression-test/data/es_p0/test_es_query.out
+++ b/regression-test/data/es_p0/test_es_query.out
@@ -1,4 +1,10 @@
 -- This file is automatically generated.
You should know what you did if you want to edit this +-- !sql52 -- +[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08T12:10:10 2022-08-08T12:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] + +-- !sql51 -- +[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08T12:10:10 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08T12:10:10 2022-08-08T12:10:10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] + -- !sql62 -- ['2020-01-01 12:00:00', '2020-01-02 13:01:01'] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] @@ -21,15 +27,15 @@ true 1 128 32768 -1 0 1.0 1 1 1 2020-01-01 2020-01-01 12:00:00 a d 192.168.0.1 { true 1 128 32768 -1 0 1.0 1 1 1 2020-01-01 2020-01-01 12:00:00 a d 192.168.0.1 {"name":"Andy","age":18} -- !sql72 -- -[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] +[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08 2022-08-08 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] -- !sql73 -- -[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] -[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] -[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string3 [1, 2, 3, 4] 2022-08-08 text3_4*5 [2020-01-01, 2020-01-02] 5.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] +[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] 
string1 [1, 2, 3, 4] 2022-08-08 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08 2022-08-08 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] +[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 2022-08-09 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-09 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] +[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string3 [1, 2, 3, 4] 2022-08-08 2022-08-10 text3_4*5 [2020-01-01, 2020-01-02] 5.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] -- !sql74 -- -[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] +[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 2022-08-09 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-09 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] -- !sql75 -- true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18} @@ -42,12 +48,12 @@ true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"na true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18} -- !sql81 -- -[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] +[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08 2022-08-08 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] -- !sql82 -- -[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770] -[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 
4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
-[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string3 [1, 2, 3, 4] 2022-08-08 text3_4*5 [2020-01-01, 2020-01-02] 5.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
+[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string1 [1, 2, 3, 4] 2022-08-08 2022-08-08 text#1 [2020-01-01, 2020-01-02] 3.14 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-08 2022-08-08 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
+[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string2 [1, 2, 3, 4] 2022-08-08 2022-08-09 text2 [2020-01-01, 2020-01-02] 4.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-09 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
+[2020-01-01, 2020-01-02] [-1, 0, 1, 2] [0, 1, 2, 3] ['d', 'e', 'f'] [128, 129, -129, -130] ['192.168.0.1', '127.0.0.1'] string3 [1, 2, 3, 4] 2022-08-08 2022-08-10 text3_4*5 [2020-01-01, 2020-01-02] 5.0 [1, 2, 3, 4] [1, 1.1, 1.2, 1.3] [1, 2, 3, 4] ['a', 'b', 'c'] ['{"name":"Andy","age":18}', '{"name":"Tim","age":28}'] 2022-08-09 2022-08-10 [1, -2, -3, 4] [1, 0, 1, 1] [32768, 32769, -32769, -32770]
 
 -- !sql83 --
 true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
 
 -- !sql84 --
 true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
 true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
-true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
\ No newline at end of file
+true 1 128 32768 -1 0 1.0 1.0 1.0 1.0 2020-01-01 2020-01-01 a d 192.168.0.1 {"name":"Andy","age":18}
+
diff --git a/regression-test/suites/es_p0/test_es_query.groovy b/regression-test/suites/es_p0/test_es_query.groovy
index 7bfa3ed8a7..9127c5e75f 100644
--- a/regression-test/suites/es_p0/test_es_query.groovy
+++ b/regression-test/suites/es_p0/test_es_query.groovy
@@ -26,6 +26,11 @@ suite("test_es_query", "p0") {
     sql """drop catalog if exists es6;"""
     sql """drop catalog if exists es7;"""
     sql """drop catalog if exists es8;"""
+    sql """drop resource if exists es6_resource;"""
+    sql """drop resource if exists es7_resource;"""
+    sql """drop resource if exists es8_resource;"""
+    sql """drop table if exists test_v1;"""
+    sql """drop table if exists test_v2;"""
 
     // test old create-catalog syntax for compatibility
     sql """
@@ -55,6 +60,82 @@ suite("test_es_query", "p0") {
     );
     """
 
+    // test external table for datetime
+    sql """
+        CREATE TABLE `test_v1` (
+            `c_datetime` array<datev2> NULL,
+            `c_long` array<bigint> NULL,
+            `c_unsigned_long` array<largeint> NULL,
+            `c_text` array<text> NULL,
+            `c_short` array<smallint> NULL,
+            `c_ip` array<text> NULL,
+            `test1` text NULL,
+            `c_half_float` array<float> NULL,
+            `test4` date NULL,
+            `test5` datetime NULL,
+            `test2` text NULL,
+            `c_date` array<datev2> NULL,
+            `test3` double NULL,
+            `c_scaled_float` array<double> NULL,
+            `c_float` array<float> NULL,
+            `c_double` array<double> NULL,
+            `c_keyword` array<text> NULL,
+            `c_person` array<text> NULL,
+            `test6` datetime NULL,
+            `test7` datetime NULL,
+            `c_byte` array<tinyint> NULL,
+            `c_bool` array<boolean> NULL,
+            `c_integer` array<int> NULL
+        ) ENGINE=ELASTICSEARCH
+        COMMENT 'ELASTICSEARCH'
+        PROPERTIES (
+            "hosts" = "http://127.0.0.1:$es_8_port",
+            "index" = "test1",
+            "nodes_discovery"="false",
+            "enable_keyword_sniff"="true",
+            "http_ssl_enabled"="false"
+        );
+    """
+    order_qt_sql52 """select * from test_v1 where test2='text#1'"""
+
+    sql """
+        CREATE TABLE `test_v2` (
+            `c_datetime` array<datev2> NULL,
+            `c_long` array<bigint> NULL,
+            `c_unsigned_long` array<largeint> NULL,
+            `c_text` array<text> NULL,
+            `c_short` array<smallint> NULL,
+            `c_ip` array<text> NULL,
+            `test1` text NULL,
+            `c_half_float` array<float> NULL,
+            `test4` datev2 NULL,
+            `test5` datetimev2 NULL,
+            `test2` text NULL,
+            `c_date` array<datev2> NULL,
+            `test3` double NULL,
+            `c_scaled_float` array<double> NULL,
+            `c_float` array<float> NULL,
+            `c_double` array<double> NULL,
+            `c_keyword` array<text> NULL,
+            `c_person` array<text> NULL,
+            `test6` datetimev2 NULL,
+            `test7` datetimev2 NULL,
+            `c_byte` array<tinyint> NULL,
+            `c_bool` array<boolean> NULL,
+            `c_integer` array<int> NULL
+        ) ENGINE=ELASTICSEARCH
+        COMMENT 'ELASTICSEARCH'
+        PROPERTIES (
+            "hosts" = "http://127.0.0.1:$es_8_port",
+            "index" = "test1",
+            "nodes_discovery"="false",
+            "enable_keyword_sniff"="true",
+            "http_ssl_enabled"="false"
+        );
+    """
+    order_qt_sql51 """select * from test_v2 where test2='text#1'"""
+
     sql """create catalog if not exists es6 with resource es6_resource;"""
     sql """create catalog if not exists es7 with resource es7_resource;"""
    sql """create catalog if not exists es8 with resource es8_resource;"""
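
Note on the fix: the crash came from the removed fill_date_col(), which called col.GetString()/col.GetStringLength() before checking the JSON type; for an `epoch_millis` field the value arrives as a JSON number, and GetString() on a number is undefined behavior in rapidjson. The new get_date_int()/get_date_value_int() path decides number-vs-array-vs-string first and only then parses. The standalone sketch below is not the Doris BE code -- only the rapidjson calls are real, and from_epoch_millis/parse_es_date are hypothetical helpers -- but it mirrors the dispatch order the patch establishes, under the same "divide epoch_millis by 1000" convention:

#include <cstdint>
#include <cstdio>
#include <ctime>
#include <string>

#include <rapidjson/document.h>

// Hypothetical stand-in for VecDateTimeValue::from_unixtime(): ES delivers
// epoch_millis, but unix-time conversion expects seconds, hence the / 1000.
static bool from_epoch_millis(int64_t ms, std::tm* out) {
    std::time_t secs = static_cast<std::time_t>(ms / 1000);
    return gmtime_r(&secs, out) != nullptr;
}

// Mirrors the branch order of the patched get_date_int(): number first, then
// the single-element array a docvalue scan wraps scalars in, then string.
// The old fill_date_col() reached GetString() before this check, which is
// exactly the undefined behavior behind the BE core dump.
static bool parse_es_date(const rapidjson::Value& col, std::tm* tm_out,
                          std::string* str_out) {
    if (col.IsNumber()) {
        return from_epoch_millis(col.GetInt64(), tm_out);
    }
    if (col.IsArray() && !col.Empty()) {
        return parse_es_date(col[0], tm_out, str_out);
    }
    if (col.IsString()) {
        // Doris hands this to from_date_str(); here we only capture it.
        str_out->assign(col.GetString(), col.GetStringLength());
        return true;
    }
    return false; // any other JSON type is a cast-format error
}

int main() {
    rapidjson::Document doc;
    doc.Parse(R"({"test6": 1659931810000, "test5": "2022-08-08 12:10:10"})");
    std::tm tm {};
    std::string s;
    if (parse_es_date(doc["test6"], &tm, &s)) {
        std::printf("epoch_millis -> %04d-%02d-%02d\n", tm.tm_year + 1900,
                    tm.tm_mon + 1, tm.tm_mday);
    }
    if (parse_es_date(doc["test5"], &tm, &s)) {
        std::printf("string form  -> %s\n", s.c_str());
    }
    return 0;
}

Checking the numeric branch before the string branch matters because, depending on the index's `format`, docvalue scans can hand back either representation for the same column -- which is precisely what the new test7 mapping (`yyyy-MM-dd HH:mm:ss||epoch_millis`) and the mixed test6/test7 values in data1.json through data3.json exercise.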