[improvement](function) add timezone cache for convert_tz (#14616)
This commit is contained in:
@ -915,39 +915,43 @@ void TimestampFunctions::convert_tz_prepare(doris_udf::FunctionContext* context,
|
||||
doris_udf::FunctionContext::FunctionStateScope scope) {
|
||||
if (scope != FunctionContext::FRAGMENT_LOCAL || context->get_num_args() != 3 ||
|
||||
context->get_arg_type(1)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR ||
|
||||
context->get_arg_type(2)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR ||
|
||||
!context->is_arg_constant(1) || !context->is_arg_constant(2)) {
|
||||
context->get_arg_type(2)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR) {
|
||||
return;
|
||||
}
|
||||
|
||||
ConvertTzCtx* ctc = new ConvertTzCtx();
|
||||
context->set_function_state(scope, ctc);
|
||||
|
||||
// find from timezone
|
||||
StringVal* from = reinterpret_cast<StringVal*>(context->get_constant_arg(1));
|
||||
if (UNLIKELY(from->is_null)) {
|
||||
ctc->is_valid = false;
|
||||
return;
|
||||
}
|
||||
if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)from->ptr, from->len),
|
||||
ctc->from_tz)) {
|
||||
ctc->is_valid = false;
|
||||
return;
|
||||
if (context->is_arg_constant(1)) {
|
||||
// find from timezone
|
||||
StringVal* from = reinterpret_cast<StringVal*>(context->get_constant_arg(1));
|
||||
if (UNLIKELY(from->is_null)) {
|
||||
ctc->is_valid = false;
|
||||
return;
|
||||
}
|
||||
if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)from->ptr, from->len),
|
||||
ctc->from_tz)) {
|
||||
ctc->is_valid = false;
|
||||
return;
|
||||
}
|
||||
ctc->constant_from = true;
|
||||
}
|
||||
|
||||
// find to timezone
|
||||
StringVal* to = reinterpret_cast<StringVal*>(context->get_constant_arg(2));
|
||||
if (UNLIKELY(to->is_null)) {
|
||||
ctc->is_valid = false;
|
||||
return;
|
||||
}
|
||||
if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)to->ptr, to->len), ctc->to_tz)) {
|
||||
ctc->is_valid = false;
|
||||
return;
|
||||
if (context->is_arg_constant(2)) {
|
||||
// find to timezone
|
||||
StringVal* to = reinterpret_cast<StringVal*>(context->get_constant_arg(2));
|
||||
if (UNLIKELY(to->is_null)) {
|
||||
ctc->is_valid = false;
|
||||
return;
|
||||
}
|
||||
if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)to->ptr, to->len), ctc->to_tz)) {
|
||||
ctc->is_valid = false;
|
||||
return;
|
||||
}
|
||||
ctc->constant_to = true;
|
||||
}
|
||||
|
||||
ctc->is_valid = true;
|
||||
return;
|
||||
}
|
||||
|
||||
DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeVal& ts_val,
|
||||
@ -975,12 +979,40 @@ DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeV
|
||||
}
|
||||
|
||||
int64_t timestamp;
|
||||
if (!ts_value.unix_timestamp(&timestamp, ctc->from_tz)) {
|
||||
return DateTimeVal::null();
|
||||
|
||||
if (ctc->constant_from) {
|
||||
if (!ts_value.unix_timestamp(&timestamp, ctc->from_tz)) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
} else {
|
||||
auto from_tz_string = from_tz.to_string();
|
||||
if (UNLIKELY(ctc->time_zone_cache.find(from_tz_string) == ctc->time_zone_cache.cend())) {
|
||||
if (UNLIKELY(!TimezoneUtils::find_cctz_time_zone(
|
||||
from_tz_string, ctc->time_zone_cache[from_tz_string]))) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
}
|
||||
if (!ts_value.unix_timestamp(&timestamp, ctc->time_zone_cache[from_tz_string])) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
}
|
||||
|
||||
DateTimeValue ts_value2;
|
||||
if (!ts_value2.from_unixtime(timestamp, ctc->to_tz)) {
|
||||
return DateTimeVal::null();
|
||||
if (ctc->constant_to) {
|
||||
if (!ts_value2.from_unixtime(timestamp, ctc->to_tz)) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
} else {
|
||||
auto to_tz_string = to_tz.to_string();
|
||||
if (UNLIKELY(ctc->time_zone_cache.find(to_tz_string) == ctc->time_zone_cache.cend())) {
|
||||
if (UNLIKELY(!TimezoneUtils::find_cctz_time_zone(to_tz_string,
|
||||
ctc->time_zone_cache[to_tz_string]))) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
}
|
||||
if (!ts_value2.from_unixtime(timestamp, ctc->time_zone_cache[to_tz_string])) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
}
|
||||
|
||||
DateTimeVal return_val;
|
||||
|
||||
@ -43,8 +43,11 @@ struct FormatCtx {
|
||||
// State shared between convert_tz_prepare() and convert_tz() for the row-based
// engine. It is allocated in convert_tz_prepare() and attached to the
// FunctionContext with set_function_state().
struct ConvertTzCtx {
|
||||
// false when a constant time zone argument was NULL or could not be resolved
// in convert_tz_prepare(); in that case the function is meant to always return null.
|
||||
bool is_valid = false;
|
||||
// Set to true by convert_tz_prepare() once the constant `from` argument has been
// resolved into from_tz; when false, convert_tz() resolves the zone per row
// through time_zone_cache.
bool constant_from = false;
|
||||
// Same as constant_from, for the `to` argument and to_tz.
bool constant_to = false;
|
||||
// Pre-resolved source zone; only filled in when constant_from is true.
cctz::time_zone from_tz;
|
||||
// Pre-resolved target zone; only filled in when constant_to is true.
cctz::time_zone to_tz;
|
||||
// Time zones resolved at execution time for non-constant arguments, keyed by
// the time zone name taken from the row.
// NOTE(review): convert_tz() fills this through operator[], which inserts an
// entry before the lookup result is known; a name that fails to resolve
// therefore stays in the map and looks like a cache hit on the next row --
// confirm this is intended.
// NOTE(review): this state is registered under the FRAGMENT_LOCAL scope and is
// mutated during execution; presumably that scope is not shared between
// threads -- verify.
std::map<std::string, cctz::time_zone> time_zone_cache;
|
||||
};
|
||||
|
||||
class TimestampFunctions {
|
||||
|
||||
@ -26,6 +26,10 @@
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
// Function state for the vectorized convert_tz: a cache of resolved time
// zones. It is allocated in prepare() for the THREAD_LOCAL scope, released in
// close(), and read by ConvertTZImpl::execute() through get_function_state().
struct ConvertTzCtx {
|
||||
// Resolved zones keyed by the time zone name read from the argument columns.
// NOTE(review): the key is a StringRef; if StringRef is a non-owning view into
// column memory, keys kept here may outlive the data they point at once the
// column is released -- confirm, or key by an owning string.
std::map<StringRef, cctz::time_zone> time_zone_cache;
|
||||
};
|
||||
|
||||
template <typename DateValueType, typename ArgType>
|
||||
struct ConvertTZImpl {
|
||||
using ColumnType = std::conditional_t<
|
||||
@ -47,6 +51,10 @@ struct ConvertTZImpl {
|
||||
const ColumnString* from_tz_column, const ColumnString* to_tz_column,
|
||||
ReturnColumnType* result_column, NullMap& result_null_map,
|
||||
size_t input_rows_count) {
|
||||
auto convert_ctx = reinterpret_cast<ConvertTzCtx*>(
|
||||
context->get_function_state(FunctionContext::FunctionStateScope::THREAD_LOCAL));
|
||||
std::map<StringRef, cctz::time_zone> time_zone_cache_;
|
||||
auto& time_zone_cache = convert_ctx ? convert_ctx->time_zone_cache : time_zone_cache_;
|
||||
for (size_t i = 0; i < input_rows_count; i++) {
|
||||
if (result_null_map[i]) {
|
||||
result_column->insert_default();
|
||||
@ -60,14 +68,32 @@ struct ConvertTZImpl {
|
||||
binary_cast<NativeType, DateValueType>(date_column->get_element(i));
|
||||
int64_t timestamp;
|
||||
|
||||
if (!ts_value.unix_timestamp(&timestamp, from_tz.to_string())) {
|
||||
if (time_zone_cache.find(from_tz) == time_zone_cache.cend()) {
|
||||
if (!TimezoneUtils::find_cctz_time_zone(from_tz.to_string(),
|
||||
time_zone_cache[from_tz])) {
|
||||
result_null_map[i] = true;
|
||||
result_column->insert_default();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (time_zone_cache.find(to_tz) == time_zone_cache.cend()) {
|
||||
if (!TimezoneUtils::find_cctz_time_zone(to_tz.to_string(),
|
||||
time_zone_cache[to_tz])) {
|
||||
result_null_map[i] = true;
|
||||
result_column->insert_default();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ts_value.unix_timestamp(&timestamp, time_zone_cache[from_tz])) {
|
||||
result_null_map[i] = true;
|
||||
result_column->insert_default();
|
||||
continue;
|
||||
}
|
||||
|
||||
ReturnDateType ts_value2;
|
||||
if (!ts_value2.from_unixtime(timestamp, to_tz.to_string())) {
|
||||
if (!ts_value2.from_unixtime(timestamp, time_zone_cache[to_tz])) {
|
||||
result_null_map[i] = true;
|
||||
result_column->insert_default();
|
||||
continue;
|
||||
@ -112,6 +138,24 @@ public:
|
||||
bool use_default_implementation_for_constants() const override { return true; }
|
||||
bool use_default_implementation_for_nulls() const override { return false; }
|
||||
|
||||
// Creates the time zone cache used by this function.
// For the THREAD_LOCAL scope a new ConvertTzCtx is allocated and stored as the
// function state; every other scope is a no-op. Always returns Status::OK().
// The allocation made here is released in close().
Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
|
||||
// Only the THREAD_LOCAL state carries a cache; nothing to set up otherwise.
if (scope != FunctionContext::THREAD_LOCAL) {
|
||||
return Status::OK();
|
||||
}
|
||||
// Ownership of the raw pointer passes to the function state until close().
context->set_function_state(scope, new ConvertTzCtx);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Releases the ConvertTzCtx created by prepare().
// For the THREAD_LOCAL scope the stored state is deleted and the state slot is
// reset to nullptr; other scopes are left untouched. Always returns Status::OK().
Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
|
||||
if (scope == FunctionContext::THREAD_LOCAL) {
|
||||
// The state was stored as an untyped pointer; cast it back to the type
// allocated in prepare() so the correct destructor runs.
auto* convert_ctx = reinterpret_cast<ConvertTzCtx*>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
// Deleting a null pointer is a no-op, so this is safe if no state was set.
delete convert_ctx;
|
||||
// Clear the slot so the freed pointer cannot be read again.
context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
||||
size_t result, size_t input_rows_count) override {
|
||||
auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
|
||||
|
||||
@ -23,6 +23,64 @@
|
||||
-- !sql --
|
||||
\N
|
||||
|
||||
-- !sql1 --
|
||||
1 2019-08-01T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-01T13:21:03
|
||||
2 2019-08-01T13:21:03 Asia/Singapore Asia/Shanghai 2019-08-01T13:21:03
|
||||
3 2019-08-01T13:21:03 Asia/Taipei Asia/Shanghai 2019-08-01T13:21:03
|
||||
4 2019-08-02T13:21:03 Australia/Queensland Asia/Shanghai 2019-08-02T11:21:03
|
||||
5 2019-08-02T13:21:03 Australia/Lindeman Asia/Shanghai 2019-08-02T11:21:03
|
||||
6 2019-08-03T13:21:03 America/Aruba Asia/Shanghai 2019-08-04T01:21:03
|
||||
7 2019-08-03T13:21:03 America/Blanc-Sablon Asia/Shanghai 2019-08-04T01:21:03
|
||||
8 2019-08-04T13:21:03 America/Dawson Africa/Lusaka 2019-08-04T22:21:03
|
||||
9 2019-08-04T13:21:03 America/Creston Africa/Lusaka 2019-08-04T22:21:03
|
||||
10 2019-08-05T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-05T13:21:03
|
||||
11 2019-08-05T13:21:03 Asia/Shanghai Asia/Singapore 2019-08-05T13:21:03
|
||||
12 2019-08-05T13:21:03 Asia/Shanghai Asia/Taipei 2019-08-05T13:21:03
|
||||
13 2019-08-06T13:21:03 Asia/Shanghai Australia/Queensland 2019-08-06T15:21:03
|
||||
14 2019-08-06T13:21:03 Asia/Shanghai Australia/Lindeman 2019-08-06T15:21:03
|
||||
15 2019-08-07T13:21:03 Asia/Shanghai America/Aruba 2019-08-07T01:21:03
|
||||
16 2019-08-07T13:21:03 Asia/Shanghai America/Blanc-Sablon 2019-08-07T01:21:03
|
||||
17 2019-08-08T13:21:03 Africa/Lusaka America/Dawson 2019-08-08T04:21:03
|
||||
18 2019-08-08T13:21:03 Africa/Lusaka America/Creston 2019-08-08T04:21:03
|
||||
|
||||
-- !sql2 --
|
||||
2019-08-01T13:21:03 2019-08-01T13:21:03 2019-08-01T13:21:03
|
||||
|
||||
-- !sql3 --
|
||||
2019-08-02T11:21:03 2019-08-02T11:21:03 2019-08-02T11:21:03
|
||||
|
||||
-- !sql4 --
|
||||
2019-08-04T22:21:03 2019-08-04T22:21:03 2019-08-04T22:21:03
|
||||
|
||||
-- !sql_vec1 --
|
||||
1 2019-08-01T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-01T13:21:03
|
||||
2 2019-08-01T13:21:03 Asia/Singapore Asia/Shanghai 2019-08-01T13:21:03
|
||||
3 2019-08-01T13:21:03 Asia/Taipei Asia/Shanghai 2019-08-01T13:21:03
|
||||
4 2019-08-02T13:21:03 Australia/Queensland Asia/Shanghai 2019-08-02T11:21:03
|
||||
5 2019-08-02T13:21:03 Australia/Lindeman Asia/Shanghai 2019-08-02T11:21:03
|
||||
6 2019-08-03T13:21:03 America/Aruba Asia/Shanghai 2019-08-04T01:21:03
|
||||
7 2019-08-03T13:21:03 America/Blanc-Sablon Asia/Shanghai 2019-08-04T01:21:03
|
||||
8 2019-08-04T13:21:03 America/Dawson Africa/Lusaka 2019-08-04T22:21:03
|
||||
9 2019-08-04T13:21:03 America/Creston Africa/Lusaka 2019-08-04T22:21:03
|
||||
10 2019-08-05T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-05T13:21:03
|
||||
11 2019-08-05T13:21:03 Asia/Shanghai Asia/Singapore 2019-08-05T13:21:03
|
||||
12 2019-08-05T13:21:03 Asia/Shanghai Asia/Taipei 2019-08-05T13:21:03
|
||||
13 2019-08-06T13:21:03 Asia/Shanghai Australia/Queensland 2019-08-06T15:21:03
|
||||
14 2019-08-06T13:21:03 Asia/Shanghai Australia/Lindeman 2019-08-06T15:21:03
|
||||
15 2019-08-07T13:21:03 Asia/Shanghai America/Aruba 2019-08-07T01:21:03
|
||||
16 2019-08-07T13:21:03 Asia/Shanghai America/Blanc-Sablon 2019-08-07T01:21:03
|
||||
17 2019-08-08T13:21:03 Africa/Lusaka America/Dawson 2019-08-08T04:21:03
|
||||
18 2019-08-08T13:21:03 Africa/Lusaka America/Creston 2019-08-08T04:21:03
|
||||
|
||||
-- !sql_vec2 --
|
||||
2019-08-01T13:21:03 2019-08-01T13:21:03 2019-08-01T13:21:03
|
||||
|
||||
-- !sql_vec3 --
|
||||
2019-08-02T11:21:03 2019-08-02T11:21:03 2019-08-02T11:21:03
|
||||
|
||||
-- !sql_vec4 --
|
||||
2019-08-04T22:21:03 2019-08-04T22:21:03 2019-08-04T22:21:03
|
||||
|
||||
-- !sql --
|
||||
2012-11-30T23:59:59
|
||||
|
||||
|
||||
@ -52,6 +52,127 @@ suite("test_date_function") {
|
||||
|
||||
sql """ truncate table ${tableName} """
|
||||
|
||||
def timezoneCachedTableName = "test_convert_tz_with_timezone_cache"
|
||||
sql """ SET enable_vectorized_engine = false """
|
||||
sql """ DROP TABLE IF EXISTS ${timezoneCachedTableName} """
|
||||
sql """
|
||||
CREATE TABLE ${timezoneCachedTableName} (
|
||||
id int,
|
||||
test_datetime datetime NULL COMMENT "",
|
||||
origin_tz VARCHAR(255),
|
||||
target_tz VARCHAR(255)
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(id)
|
||||
COMMENT "OLAP"
|
||||
DISTRIBUTED BY HASH(id) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"in_memory" = "false",
|
||||
"storage_format" = "V2"
|
||||
)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO ${timezoneCachedTableName} VALUES
|
||||
(1, "2019-08-01 13:21:03", "Asia/Shanghai", "Asia/Shanghai"),
|
||||
(2, "2019-08-01 13:21:03", "Asia/Singapore", "Asia/Shanghai"),
|
||||
(3, "2019-08-01 13:21:03", "Asia/Taipei", "Asia/Shanghai"),
|
||||
(4, "2019-08-02 13:21:03", "Australia/Queensland", "Asia/Shanghai"),
|
||||
(5, "2019-08-02 13:21:03", "Australia/Lindeman", "Asia/Shanghai"),
|
||||
(6, "2019-08-03 13:21:03", "America/Aruba", "Asia/Shanghai"),
|
||||
(7, "2019-08-03 13:21:03", "America/Blanc-Sablon", "Asia/Shanghai"),
|
||||
(8, "2019-08-04 13:21:03", "America/Dawson", "Africa/Lusaka"),
|
||||
(9, "2019-08-04 13:21:03", "America/Creston", "Africa/Lusaka"),
|
||||
(10, "2019-08-05 13:21:03", "Asia/Shanghai", "Asia/Shanghai"),
|
||||
(11, "2019-08-05 13:21:03", "Asia/Shanghai", "Asia/Singapore"),
|
||||
(12, "2019-08-05 13:21:03", "Asia/Shanghai", "Asia/Taipei"),
|
||||
(13, "2019-08-06 13:21:03", "Asia/Shanghai", "Australia/Queensland"),
|
||||
(14, "2019-08-06 13:21:03", "Asia/Shanghai", "Australia/Lindeman"),
|
||||
(15, "2019-08-07 13:21:03", "Asia/Shanghai", "America/Aruba"),
|
||||
(16, "2019-08-07 13:21:03", "Asia/Shanghai", "America/Blanc-Sablon"),
|
||||
(17, "2019-08-08 13:21:03", "Africa/Lusaka", "America/Dawson"),
|
||||
(18, "2019-08-08 13:21:03", "Africa/Lusaka", "America/Creston")
|
||||
"""
|
||||
|
||||
sql "set parallel_fragment_exec_instance_num = 8"
|
||||
|
||||
qt_sql1 """
|
||||
SELECT
|
||||
`id`, `test_datetime`, `origin_tz`, `target_tz`, convert_tz(`test_datetime`, `origin_tz`, `target_tz`)
|
||||
FROM
|
||||
${timezoneCachedTableName}
|
||||
ORDER BY `id`
|
||||
"""
|
||||
qt_sql2 """
|
||||
SELECT
|
||||
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
|
||||
convert_tz(`test_datetime`, "Asia/Singapore", `target_tz`),
|
||||
convert_tz(`test_datetime`, `origin_tz`, "Asia/Shanghai")
|
||||
FROM
|
||||
${timezoneCachedTableName}
|
||||
WHERE
|
||||
id = 2;
|
||||
"""
|
||||
qt_sql3 """
|
||||
SELECT
|
||||
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
|
||||
convert_tz(`test_datetime`, "Australia/Queensland", `target_tz`),
|
||||
convert_tz(`test_datetime`, `origin_tz`, "Asia/Shanghai")
|
||||
FROM
|
||||
${timezoneCachedTableName}
|
||||
WHERE
|
||||
id = 4;
|
||||
"""
|
||||
qt_sql4 """
|
||||
SELECT
|
||||
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
|
||||
convert_tz(`test_datetime`, "America/Dawson", `target_tz`),
|
||||
convert_tz(`test_datetime`, `origin_tz`, "Africa/Lusaka")
|
||||
FROM
|
||||
${timezoneCachedTableName}
|
||||
WHERE
|
||||
id = 8;
|
||||
"""
|
||||
|
||||
sql """ SET enable_vectorized_engine = true """
|
||||
qt_sql_vec1 """
|
||||
SELECT
|
||||
`id`, `test_datetime`, `origin_tz`, `target_tz`, convert_tz(`test_datetime`, `origin_tz`, `target_tz`)
|
||||
FROM
|
||||
${timezoneCachedTableName}
|
||||
ORDER BY `id`
|
||||
"""
|
||||
qt_sql_vec2 """
|
||||
SELECT
|
||||
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
|
||||
convert_tz(`test_datetime`, "Asia/Singapore", `target_tz`),
|
||||
convert_tz(`test_datetime`, `origin_tz`, "Asia/Shanghai")
|
||||
FROM
|
||||
${timezoneCachedTableName}
|
||||
WHERE
|
||||
id = 2;
|
||||
"""
|
||||
qt_sql_vec3 """
|
||||
SELECT
|
||||
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
|
||||
convert_tz(`test_datetime`, "Australia/Queensland", `target_tz`),
|
||||
convert_tz(`test_datetime`, `origin_tz`, "Asia/Shanghai")
|
||||
FROM
|
||||
${timezoneCachedTableName}
|
||||
WHERE
|
||||
id = 4;
|
||||
"""
|
||||
qt_sql_vec4 """
|
||||
SELECT
|
||||
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
|
||||
convert_tz(`test_datetime`, "America/Dawson", `target_tz`),
|
||||
convert_tz(`test_datetime`, `origin_tz`, "Africa/Lusaka")
|
||||
FROM
|
||||
${timezoneCachedTableName}
|
||||
WHERE
|
||||
id = 8;
|
||||
"""
|
||||
|
||||
// curdate,current_date
|
||||
String curdate_str = new SimpleDateFormat("yyyy-MM-dd").format(new Date())
|
||||
def curdate_result = sql """ SELECT CURDATE() """
|
||||
|
||||
Reference in New Issue
Block a user