[improvement](function) add timezone cache for convert_tz (#14616)

This commit is contained in:
Jerry Hu
2022-11-29 17:00:54 +08:00
committed by GitHub
parent 1713af6cd6
commit a60490651f
5 changed files with 286 additions and 28 deletions

View File

@ -915,39 +915,43 @@ void TimestampFunctions::convert_tz_prepare(doris_udf::FunctionContext* context,
doris_udf::FunctionContext::FunctionStateScope scope) {
if (scope != FunctionContext::FRAGMENT_LOCAL || context->get_num_args() != 3 ||
context->get_arg_type(1)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR ||
context->get_arg_type(2)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR ||
!context->is_arg_constant(1) || !context->is_arg_constant(2)) {
context->get_arg_type(2)->type != doris_udf::FunctionContext::Type::TYPE_VARCHAR) {
return;
}
ConvertTzCtx* ctc = new ConvertTzCtx();
context->set_function_state(scope, ctc);
// find from timezone
StringVal* from = reinterpret_cast<StringVal*>(context->get_constant_arg(1));
if (UNLIKELY(from->is_null)) {
ctc->is_valid = false;
return;
}
if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)from->ptr, from->len),
ctc->from_tz)) {
ctc->is_valid = false;
return;
if (context->is_arg_constant(1)) {
// find from timezone
StringVal* from = reinterpret_cast<StringVal*>(context->get_constant_arg(1));
if (UNLIKELY(from->is_null)) {
ctc->is_valid = false;
return;
}
if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)from->ptr, from->len),
ctc->from_tz)) {
ctc->is_valid = false;
return;
}
ctc->constant_from = true;
}
// find to timezone
StringVal* to = reinterpret_cast<StringVal*>(context->get_constant_arg(2));
if (UNLIKELY(to->is_null)) {
ctc->is_valid = false;
return;
}
if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)to->ptr, to->len), ctc->to_tz)) {
ctc->is_valid = false;
return;
if (context->is_arg_constant(2)) {
// find to timezone
StringVal* to = reinterpret_cast<StringVal*>(context->get_constant_arg(2));
if (UNLIKELY(to->is_null)) {
ctc->is_valid = false;
return;
}
if (!TimezoneUtils::find_cctz_time_zone(std::string((char*)to->ptr, to->len), ctc->to_tz)) {
ctc->is_valid = false;
return;
}
ctc->constant_to = true;
}
ctc->is_valid = true;
return;
}
DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeVal& ts_val,
@ -975,12 +979,40 @@ DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeV
}
int64_t timestamp;
if (!ts_value.unix_timestamp(&timestamp, ctc->from_tz)) {
return DateTimeVal::null();
if (ctc->constant_from) {
if (!ts_value.unix_timestamp(&timestamp, ctc->from_tz)) {
return DateTimeVal::null();
}
} else {
auto from_tz_string = from_tz.to_string();
if (UNLIKELY(ctc->time_zone_cache.find(from_tz_string) == ctc->time_zone_cache.cend())) {
if (UNLIKELY(!TimezoneUtils::find_cctz_time_zone(
from_tz_string, ctc->time_zone_cache[from_tz_string]))) {
return DateTimeVal::null();
}
}
if (!ts_value.unix_timestamp(&timestamp, ctc->time_zone_cache[from_tz_string])) {
return DateTimeVal::null();
}
}
DateTimeValue ts_value2;
if (!ts_value2.from_unixtime(timestamp, ctc->to_tz)) {
return DateTimeVal::null();
if (ctc->constant_to) {
if (!ts_value2.from_unixtime(timestamp, ctc->to_tz)) {
return DateTimeVal::null();
}
} else {
auto to_tz_string = to_tz.to_string();
if (UNLIKELY(ctc->time_zone_cache.find(to_tz_string) == ctc->time_zone_cache.cend())) {
if (UNLIKELY(!TimezoneUtils::find_cctz_time_zone(to_tz_string,
ctc->time_zone_cache[to_tz_string]))) {
return DateTimeVal::null();
}
}
if (!ts_value2.from_unixtime(timestamp, ctc->time_zone_cache[to_tz_string])) {
return DateTimeVal::null();
}
}
DateTimeVal return_val;

View File

@ -43,8 +43,11 @@ struct FormatCtx {
// Function state that convert_tz_prepare attaches to the FunctionContext and
// convert_tz reads back on every call.
struct ConvertTzCtx {
    // false means a constant timezone argument was NULL or could not be
    // resolved during prepare, and the function always returns null.
    bool is_valid{false};
    // Set by convert_tz_prepare once a constant "from" argument has been
    // resolved into from_tz; convert_tz then uses from_tz directly.
    bool constant_from{false};
    // Same as constant_from, for the "to" argument and to_tz.
    bool constant_to{false};
    // Pre-resolved zones; only meaningful when the matching constant_* flag is set.
    cctz::time_zone from_tz;
    cctz::time_zone to_tz;
    // Zones resolved at execution time for non-constant arguments, keyed by
    // the timezone name string taken from the row.
    // NOTE(review): convert_tz creates the entry with operator[] before
    // find_cctz_time_zone has succeeded, so a failed lookup leaves a
    // default-constructed time_zone cached under that name and a later row
    // with the same bad name would hit the cache instead of returning NULL
    // -- confirm and erase the entry on failure if so.
    std::map<std::string, cctz::time_zone> time_zone_cache;
};
class TimestampFunctions {

View File

@ -26,6 +26,10 @@
namespace doris::vectorized {
// Function state for the vectorized convert_tz: it is created in prepare()
// for THREAD_LOCAL scope, deleted in close(), and looked up by the execute
// loop so resolved timezones can be reused across rows.
struct ConvertTzCtx {
// Resolved timezones keyed by the timezone name read from the argument
// columns.
// NOTE(review): the keys are StringRef values obtained from the input
// columns. If StringRef is a non-owning view (presumably pointer + length),
// the keys may dangle once the column that produced them is released, while
// this map lives until close() -- confirm, and switch to owning std::string
// keys if so.
// NOTE(review): the execute loop inserts via operator[] before
// find_cctz_time_zone succeeds, so a failed lookup leaves a
// default-constructed time_zone cached for that name -- confirm that later
// rows with the same invalid name still produce NULL.
std::map<StringRef, cctz::time_zone> time_zone_cache;
};
template <typename DateValueType, typename ArgType>
struct ConvertTZImpl {
using ColumnType = std::conditional_t<
@ -47,6 +51,10 @@ struct ConvertTZImpl {
const ColumnString* from_tz_column, const ColumnString* to_tz_column,
ReturnColumnType* result_column, NullMap& result_null_map,
size_t input_rows_count) {
auto convert_ctx = reinterpret_cast<ConvertTzCtx*>(
context->get_function_state(FunctionContext::FunctionStateScope::THREAD_LOCAL));
std::map<StringRef, cctz::time_zone> time_zone_cache_;
auto& time_zone_cache = convert_ctx ? convert_ctx->time_zone_cache : time_zone_cache_;
for (size_t i = 0; i < input_rows_count; i++) {
if (result_null_map[i]) {
result_column->insert_default();
@ -60,14 +68,32 @@ struct ConvertTZImpl {
binary_cast<NativeType, DateValueType>(date_column->get_element(i));
int64_t timestamp;
if (!ts_value.unix_timestamp(&timestamp, from_tz.to_string())) {
if (time_zone_cache.find(from_tz) == time_zone_cache.cend()) {
if (!TimezoneUtils::find_cctz_time_zone(from_tz.to_string(),
time_zone_cache[from_tz])) {
result_null_map[i] = true;
result_column->insert_default();
continue;
}
}
if (time_zone_cache.find(to_tz) == time_zone_cache.cend()) {
if (!TimezoneUtils::find_cctz_time_zone(to_tz.to_string(),
time_zone_cache[to_tz])) {
result_null_map[i] = true;
result_column->insert_default();
continue;
}
}
if (!ts_value.unix_timestamp(&timestamp, time_zone_cache[from_tz])) {
result_null_map[i] = true;
result_column->insert_default();
continue;
}
ReturnDateType ts_value2;
if (!ts_value2.from_unixtime(timestamp, to_tz.to_string())) {
if (!ts_value2.from_unixtime(timestamp, time_zone_cache[to_tz])) {
result_null_map[i] = true;
result_column->insert_default();
continue;
@ -112,6 +138,24 @@ public:
bool use_default_implementation_for_constants() const override { return true; }
bool use_default_implementation_for_nulls() const override { return false; }
// Installs a fresh ConvertTzCtx as the function state when called for
// THREAD_LOCAL scope; every other scope is a no-op. The context is handed a
// raw owning pointer, which close() is responsible for deleting.
// Always returns Status::OK().
Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
    if (scope == FunctionContext::THREAD_LOCAL) {
        context->set_function_state(scope, new ConvertTzCtx);
    }
    return Status::OK();
}
// Releases the ConvertTzCtx stored under THREAD_LOCAL scope and resets the
// slot to nullptr so the freed pointer cannot be observed afterwards.
// Calls for any other scope do nothing. Always returns Status::OK().
Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
    if (scope != FunctionContext::THREAD_LOCAL) {
        return Status::OK();
    }
    auto* tz_state = reinterpret_cast<ConvertTzCtx*>(
            context->get_function_state(FunctionContext::THREAD_LOCAL));
    // Deleting a null pointer is a no-op, so a missing state is harmless here.
    delete tz_state;
    context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
    return Status::OK();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);

View File

@ -23,6 +23,64 @@
-- !sql --
\N
-- !sql1 --
1 2019-08-01T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-01T13:21:03
2 2019-08-01T13:21:03 Asia/Singapore Asia/Shanghai 2019-08-01T13:21:03
3 2019-08-01T13:21:03 Asia/Taipei Asia/Shanghai 2019-08-01T13:21:03
4 2019-08-02T13:21:03 Australia/Queensland Asia/Shanghai 2019-08-02T11:21:03
5 2019-08-02T13:21:03 Australia/Lindeman Asia/Shanghai 2019-08-02T11:21:03
6 2019-08-03T13:21:03 America/Aruba Asia/Shanghai 2019-08-04T01:21:03
7 2019-08-03T13:21:03 America/Blanc-Sablon Asia/Shanghai 2019-08-04T01:21:03
8 2019-08-04T13:21:03 America/Dawson Africa/Lusaka 2019-08-04T22:21:03
9 2019-08-04T13:21:03 America/Creston Africa/Lusaka 2019-08-04T22:21:03
10 2019-08-05T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-05T13:21:03
11 2019-08-05T13:21:03 Asia/Shanghai Asia/Singapore 2019-08-05T13:21:03
12 2019-08-05T13:21:03 Asia/Shanghai Asia/Taipei 2019-08-05T13:21:03
13 2019-08-06T13:21:03 Asia/Shanghai Australia/Queensland 2019-08-06T15:21:03
14 2019-08-06T13:21:03 Asia/Shanghai Australia/Lindeman 2019-08-06T15:21:03
15 2019-08-07T13:21:03 Asia/Shanghai America/Aruba 2019-08-07T01:21:03
16 2019-08-07T13:21:03 Asia/Shanghai America/Blanc-Sablon 2019-08-07T01:21:03
17 2019-08-08T13:21:03 Africa/Lusaka America/Dawson 2019-08-08T04:21:03
18 2019-08-08T13:21:03 Africa/Lusaka America/Creston 2019-08-08T04:21:03
-- !sql2 --
2019-08-01T13:21:03 2019-08-01T13:21:03 2019-08-01T13:21:03
-- !sql3 --
2019-08-02T11:21:03 2019-08-02T11:21:03 2019-08-02T11:21:03
-- !sql4 --
2019-08-04T22:21:03 2019-08-04T22:21:03 2019-08-04T22:21:03
-- !sql_vec1 --
1 2019-08-01T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-01T13:21:03
2 2019-08-01T13:21:03 Asia/Singapore Asia/Shanghai 2019-08-01T13:21:03
3 2019-08-01T13:21:03 Asia/Taipei Asia/Shanghai 2019-08-01T13:21:03
4 2019-08-02T13:21:03 Australia/Queensland Asia/Shanghai 2019-08-02T11:21:03
5 2019-08-02T13:21:03 Australia/Lindeman Asia/Shanghai 2019-08-02T11:21:03
6 2019-08-03T13:21:03 America/Aruba Asia/Shanghai 2019-08-04T01:21:03
7 2019-08-03T13:21:03 America/Blanc-Sablon Asia/Shanghai 2019-08-04T01:21:03
8 2019-08-04T13:21:03 America/Dawson Africa/Lusaka 2019-08-04T22:21:03
9 2019-08-04T13:21:03 America/Creston Africa/Lusaka 2019-08-04T22:21:03
10 2019-08-05T13:21:03 Asia/Shanghai Asia/Shanghai 2019-08-05T13:21:03
11 2019-08-05T13:21:03 Asia/Shanghai Asia/Singapore 2019-08-05T13:21:03
12 2019-08-05T13:21:03 Asia/Shanghai Asia/Taipei 2019-08-05T13:21:03
13 2019-08-06T13:21:03 Asia/Shanghai Australia/Queensland 2019-08-06T15:21:03
14 2019-08-06T13:21:03 Asia/Shanghai Australia/Lindeman 2019-08-06T15:21:03
15 2019-08-07T13:21:03 Asia/Shanghai America/Aruba 2019-08-07T01:21:03
16 2019-08-07T13:21:03 Asia/Shanghai America/Blanc-Sablon 2019-08-07T01:21:03
17 2019-08-08T13:21:03 Africa/Lusaka America/Dawson 2019-08-08T04:21:03
18 2019-08-08T13:21:03 Africa/Lusaka America/Creston 2019-08-08T04:21:03
-- !sql_vec2 --
2019-08-01T13:21:03 2019-08-01T13:21:03 2019-08-01T13:21:03
-- !sql_vec3 --
2019-08-02T11:21:03 2019-08-02T11:21:03 2019-08-02T11:21:03
-- !sql_vec4 --
2019-08-04T22:21:03 2019-08-04T22:21:03 2019-08-04T22:21:03
-- !sql --
2012-11-30T23:59:59

View File

@ -52,6 +52,127 @@ suite("test_date_function") {
sql """ truncate table ${tableName} """
// ---- convert_tz with per-row (non-constant) timezone arguments ----
// Fixture table: every row carries its own origin and target timezone name so
// that convert_tz has to resolve timezones from column values.
def timezoneCachedTableName = "test_convert_tz_with_timezone_cache"
// The first group of queries below runs with the vectorized engine disabled.
sql """ SET enable_vectorized_engine = false """
sql """ DROP TABLE IF EXISTS ${timezoneCachedTableName} """
sql """
CREATE TABLE ${timezoneCachedTableName} (
id int,
test_datetime datetime NULL COMMENT "",
origin_tz VARCHAR(255),
target_tz VARCHAR(255)
) ENGINE=OLAP
DUPLICATE KEY(id)
COMMENT "OLAP"
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
// Rows 1-9 vary the origin timezone, rows 10-18 vary the target timezone;
// several timezone names repeat across rows.
sql """
INSERT INTO ${timezoneCachedTableName} VALUES
(1, "2019-08-01 13:21:03", "Asia/Shanghai", "Asia/Shanghai"),
(2, "2019-08-01 13:21:03", "Asia/Singapore", "Asia/Shanghai"),
(3, "2019-08-01 13:21:03", "Asia/Taipei", "Asia/Shanghai"),
(4, "2019-08-02 13:21:03", "Australia/Queensland", "Asia/Shanghai"),
(5, "2019-08-02 13:21:03", "Australia/Lindeman", "Asia/Shanghai"),
(6, "2019-08-03 13:21:03", "America/Aruba", "Asia/Shanghai"),
(7, "2019-08-03 13:21:03", "America/Blanc-Sablon", "Asia/Shanghai"),
(8, "2019-08-04 13:21:03", "America/Dawson", "Africa/Lusaka"),
(9, "2019-08-04 13:21:03", "America/Creston", "Africa/Lusaka"),
(10, "2019-08-05 13:21:03", "Asia/Shanghai", "Asia/Shanghai"),
(11, "2019-08-05 13:21:03", "Asia/Shanghai", "Asia/Singapore"),
(12, "2019-08-05 13:21:03", "Asia/Shanghai", "Asia/Taipei"),
(13, "2019-08-06 13:21:03", "Asia/Shanghai", "Australia/Queensland"),
(14, "2019-08-06 13:21:03", "Asia/Shanghai", "Australia/Lindeman"),
(15, "2019-08-07 13:21:03", "Asia/Shanghai", "America/Aruba"),
(16, "2019-08-07 13:21:03", "Asia/Shanghai", "America/Blanc-Sablon"),
(17, "2019-08-08 13:21:03", "Africa/Lusaka", "America/Dawson"),
(18, "2019-08-08 13:21:03", "Africa/Lusaka", "America/Creston")
"""
// NOTE(review): presumably raised to 8 so the timezone cache is exercised
// under parallel execution -- confirm the intent.
sql "set parallel_fragment_exec_instance_num = 8"
// Both timezone arguments come from columns, for every row of the fixture table.
qt_sql1 """
SELECT
`id`, `test_datetime`, `origin_tz`, `target_tz`, convert_tz(`test_datetime`, `origin_tz`, `target_tz`)
FROM
${timezoneCachedTableName}
ORDER BY `id`
"""
// sql2-sql4: for a single row, compare three argument shapes -- both
// timezones from columns, constant origin, constant target. The literals
// equal that row's column values, so the three result columns must agree.
qt_sql2 """
SELECT
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
convert_tz(`test_datetime`, "Asia/Singapore", `target_tz`),
convert_tz(`test_datetime`, `origin_tz`, "Asia/Shanghai")
FROM
${timezoneCachedTableName}
WHERE
id = 2;
"""
qt_sql3 """
SELECT
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
convert_tz(`test_datetime`, "Australia/Queensland", `target_tz`),
convert_tz(`test_datetime`, `origin_tz`, "Asia/Shanghai")
FROM
${timezoneCachedTableName}
WHERE
id = 4;
"""
qt_sql4 """
SELECT
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
convert_tz(`test_datetime`, "America/Dawson", `target_tz`),
convert_tz(`test_datetime`, `origin_tz`, "Africa/Lusaka")
FROM
${timezoneCachedTableName}
WHERE
id = 8;
"""
// Repeat the same four queries with the vectorized engine enabled; the
// expected output recorded for sql_vec1-sql_vec4 is identical to sql1-sql4.
sql """ SET enable_vectorized_engine = true """
qt_sql_vec1 """
SELECT
`id`, `test_datetime`, `origin_tz`, `target_tz`, convert_tz(`test_datetime`, `origin_tz`, `target_tz`)
FROM
${timezoneCachedTableName}
ORDER BY `id`
"""
// Single-row comparisons: column/column, constant origin, constant target.
qt_sql_vec2 """
SELECT
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
convert_tz(`test_datetime`, "Asia/Singapore", `target_tz`),
convert_tz(`test_datetime`, `origin_tz`, "Asia/Shanghai")
FROM
${timezoneCachedTableName}
WHERE
id = 2;
"""
qt_sql_vec3 """
SELECT
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
convert_tz(`test_datetime`, "Australia/Queensland", `target_tz`),
convert_tz(`test_datetime`, `origin_tz`, "Asia/Shanghai")
FROM
${timezoneCachedTableName}
WHERE
id = 4;
"""
qt_sql_vec4 """
SELECT
convert_tz(`test_datetime`, `origin_tz`, `target_tz`),
convert_tz(`test_datetime`, "America/Dawson", `target_tz`),
convert_tz(`test_datetime`, `origin_tz`, "Africa/Lusaka")
FROM
${timezoneCachedTableName}
WHERE
id = 8;
"""
// curdate,current_date
String curdate_str = new SimpleDateFormat("yyyy-MM-dd").format(new Date())
def curdate_result = sql """ SELECT CURDATE() """