From 8aea31e38332664af61d92d356c444a144bfcb4e Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Wed, 20 Sep 2023 14:28:12 +0800 Subject: [PATCH] [fix](timezone) fix timezone parse when there is no tzfile (#24578) --- be/src/runtime/runtime_state.h | 1 + be/src/util/timezone_utils.cpp | 21 +++++--- be/src/util/timezone_utils.h | 4 ++ be/src/vec/functions/function_convert_tz.h | 2 +- be/test/testutil/function_utils.cpp | 10 ++-- be/test/testutil/function_utils.h | 3 +- be/test/vec/function/function_test_util.h | 7 ++- be/test/vec/function/function_time_test.cpp | 60 ++++++++++++++++++++- 8 files changed, 93 insertions(+), 15 deletions(-) diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index e862ef27a6..5275a21c64 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -94,6 +94,7 @@ public: const TQueryGlobals& query_globals, ExecEnv* exec_env); // for ut and non-query. + void set_exec_env(ExecEnv* exec_env) { _exec_env = exec_env; } void init_mem_trackers(const TUniqueId& id = TUniqueId(), const std::string& name = "unknown"); const TQueryOptions& query_options() const { return _query_options; } diff --git a/be/src/util/timezone_utils.cpp b/be/src/util/timezone_utils.cpp index 14dd8b7e53..be8c282b3a 100644 --- a/be/src/util/timezone_utils.cpp +++ b/be/src/util/timezone_utils.cpp @@ -43,6 +43,12 @@ std::unordered_map TimezoneUtils::timezone_names_map_; bool TimezoneUtils::inited_ = false; const std::string TimezoneUtils::default_time_zone = "+08:00"; +static const char* tzdir = "/usr/share/zoneinfo"; // default value, may change by TZDIR env var + +void TimezoneUtils::clear_timezone_names() { + timezone_names_map_.clear(); + inited_ = false; +} void TimezoneUtils::load_timezone_names() { if (inited_) { @@ -51,7 +57,6 @@ void TimezoneUtils::load_timezone_names() { inited_ = true; std::string path; - const char* tzdir = "/usr/share/zoneinfo"; char* tzdir_env = std::getenv("TZDIR"); if (tzdir_env && *tzdir_env) { tzdir = tzdir_env; @@ -210,7 +215,6 @@ void TimezoneUtils::load_timezones_to_cache(vectorized::ZoneList& cache_list) { cache_list["CST"] = cctz::fixed_time_zone(cctz::seconds(8 * 3600)); std::string base_str; - const char* tzdir = "/usr/share/zoneinfo"; // default // try get from System char* tzdir_env = std::getenv("TZDIR"); if (tzdir_env && *tzdir_env) { @@ -221,6 +225,11 @@ void TimezoneUtils::load_timezones_to_cache(vectorized::ZoneList& cache_list) { base_str += '/'; const auto root_path = std::filesystem::path {base_str}; + if (!std::filesystem::exists(root_path)) { + LOG_WARNING("Cannot find system tzfile. Abandon to preload timezone cache."); + return; + } + std::set ignore_paths = {"posix", "right"}; // duplications for (std::filesystem::recursive_directory_iterator it {base_str}; it != end(it); it++) { @@ -295,11 +304,11 @@ bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_ tz_parsed = true; } else { auto it = timezone_names_map_.find(timezone_lower); - if (it == timezone_names_map_.end()) { - VLOG_DEBUG << "Illegal timezone " << timezone_lower; - return false; + if (it != timezone_names_map_.end()) { + tz_parsed = cctz::load_time_zone(it->second, &ctz); + } else { + tz_parsed = cctz::load_time_zone(timezone, &ctz); } - tz_parsed = cctz::load_time_zone(it->second, &ctz); } if (tz_parsed) { if (!have_both) { // GMT only diff --git a/be/src/util/timezone_utils.h b/be/src/util/timezone_utils.h index 0f3a6dcc38..55f7eace20 100644 --- a/be/src/util/timezone_utils.h +++ b/be/src/util/timezone_utils.h @@ -37,8 +37,12 @@ class TimezoneUtils { public: static void load_timezone_names(); static void load_timezones_to_cache(vectorized::ZoneList& cache_list); + // we support to parse lower_case timezone name iff execution environment has timezone file static bool find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz); + // for ut only + static void clear_timezone_names(); + static const std::string default_time_zone; private: diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index 8ff3505aca..d2db44c117 100644 --- a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -139,7 +139,7 @@ struct ConvertTZImpl { std::unique_lock lock_(cache_lock); //TODO: the lock upgrade could be done in find_... function only when we push value into the hashmap if (!TimezoneUtils::find_cctz_time_zone(from_tz, time_zone_cache[from_tz])) { - time_zone_cache.erase(to_tz); + time_zone_cache.erase(from_tz); result_null_map[index_now] = true; result_column->insert_default(); return; diff --git a/be/test/testutil/function_utils.cpp b/be/test/testutil/function_utils.cpp index 6a87bb7009..150d81f339 100644 --- a/be/test/testutil/function_utils.cpp +++ b/be/test/testutil/function_utils.cpp @@ -39,13 +39,17 @@ FunctionUtils::FunctionUtils() { FunctionUtils::FunctionUtils(const doris::TypeDescriptor& return_type, const std::vector& arg_types, - int varargs_buffer_size) { + int varargs_buffer_size, RuntimeState* state = nullptr) { TQueryGlobals globals; globals.__set_now_string("2019-08-06 01:38:57"); globals.__set_timestamp_ms(1565026737805); globals.__set_time_zone("Asia/Shanghai"); - _state = RuntimeState::create_unique(globals).release(); - _fn_ctx = FunctionContext::create_context(_state, return_type, arg_types); + if (state == nullptr) { + _state = RuntimeState::create_unique(globals).release(); + _fn_ctx = FunctionContext::create_context(_state, return_type, arg_types); + } else { + _fn_ctx = FunctionContext::create_context(state, return_type, arg_types); + } } FunctionUtils::~FunctionUtils() { diff --git a/be/test/testutil/function_utils.h b/be/test/testutil/function_utils.h index fbb641eb07..f952ae0ed2 100644 --- a/be/test/testutil/function_utils.h +++ b/be/test/testutil/function_utils.h @@ -29,7 +29,8 @@ class FunctionUtils { public: FunctionUtils(); FunctionUtils(const doris::TypeDescriptor& return_type, - const std::vector& arg_types, int varargs_buffer_size); + const std::vector& arg_types, int varargs_buffer_size, + RuntimeState*); ~FunctionUtils(); doris::FunctionContext* get_fn_ctx() { return _fn_ctx.get(); } diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index fc9fb8a60d..c543612439 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -29,6 +29,7 @@ #include "gtest/gtest_pred_impl.h" #include "olap/olap_common.h" #include "runtime/define_primitive_type.h" +#include "runtime/exec_env.h" #include "runtime/types.h" #include "testutil/any_type.h" #include "testutil/function_utils.h" @@ -199,9 +200,11 @@ void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types // Null values are represented by Null() // The type of the constant column is represented as follows: Consted {TypeIndex::String} // A DataSet with a constant column can only have one row of data +// If state != nullptr, should set query options you use for your own. template Status check_function(const std::string& func_name, const InputTypeSet& input_types, - const DataSet& data_set, bool expect_fail = false) { + const DataSet& data_set, bool expect_fail = false, + RuntimeState* state = nullptr) { // 1.0 create data type ut_type::UTDataTypeDescs descs; EXPECT_TRUE(parse_ut_data_type(input_types, descs)); @@ -270,7 +273,7 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE; } - FunctionUtils fn_utils(fn_ctx_return, arg_types, 0); + FunctionUtils fn_utils(fn_ctx_return, arg_types, 0, state); auto* fn_ctx = fn_utils.get_fn_ctx(); fn_ctx->set_constant_cols(constant_cols); func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL); diff --git a/be/test/vec/function/function_time_test.cpp b/be/test/vec/function/function_time_test.cpp index e6281bc8f4..ba4e352f82 100644 --- a/be/test/vec/function/function_time_test.cpp +++ b/be/test/vec/function/function_time_test.cpp @@ -23,12 +23,12 @@ #include "common/status.h" #include "function_test_util.h" -#include "gtest/gtest_pred_impl.h" +#include "runtime/runtime_state.h" #include "testutil/any_type.h" +#include "util/timezone_utils.h" #include "vec/core/types.h" #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" -#include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" #include "vec/data_types/data_type_time.h" @@ -201,6 +201,62 @@ TEST(VTimestampFunctionsTest, timediff_test) { check_function(func_name, input_types, data_set); } +TEST(VTimestampFunctionsTest, convert_tz_test) { + std::string func_name = "convert_tz"; + + ExecEnv* exec_env = ExecEnv::GetInstance(); + exec_env->_global_zone_cache = std::make_unique(); + auto test_state = RuntimeState::create_unique(); + test_state->set_exec_env(exec_env); + TimezoneUtils::clear_timezone_names(); + + InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::String, TypeIndex::String}; + + { + DataSet data_set = {{{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/SHANGHAI"}, + std::string {"america/Los_angeles"}}, + Null()}}; + check_function(func_name, input_types, data_set, false, + test_state.get()); + } + + { + DataSet data_set = {{{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"}, + std::string {"UTC"}}, + str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")}, + {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"}, + std::string {"Utc"}}, + Null()}, + {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"}, + std::string {"UTC"}}, + str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")}, + {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/SHANGHAI"}, + std::string {"america/Los_angeles"}}, + Null()}}; + check_function(func_name, input_types, data_set, false, + test_state.get()); + } + + { + DataSet data_set = {{{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"}, + std::string {"UTC"}}, + str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")}, + {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"}, + std::string {"Utc"}}, + str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")}, + {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/Shanghai"}, + std::string {"UTC"}}, + str_to_datetime_v2("2019-07-31 18:18:27", "%Y-%m-%d %H:%i:%s.%f")}, + {{std::string {"2019-08-01 02:18:27"}, std::string {"Asia/SHANGHAI"}, + std::string {"america/Los_angeles"}}, + str_to_datetime_v2("2019-07-31 11:18:27", "%Y-%m-%d %H:%i:%s.%f")}}; + TimezoneUtils::load_timezone_names(); + TimezoneUtils::load_timezones_to_cache(*exec_env->_global_zone_cache); + check_function(func_name, input_types, data_set, false, + test_state.get()); + } +} + TEST(VTimestampFunctionsTest, date_format_test) { std::string func_name = "date_format";