[Enhance] Add prepare phase for some timestamp functions (#3947)
Fix: #3946 CL: 1. Add a prepare phase for the `from_unixtime()`, `date_format()` and `convert_tz()` functions, so that the format string is handled only once instead of once per row. 2. Look up the cctz timezone when initializing the `runtime state`, so that the timezone does not need to be looked up for each row. 3. Add a constant rewrite rule for `utc_timestamp()`. 4. Add documentation for `to_date()`. 5. Comment out `push_handler_test`; it cannot run in DEBUG mode and will be fixed later. 6. Remove `timezone_db.h/cpp` and add `timezone_utils.h/cpp`. The performance results are shown below (11,000,000 rows): SQL1: `select count(from_unixtime(k1)) from tbl1;` Before: 8.85s After: 2.85s SQL2: `select count(from_unixtime(k1, '%Y-%m-%d %H:%i:%s')) from tbl1 limit 1;` Before: 10.73s After: 4.85s Formatting the date string still seems slow; we may need a further enhancement for it.
This commit is contained in:
@ -19,13 +19,13 @@
|
||||
|
||||
#include "exprs/expr.h"
|
||||
#include "exprs/anyval_util.h"
|
||||
#include "exprs/timezone_db.h"
|
||||
#include "runtime/tuple_row.h"
|
||||
#include "runtime/datetime_value.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "util/path_builder.h"
|
||||
#include "runtime/string_value.hpp"
|
||||
#include "util/debug_util.h"
|
||||
#include "util/path_builder.h"
|
||||
#include "util/timezone_utils.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
@ -451,20 +451,79 @@ BigIntVal TimestampFunctions::timestamp_diff(FunctionContext* ctx, const DateTim
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare hook for functions whose argument 1 is a format string.
//
// When that argument is a constant VARCHAR, the format is run through
// convert_format() once and stored in a FormatCtx kept as FRAGMENT_LOCAL
// function state, so per-row calls can reuse it. If the preconditions do not
// hold, no state is stored and callers see a null state pointer (they then
// process the format at runtime). If the format is NULL or too long, the
// stored state is flagged invalid and callers return NULL.
void TimestampFunctions::format_prepare(
        doris_udf::FunctionContext* context,
        doris_udf::FunctionContext::FunctionStateScope scope) {
    // Evaluated left to right with short-circuiting, so the argument type is
    // only inspected once we know argument 1 exists.
    const bool can_precompute = scope == FunctionContext::FRAGMENT_LOCAL
            && context->get_num_args() >= 2
            && context->get_arg_type(1)->type == doris_udf::FunctionContext::Type::TYPE_VARCHAR
            && context->is_arg_constant(1);
    if (!can_precompute) {
        VLOG(10) << "format_prepare returned";
        return;
    }

    // Register the state before validating, so that an invalid constant format
    // is still visible to callers as "state present but not valid".
    FormatCtx* state = new FormatCtx();
    context->set_function_state(scope, state);

    StringVal* raw_format = reinterpret_cast<StringVal*>(context->get_constant_arg(1));
    if (UNLIKELY(raw_format->is_null)) {
        state->is_valid = false;
        return;
    }

    state->fmt = convert_format(context, *raw_format);

    // The per-row callers render into a 128-byte stack buffer, so a format
    // whose computed length reaches 128 is rejected up front.
    const int formatted_len =
            DateTimeValue::compute_format_len((const char*) state->fmt.ptr, state->fmt.len);
    state->is_valid = formatted_len < 128;
}
|
||||
|
||||
// Close hook paired with format_prepare(): frees the FormatCtx stored as
// FRAGMENT_LOCAL function state, if any.
//
// Calls for any other scope are ignored, mirroring format_prepare(), which
// only ever stores state for FRAGMENT_LOCAL.
void TimestampFunctions::format_close(
        doris_udf::FunctionContext* context,
        doris_udf::FunctionContext::FunctionStateScope scope) {
    if (scope != FunctionContext::FRAGMENT_LOCAL) {
        return;
    }

    FormatCtx* fc = reinterpret_cast<FormatCtx*>(
            context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
    if (fc == nullptr) {
        // format_prepare() returned before allocating a state; nothing to free.
        return;
    }

    delete fc;
    // Clear the slot so the freed pointer cannot be read back later or
    // deleted a second time if close is invoked again.
    context->set_function_state(FunctionContext::FRAGMENT_LOCAL, nullptr);
}
|
||||
|
||||
StringVal TimestampFunctions::date_format(
|
||||
FunctionContext* ctx, const DateTimeVal& ts_val, const StringVal& format) {
|
||||
if (ts_val.is_null || format.is_null) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
DateTimeValue ts_value = DateTimeValue::from_datetime_val(ts_val);
|
||||
if (ts_value.compute_format_len((const char*)format.ptr, format.len) >= 128) {
|
||||
FormatCtx* fc = reinterpret_cast<FormatCtx*>(ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
if (UNLIKELY(fc == nullptr)) {
|
||||
// prepare phase failed, calculate at runtime
|
||||
StringVal new_fmt = convert_format(ctx, format);
|
||||
if (DateTimeValue::compute_format_len((const char*) new_fmt.ptr, new_fmt.len) >= 128) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
char buf[128];
|
||||
if (!ts_value.to_format_string((const char*) new_fmt.ptr, new_fmt.len, buf)) {
|
||||
return StringVal::null();
|
||||
}
|
||||
return AnyValUtil::from_string_temp(ctx, buf);
|
||||
}
|
||||
|
||||
if (!fc->is_valid) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
StringVal new_fmt = convert_format(ctx, format);
|
||||
|
||||
char buf[128];
|
||||
if (!ts_value.to_format_string((const char*)new_fmt.ptr, new_fmt.len, buf)) {
|
||||
if (!ts_value.to_format_string((const char*) fc->fmt.ptr, fc->fmt.len, buf)) {
|
||||
return StringVal::null();
|
||||
}
|
||||
return AnyValUtil::from_string_temp(ctx, buf);
|
||||
@ -520,14 +579,15 @@ DateTimeVal TimestampFunctions::timestamp(
|
||||
return val;
|
||||
}
|
||||
|
||||
// FROM_UNIXTIME()
|
||||
// FROM_UNIXTIME() without format
|
||||
StringVal TimestampFunctions::from_unix(
|
||||
FunctionContext* context, const IntVal& unix_time) {
|
||||
if (unix_time.is_null || unix_time.val < 0 || unix_time.val > INT_MAX) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
DateTimeValue dtv;
|
||||
if (!dtv.from_unixtime(unix_time.val, context->impl()->state()->timezone())) {
|
||||
if (!dtv.from_unixtime(unix_time.val, context->impl()->state()->timezone_obj())) {
|
||||
return StringVal::null();
|
||||
}
|
||||
char buf[64];
|
||||
@ -535,21 +595,35 @@ StringVal TimestampFunctions::from_unix(
|
||||
return AnyValUtil::from_string_temp(context, buf);
|
||||
}
|
||||
|
||||
// FROM_UNIXTIME()
|
||||
// FROM_UNIXTIME() with format
|
||||
StringVal TimestampFunctions::from_unix(
|
||||
FunctionContext* context, const IntVal& unix_time, const StringVal& fmt) {
|
||||
if (unix_time.is_null || fmt.is_null || unix_time.val < 0 || unix_time.val > INT_MAX) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
DateTimeValue dtv;
|
||||
if (!dtv.from_unixtime(unix_time.val, context->impl()->state()->timezone())) {
|
||||
if (!dtv.from_unixtime(unix_time.val, context->impl()->state()->timezone_obj())) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
StringVal new_fmt = convert_format(context, fmt);
|
||||
FormatCtx* fc = reinterpret_cast<FormatCtx*>(context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
if (UNLIKELY(fc == nullptr)) {
|
||||
// prepare phase failed, calculate at runtime
|
||||
StringVal new_fmt = convert_format(context, fmt);
|
||||
char buf[128];
|
||||
if (!dtv.to_format_string((const char*)new_fmt.ptr, new_fmt.len, buf)) {
|
||||
return StringVal::null();
|
||||
}
|
||||
return AnyValUtil::from_string_temp(context, buf);
|
||||
}
|
||||
|
||||
if (!fc->is_valid) {
|
||||
return StringVal::null();
|
||||
}
|
||||
|
||||
char buf[128];
|
||||
if (!dtv.to_format_string((const char*)new_fmt.ptr, new_fmt.len, buf)) {
|
||||
if (!dtv.to_format_string((const char*) fc->fmt.ptr, fc->fmt.len, buf)) {
|
||||
return StringVal::null();
|
||||
}
|
||||
return AnyValUtil::from_string_temp(context, buf);
|
||||
@ -564,7 +638,7 @@ IntVal TimestampFunctions::to_unix(FunctionContext* context) {
|
||||
IntVal TimestampFunctions::to_unix(
|
||||
FunctionContext* context, const DateTimeValue& ts_value) {
|
||||
int64_t timestamp;
|
||||
    if(!ts_value.unix_timestamp(&timestamp, context->impl()->state()->timezone())) {
|
||||
    if(!ts_value.unix_timestamp(&timestamp, context->impl()->state()->timezone_obj())) {
|
||||
return IntVal::null();
|
||||
} else {
|
||||
//To compatible to mysql, timestamp not between 1970-01-01 00:00:00 ~ 2038-01-01 00:00:00 return 0
|
||||
@ -611,7 +685,7 @@ DateTimeVal TimestampFunctions::utc_timestamp(FunctionContext* context) {
|
||||
DateTimeVal TimestampFunctions::now(FunctionContext* context) {
|
||||
DateTimeValue dtv;
|
||||
if (!dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000,
|
||||
context->impl()->state()->timezone())) {
|
||||
context->impl()->state()->timezone_obj())) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
|
||||
@ -623,7 +697,7 @@ DateTimeVal TimestampFunctions::now(FunctionContext* context) {
|
||||
DoubleVal TimestampFunctions::curtime(FunctionContext* context) {
|
||||
DateTimeValue dtv;
|
||||
if (!dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000,
|
||||
context->impl()->state()->timezone())) {
|
||||
context->impl()->state()->timezone_obj())) {
|
||||
return DoubleVal::null();
|
||||
}
|
||||
|
||||
@ -633,7 +707,7 @@ DoubleVal TimestampFunctions::curtime(FunctionContext* context) {
|
||||
DateTimeVal TimestampFunctions::curdate(FunctionContext* context) {
|
||||
DateTimeValue dtv;
|
||||
if (!dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000,
|
||||
context->impl()->state()->timezone())) {
|
||||
context->impl()->state()->timezone_obj())) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
dtv.set_type(TIME_DATE);
|
||||
@ -643,20 +717,77 @@ DateTimeVal TimestampFunctions::curdate(FunctionContext* context) {
|
||||
return return_val;
|
||||
}
|
||||
|
||||
// Prepare hook for convert_tz(ts, from_tz, to_tz).
//
// When both timezone arguments are constant VARCHARs, the two timezones are
// resolved once via TimezoneUtils::find_cctz_time_zone() and stored in a
// ConvertTzCtx kept as FRAGMENT_LOCAL function state. If the preconditions do
// not hold, no state is stored and convert_tz() sees a null state pointer. If
// either name is NULL or cannot be resolved, the stored state is flagged
// invalid and convert_tz() returns NULL.
void TimestampFunctions::convert_tz_prepare(
        doris_udf::FunctionContext* context,
        doris_udf::FunctionContext::FunctionStateScope scope) {
    // Evaluated left to right with short-circuiting, so arguments 1 and 2 are
    // only inspected once the argument count is known to be exactly 3.
    const bool can_precompute = scope == FunctionContext::FRAGMENT_LOCAL
            && context->get_num_args() == 3
            && context->get_arg_type(1)->type == doris_udf::FunctionContext::Type::TYPE_VARCHAR
            && context->get_arg_type(2)->type == doris_udf::FunctionContext::Type::TYPE_VARCHAR
            && context->is_arg_constant(1)
            && context->is_arg_constant(2);
    if (!can_precompute) {
        return;
    }

    ConvertTzCtx* state = new ConvertTzCtx();
    context->set_function_state(scope, state);
    // Assume failure until both timezones have been resolved.
    state->is_valid = false;

    // Resolve the source timezone.
    StringVal* from_name = reinterpret_cast<StringVal*>(context->get_constant_arg(1));
    if (UNLIKELY(from_name->is_null)) {
        return;
    }
    if (!TimezoneUtils::find_cctz_time_zone(
                std::string((char*) from_name->ptr, from_name->len), state->from_tz)) {
        return;
    }

    // Resolve the destination timezone.
    StringVal* to_name = reinterpret_cast<StringVal*>(context->get_constant_arg(2));
    if (UNLIKELY(to_name->is_null)) {
        return;
    }
    if (!TimezoneUtils::find_cctz_time_zone(
                std::string((char*) to_name->ptr, to_name->len), state->to_tz)) {
        return;
    }

    state->is_valid = true;
}
|
||||
|
||||
DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeVal& ts_val,
|
||||
const StringVal& from_tz, const StringVal& to_tz) {
|
||||
if (TimezoneDatabase::find_timezone(std::string((char *)from_tz.ptr, from_tz.len)) == nullptr ||
|
||||
TimezoneDatabase::find_timezone(std::string((char *)to_tz.ptr, to_tz.len)) == nullptr
|
||||
) {
|
||||
const DateTimeValue &ts_value = DateTimeValue::from_datetime_val(ts_val);
|
||||
ConvertTzCtx* ctc = reinterpret_cast<ConvertTzCtx*>(ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
if (UNLIKELY(ctc == nullptr)) {
|
||||
int64_t timestamp;
|
||||
        if(!ts_value.unix_timestamp(&timestamp, std::string((char *)from_tz.ptr, from_tz.len))) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
DateTimeValue ts_value2;
|
||||
if (!ts_value2.from_unixtime(timestamp, std::string((char *)to_tz.ptr, to_tz.len))) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
|
||||
DateTimeVal return_val;
|
||||
ts_value2.to_datetime_val(&return_val);
|
||||
return return_val;
|
||||
}
|
||||
|
||||
if (!ctc->is_valid) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
const DateTimeValue &ts_value = DateTimeValue::from_datetime_val(ts_val);
|
||||
|
||||
int64_t timestamp;
|
||||
    if(!ts_value.unix_timestamp(&timestamp, std::string((char *)from_tz.ptr, from_tz.len))) {
|
||||
    if(!ts_value.unix_timestamp(&timestamp, ctc->from_tz)) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
DateTimeValue ts_value2;
|
||||
if (!ts_value2.from_unixtime(timestamp, std::string((char *)to_tz.ptr, to_tz.len))) {
|
||||
if (!ts_value2.from_unixtime(timestamp, ctc->to_tz)) {
|
||||
return DateTimeVal::null();
|
||||
}
|
||||
|
||||
@ -665,4 +796,17 @@ DateTimeVal TimestampFunctions::convert_tz(FunctionContext* ctx, const DateTimeV
|
||||
return return_val;
|
||||
}
|
||||
|
||||
// Close hook paired with convert_tz_prepare(): frees the ConvertTzCtx stored
// as FRAGMENT_LOCAL function state, if any.
//
// Calls for any other scope are ignored, mirroring convert_tz_prepare(), which
// only ever stores state for FRAGMENT_LOCAL.
void TimestampFunctions::convert_tz_close(
        doris_udf::FunctionContext* context,
        doris_udf::FunctionContext::FunctionStateScope scope) {
    if (scope != FunctionContext::FRAGMENT_LOCAL) {
        return;
    }

    ConvertTzCtx* ctc = reinterpret_cast<ConvertTzCtx*>(
            context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
    if (ctc == nullptr) {
        // convert_tz_prepare() returned before allocating a state; nothing to free.
        return;
    }

    delete ctc;
    // Clear the slot so the freed pointer cannot be read back later or
    // deleted a second time if close is invoked again.
    context->set_function_state(FunctionContext::FRAGMENT_LOCAL, nullptr);
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user