Fix: #3946 CL: 1. Add a prepare phase for the `from_unixtime()`, `date_format()` and `convert_tz()` functions, so that the format string is handled once instead of once per row. 2. Find the cctz timezone when initializing the `runtime state`, so that we don't need to look up the timezone for each row. 3. Add a constant rewrite rule for `utc_timestamp()`. 4. Add doc for `to_date()`. 5. Comment out the `push_handler_test`; it cannot run in DEBUG mode and will be fixed later. 6. Remove `timezone_db.h/cpp` and add `timezone_utils.h/cpp`. The performance is shown below: 11,000,000 rows SQL1: `select count(from_unixtime(k1)) from tbl1;` Before: 8.85s After: 2.85s SQL2: `select count(from_unixtime(k1, '%Y-%m-%d %H:%i:%s')) from tbl1 limit 1;` Before: 10.73s After: 4.85s Date string formatting still seems slow; we may need a further enhancement for it.
234 lines
12 KiB
C++
234 lines
12 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#ifndef DORIS_BE_SRC_QUERY_EXPRS_TIMESTAMP_FUNCTIONS_H
|
|
#define DORIS_BE_SRC_QUERY_EXPRS_TIMESTAMP_FUNCTIONS_H
|
|
|
|
#include <boost/date_time/posix_time/posix_time.hpp>
|
|
#include <boost/date_time/gregorian/gregorian.hpp>
|
|
#include <boost/date_time/time_zone_base.hpp>
|
|
#include <boost/date_time/local_time/local_time.hpp>
|
|
#include <boost/thread/thread.hpp>
|
|
#include "runtime/string_value.h"
|
|
#include "runtime/datetime_value.h"
|
|
|
|
namespace doris {
|
|
|
|
class Expr;
|
|
class OpcodeRegistry;
|
|
class TupleRow;
|
|
|
|
// The context used for timestamp function prepare phase,
|
|
// to save the converted date formatter, so that it doesn't
|
|
// need to be converted for each rows.
|
|
struct FormatCtx {
|
|
// false means the format is invalid, and the function always return null
|
|
bool is_valid = false;
|
|
StringVal fmt;
|
|
};
|
|
|
|
// The context used for convert tz
|
|
struct ConvertTzCtx {
|
|
// false means the format is invalid, and the function always return null
|
|
bool is_valid = false;
|
|
cctz::time_zone from_tz;
|
|
cctz::time_zone to_tz;
|
|
};
|
|
|
|
class TimestampFunctions {
|
|
public:
|
|
static void init();
|
|
|
|
// Functions to extract parts of the timestamp, return integers.
|
|
static doris_udf::IntVal year(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal quarter(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal month(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal day_of_week(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal day_of_month(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal day_of_year(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal week_of_year(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal hour(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal minute(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal second(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
|
|
|
|
// Date/time functions.
|
|
static doris_udf::DateTimeVal to_date(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::IntVal date_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
|
|
const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::DoubleVal time_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
|
|
const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::DateTimeVal years_add(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal years_sub(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal months_add(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal months_sub(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal weeks_add(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal weeks_sub(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal days_add(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal days_sub(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal hours_add(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal hours_sub(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal minutes_add(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal minutes_sub(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal seconds_add(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal seconds_sub(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal micros_add(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::DateTimeVal micros_sub(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count);
|
|
static doris_udf::StringVal date_format(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::StringVal& format);
|
|
static doris_udf::DateTimeVal from_days(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::IntVal& days);
|
|
static doris_udf::IntVal to_days(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::DateTimeVal str_to_date(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::StringVal& str,
|
|
const doris_udf::StringVal& format);
|
|
static doris_udf::StringVal month_name(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val);
|
|
static doris_udf::StringVal day_name(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val);
|
|
|
|
// timestamp function
|
|
template <TimeUnit unit>
|
|
static doris_udf::BigIntVal timestamp_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::BigIntVal years_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::BigIntVal months_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::BigIntVal weeks_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::BigIntVal days_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::BigIntVal hours_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::BigIntVal minutes_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2);
|
|
static doris_udf::BigIntVal seconds_diff(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1, const doris_udf::DateTimeVal& ts_val2);
|
|
|
|
// TimeZone correlation functions.
|
|
static doris_udf::DateTimeVal timestamp(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& val);
|
|
// Helper for add/sub functions on the time portion.
|
|
template <TimeUnit unit>
|
|
static doris_udf::DateTimeVal timestamp_time_op(
|
|
doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
|
|
const doris_udf::IntVal& count, bool is_add);
|
|
static doris_udf::DateTimeVal now(doris_udf::FunctionContext* context);
|
|
static doris_udf::DoubleVal curtime(doris_udf::FunctionContext* context);
|
|
static doris_udf::DateTimeVal curdate(doris_udf::FunctionContext* context);
|
|
static doris_udf::DateTimeVal utc_timestamp(doris_udf::FunctionContext* context);
|
|
/// Returns the current time.
|
|
static doris_udf::IntVal to_unix(
|
|
FunctionContext* context, const DateTimeValue& ts_value);
|
|
static doris_udf::IntVal to_unix(doris_udf::FunctionContext* context);
|
|
/// Converts 'tv_val' to a unix time_t
|
|
static doris_udf::IntVal to_unix(
|
|
doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& tv_val);
|
|
/// Parses 'string_val' based on the format 'fmt'.
|
|
static doris_udf::IntVal to_unix(
|
|
doris_udf::FunctionContext* context, const doris_udf::StringVal& string_val,
|
|
const doris_udf::StringVal& fmt);
|
|
/// Return a timestamp string from a unix time_t
|
|
/// Optional second argument is the format of the string.
|
|
/// TIME is the integer type of the unix time argument.
|
|
static doris_udf::StringVal from_unix(
|
|
doris_udf::FunctionContext* context, const doris_udf::IntVal& unix_time);
|
|
static doris_udf::StringVal from_unix(
|
|
doris_udf::FunctionContext* context, const doris_udf::IntVal& unix_time,
|
|
const doris_udf::StringVal& fmt);
|
|
static doris_udf::DateTimeVal convert_tz(doris_udf::FunctionContext* ctx,
|
|
const doris_udf::DateTimeVal& ts_val, const doris_udf::StringVal& from_tz,
|
|
const doris_udf::StringVal& to_tz);
|
|
|
|
// Helper function to check date/time format strings.
|
|
// TODO: eventually return format converted from Java to Boost.
|
|
static bool check_format(const StringVal& format, DateTimeValue& t);
|
|
|
|
// In order to support 0.11 grayscale upgrade
|
|
// Todo(kks): remove this method when 0.12 release
|
|
static StringVal convert_format(doris_udf::FunctionContext* ctx, const StringVal& format);
|
|
|
|
// Issue a warning for a bad format string.
|
|
static void report_bad_format(const StringVal* format);
|
|
|
|
static void format_prepare(
|
|
doris_udf::FunctionContext* context,
|
|
doris_udf::FunctionContext::FunctionStateScope scope);
|
|
|
|
static void format_close(
|
|
doris_udf::FunctionContext* context,
|
|
doris_udf::FunctionContext::FunctionStateScope scope);
|
|
|
|
static void convert_tz_prepare(
|
|
doris_udf::FunctionContext* context,
|
|
doris_udf::FunctionContext::FunctionStateScope scope);
|
|
|
|
static void convert_tz_close(
|
|
doris_udf::FunctionContext* context,
|
|
doris_udf::FunctionContext::FunctionStateScope scope);
|
|
};
|
|
}
|
|
|
|
#endif
|