Files
doris/be/src/exprs/timestamp_functions.h
Mingyu Chen af1beb6ce4 [Enhance] Add prepare phase for some timestamp functions (#3947)
Fix: #3946 

CL:
1. Add a prepare phase for the `from_unixtime()`, `date_format()` and `convert_tz()` functions, so that the format string is handled once instead of once per row.
2. Find the cctz timezone when initializing the `runtime state`, so that the timezone does not need to be looked up for each row.
3. Add constant rewrite rule for `utc_timestamp()`
4. Add doc for `to_date()`
5. Comment out `push_handler_test`; it cannot run in DEBUG mode and will be fixed later.
6. Remove `timezone_db.h/cpp` and add `timezone_utils.h/cpp`

The performance is shown below:

11,000,000 rows

SQL1: `select count(from_unixtime(k1)) from tbl1;`
Before: 8.85s
After: 2.85s

SQL2: `select count(from_unixtime(k1, '%Y-%m-%d %H:%i:%s')) from tbl1 limit 1;`
Before: 10.73s
After: 4.85s

Date string formatting still seems slow; we may need to enhance it further.
2020-06-29 19:15:09 +08:00

234 lines
12 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef DORIS_BE_SRC_QUERY_EXPRS_TIMESTAMP_FUNCTIONS_H
#define DORIS_BE_SRC_QUERY_EXPRS_TIMESTAMP_FUNCTIONS_H
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/date_time/time_zone_base.hpp>
#include <boost/date_time/local_time/local_time.hpp>
#include <boost/thread/thread.hpp>
#include "runtime/string_value.h"
#include "runtime/datetime_value.h"
namespace doris {
class Expr;
class OpcodeRegistry;
class TupleRow;
// State produced by the prepare phase of the timestamp functions that
// take a format string. It stores the already-converted date format so
// that the conversion does not have to be repeated for every row.
struct FormatCtx {
    // Remains false when the format is invalid; in that case the
    // function always returns null.
    bool is_valid{false};
    // The saved (converted) format string.
    StringVal fmt;
};
// State used by convert_tz: the source and destination time zones are
// kept here as cctz::time_zone objects.
struct ConvertTzCtx {
    // Remains false when the context is unusable; in that case the
    // function always returns null.
    // NOTE(review): the original comment says "the format is invalid";
    // for convert_tz this presumably means an invalid time zone
    // argument -- confirm against convert_tz_prepare().
    bool is_valid{false};
    // Time zone the input value is converted from.
    cctz::time_zone from_tz;
    // Time zone the input value is converted to.
    cctz::time_zone to_tz;
};
// Collection of static entry points for the date/time functions.
//
// Every member is static. Most take a doris_udf::FunctionContext* as the
// first parameter followed by doris_udf value wrappers, and return a
// doris_udf value wrapper.
//
// All UDF types are spelled with an explicit doris_udf:: qualifier so that
// this header does not depend on using-declarations leaking from other
// headers.
class TimestampFunctions {
public:
    // One-time initialization hook. What it sets up is defined in the
    // implementation file.
    static void init();

    // ------------------------------------------------------------------
    // Functions to extract parts of the timestamp; all return integers.
    // ------------------------------------------------------------------
    static doris_udf::IntVal year(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal quarter(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal month(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal day_of_week(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal day_of_month(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal day_of_year(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal week_of_year(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal hour(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal minute(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::IntVal second(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& ts_val);

    // ------------------------------------------------------------------
    // Date/time functions.
    // ------------------------------------------------------------------
    static doris_udf::DateTimeVal to_date(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val);

    // Difference between two values. date_diff yields an integer and
    // time_diff a double; the unit of each result is defined by the
    // implementation.
    static doris_udf::IntVal date_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);
    static doris_udf::DoubleVal time_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);

    // <unit>_add / <unit>_sub pairs. Each takes a date-time value and an
    // integer 'count' and returns a date-time value.
    static doris_udf::DateTimeVal years_add(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal years_sub(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal months_add(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal months_sub(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal weeks_add(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal weeks_sub(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal days_add(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal days_sub(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal hours_add(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal hours_sub(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal minutes_add(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal minutes_sub(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal seconds_add(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal seconds_sub(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal micros_add(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);
    static doris_udf::DateTimeVal micros_sub(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count);

    // Takes a date-time value plus a format string and returns a string.
    static doris_udf::StringVal date_format(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::StringVal& format);

    // Conversions between a day count (integer) and a date-time value.
    static doris_udf::DateTimeVal from_days(
            doris_udf::FunctionContext* ctx, const doris_udf::IntVal& days);
    static doris_udf::IntVal to_days(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val);

    // Takes a string plus a format string and returns a date-time value.
    static doris_udf::DateTimeVal str_to_date(
            doris_udf::FunctionContext* ctx, const doris_udf::StringVal& str,
            const doris_udf::StringVal& format);

    // Name lookups; both return strings.
    static doris_udf::StringVal month_name(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val);
    static doris_udf::StringVal day_name(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val);

    // ------------------------------------------------------------------
    // Timestamp difference functions.
    // ------------------------------------------------------------------
    // Generic form, parameterized on the TimeUnit.
    // NOTE(review): the <unit>_diff functions below presumably forward to
    // this template -- confirm in the implementation file.
    template <TimeUnit unit>
    static doris_udf::BigIntVal timestamp_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);
    static doris_udf::BigIntVal years_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);
    static doris_udf::BigIntVal months_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);
    static doris_udf::BigIntVal weeks_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);
    static doris_udf::BigIntVal days_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);
    static doris_udf::BigIntVal hours_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);
    static doris_udf::BigIntVal minutes_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);
    static doris_udf::BigIntVal seconds_diff(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val1,
            const doris_udf::DateTimeVal& ts_val2);

    // Time zone related functions.
    static doris_udf::DateTimeVal timestamp(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& val);

    // Helper for add/sub functions on the time portion. 'is_add' selects
    // between the add and sub flavours.
    template <TimeUnit unit>
    static doris_udf::DateTimeVal timestamp_time_op(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::IntVal& count, bool is_add);

    // ------------------------------------------------------------------
    // Argument-less functions.
    // ------------------------------------------------------------------
    static doris_udf::DateTimeVal now(doris_udf::FunctionContext* context);
    static doris_udf::DoubleVal curtime(doris_udf::FunctionContext* context);
    static doris_udf::DateTimeVal curdate(doris_udf::FunctionContext* context);
    static doris_udf::DateTimeVal utc_timestamp(doris_udf::FunctionContext* context);

    // ------------------------------------------------------------------
    // to_unix overloads.
    // ------------------------------------------------------------------
    /// Overload taking the engine-internal DateTimeValue instead of a UDF
    /// value wrapper.
    /// NOTE(review): presumably the shared helper behind the overloads
    /// below -- confirm in the implementation file.
    static doris_udf::IntVal to_unix(
            doris_udf::FunctionContext* context, const DateTimeValue& ts_value);
    /// Returns the current time.
    static doris_udf::IntVal to_unix(doris_udf::FunctionContext* context);
    /// Converts 'tv_val' to a unix time_t.
    static doris_udf::IntVal to_unix(
            doris_udf::FunctionContext* context, const doris_udf::DateTimeVal& tv_val);
    /// Parses 'string_val' based on the format 'fmt'.
    static doris_udf::IntVal to_unix(
            doris_udf::FunctionContext* context, const doris_udf::StringVal& string_val,
            const doris_udf::StringVal& fmt);

    // ------------------------------------------------------------------
    // from_unix overloads.
    // ------------------------------------------------------------------
    /// Return a timestamp string from a unix time_t.
    /// The unix time argument is passed as an IntVal.
    static doris_udf::StringVal from_unix(
            doris_udf::FunctionContext* context, const doris_udf::IntVal& unix_time);
    /// Same as above; the additional 'fmt' argument is the format of the
    /// returned string.
    static doris_udf::StringVal from_unix(
            doris_udf::FunctionContext* context, const doris_udf::IntVal& unix_time,
            const doris_udf::StringVal& fmt);

    // Takes a date-time value and two time zone strings ('from_tz',
    // 'to_tz') and returns a date-time value.
    static doris_udf::DateTimeVal convert_tz(
            doris_udf::FunctionContext* ctx, const doris_udf::DateTimeVal& ts_val,
            const doris_udf::StringVal& from_tz, const doris_udf::StringVal& to_tz);

    // ------------------------------------------------------------------
    // Format string helpers.
    // ------------------------------------------------------------------
    // Helper function to check date/time format strings.
    // TODO: eventually return format converted from Java to Boost.
    static bool check_format(const doris_udf::StringVal& format, DateTimeValue& t);

    // Exists in order to support the 0.11 grayscale upgrade.
    // TODO(kks): remove this method when 0.12 is released.
    static doris_udf::StringVal convert_format(
            doris_udf::FunctionContext* ctx, const doris_udf::StringVal& format);

    // Issue a warning for a bad format string.
    static void report_bad_format(const doris_udf::StringVal* format);

    // ------------------------------------------------------------------
    // Prepare/close hooks, called with the function state scope.
    // NOTE(review): format_* presumably create and release a FormatCtx and
    // convert_tz_* a ConvertTzCtx -- confirm in the implementation file.
    // ------------------------------------------------------------------
    static void format_prepare(
            doris_udf::FunctionContext* context,
            doris_udf::FunctionContext::FunctionStateScope scope);
    static void format_close(
            doris_udf::FunctionContext* context,
            doris_udf::FunctionContext::FunctionStateScope scope);
    static void convert_tz_prepare(
            doris_udf::FunctionContext* context,
            doris_udf::FunctionContext::FunctionStateScope scope);
    static void convert_tz_close(
            doris_udf::FunctionContext* context,
            doris_udf::FunctionContext::FunctionStateScope scope);
};
}
#endif