[Enhance] Add prepare phase for some timestamp functions (#3947)

Fix: #3946 

CL:
1. Add prepare phase for `from_unixtime()`, `date_format()` and `convert_tz()` functions, to handle the format string once for all.
2. Look up the cctz timezone when initializing the `runtime state`, so that the timezone does not need to be looked up for each row.
3. Add constant rewrite rule for `utc_timestamp()`
4. Add doc for `to_date()`
5. Comment out the `push_handler_test`; it cannot run in DEBUG mode and will be fixed later.
6. Remove `timezone_db.h/cpp` and add `timezone_utils.h/cpp`

The performance results are shown below:

11,000,000 rows

SQL1: `select count(from_unixtime(k1)) from tbl1;`
Before: 8.85s
After: 2.85s

SQL2: `select count(from_unixtime(k1, '%Y-%m-%d %H:%i:%s')) from tbl1 limit 1;`
Before: 10.73s
After: 4.85s

Date string formatting still seems slow; we may need to optimize it further.
This commit is contained in:
Mingyu Chen
2020-06-29 19:15:09 +08:00
committed by GitHub
parent 9671394015
commit af1beb6ce4
21 changed files with 423 additions and 568 deletions

View File

@ -16,6 +16,7 @@
// under the License.
#include "runtime/datetime_value.h"
#include "util/timezone_utils.h"
#include <ctype.h>
#include <string.h>
@ -65,20 +66,6 @@ DateTimeValue DateTimeValue::_s_min_datetime_value(0, TIME_DATETIME, 0, 0, 0, 0,
DateTimeValue DateTimeValue::_s_max_datetime_value(0, TIME_DATETIME, 23, 59, 59, 0,
9999, 12, 31);
RE2 DateTimeValue::time_zone_offset_format_reg("^[+-]{1}\\d{2}\\:\\d{2}$");
// jint length_of_str(DateTimeValue& value) {
// j if (_type == TIME_DATE) {
// j return 10;
// j } else {
// j int extra_len = (_microsecond == 0) ? 0 : 7;
// j if (_type == TIME_DATETIME) {
// j return 19 + extra_len;
// j } else {
// j // TIME
// j return 8 + extra_len + _neg
// j + (_hour > 100) ? 1 : 0;
// j }
// j }
// j}
bool DateTimeValue::check_range() const {
return _year > 9999 || _month > 12 || _day > 31
@ -600,7 +587,7 @@ static char* append_with_prefix(const char* str, int str_len,
return to;
}
int DateTimeValue::compute_format_len(const char* format, int len) const {
int DateTimeValue::compute_format_len(const char* format, int len) {
int size = 0;
const char* ptr = format;
const char* end = format + len;
@ -1532,10 +1519,13 @@ bool DateTimeValue::date_add_interval(const TimeInterval& interval, TimeUnit uni
bool DateTimeValue::unix_timestamp(int64_t* timestamp, const std::string& timezone) const{
cctz::time_zone ctz;
if (!find_cctz_time_zone(timezone, ctz)) {
if (!TimezoneUtils::find_cctz_time_zone(timezone, ctz)) {
return false;
}
return unix_timestamp(timestamp, ctz);
}
bool DateTimeValue::unix_timestamp(int64_t* timestamp, const cctz::time_zone& ctz) const{
const auto tp =
cctz::convert(cctz::civil_second(_year, _month, _day, _hour, _minute, _second), ctz);
*timestamp = tp.time_since_epoch().count();
@ -1544,10 +1534,13 @@ bool DateTimeValue::unix_timestamp(int64_t* timestamp, const std::string& timezo
bool DateTimeValue::from_unixtime(int64_t timestamp, const std::string& timezone) {
cctz::time_zone ctz;
if (!find_cctz_time_zone(timezone, ctz)) {
if (!TimezoneUtils::find_cctz_time_zone(timezone, ctz)) {
return false;
}
return from_unixtime(timestamp, ctz);
}
bool DateTimeValue::from_unixtime(int64_t timestamp, const cctz::time_zone& ctz) {
static const cctz::time_point<cctz::sys_seconds> epoch =
std::chrono::time_point_cast<cctz::sys_seconds>(std::chrono::system_clock::from_time_t(0));
cctz::time_point<cctz::sys_seconds> t = epoch + cctz::seconds(timestamp);
@ -1584,7 +1577,7 @@ const char* DateTimeValue::day_name() const {
DateTimeValue DateTimeValue::local_time() {
DateTimeValue value;
value.from_unixtime(time(NULL), TimezoneDatabase::default_time_zone);
value.from_unixtime(time(NULL), TimezoneUtils::default_time_zone);
return value;
}