[Enhance] Add prepare phase for some timestamp functions (#3947)

Fix: #3946 

CL:
1. Add a prepare phase for the `from_unixtime()`, `date_format()` and `convert_tz()` functions, so that the format string is handled once for all rows.
2. Find the cctz timezone when initializing the `runtime state`, so that the timezone does not need to be looked up for each row.
3. Add a constant rewrite rule for `utc_timestamp()`
4. Add doc for `to_date()`
5. Comment out the `push_handler_test`; it cannot run in DEBUG mode and will be fixed later.
6. Remove `timezone_db.h/cpp` and add `timezone_utils.h/cpp`

The performance results are shown below:

11,000,000 rows

SQL1: `select count(from_unixtime(k1)) from tbl1;`
Before: 8.85s
After: 2.85s

SQL2: `select count(from_unixtime(k1, '%Y-%m-%d %H:%i:%s')) from tbl1 limit 1;`
Before: 10.73s
After: 4.85s

Date string formatting still seems slow; we may need a further enhancement for it.
This commit is contained in:
Mingyu Chen
2020-06-29 19:15:09 +08:00
committed by GitHub
parent 9671394015
commit af1beb6ce4
21 changed files with 423 additions and 568 deletions

View File

@ -26,7 +26,6 @@
#include "common/status.h"
#include "exec/exec_node.h"
#include "exprs/expr.h"
#include "exprs/timezone_db.h"
#include "runtime/buffered_block_mgr2.h"
#include "runtime/bufferpool/reservation_util.h"
#include "runtime/descriptors.h"
@ -41,6 +40,7 @@
#include "util/file_utils.h"
#include "util/pretty_printer.h"
#include "util/load_error_hub.h"
#include "util/timezone_utils.h"
#include "runtime/mem_tracker.h"
#include "runtime/bufferpool/reservation_tracker.h"
@ -110,7 +110,7 @@ RuntimeState::RuntimeState(const TQueryGlobals& query_globals)
_timezone = query_globals.time_zone;
_timestamp_ms = query_globals.timestamp_ms;
} else if (!query_globals.now_string.empty()) {
_timezone = TimezoneDatabase::default_time_zone;
_timezone = TimezoneUtils::default_time_zone;
DateTimeValue dt;
dt.from_date_str(query_globals.now_string.c_str(), query_globals.now_string.size());
int64_t timestamp;
@ -118,9 +118,10 @@ RuntimeState::RuntimeState(const TQueryGlobals& query_globals)
_timestamp_ms = timestamp * 1000;
} else {
//Unit test may set into here
_timezone = TimezoneDatabase::default_time_zone;
_timezone = TimezoneUtils::default_time_zone;
_timestamp_ms = 0;
}
TimezoneUtils::find_cctz_time_zone(_timezone, _timezone_obj);
}
RuntimeState::~RuntimeState() {
@ -184,7 +185,7 @@ Status RuntimeState::init(
_timezone = query_globals.time_zone;
_timestamp_ms = query_globals.timestamp_ms;
} else if (!query_globals.now_string.empty()) {
_timezone = TimezoneDatabase::default_time_zone;
_timezone = TimezoneUtils::default_time_zone;
DateTimeValue dt;
dt.from_date_str(query_globals.now_string.c_str(), query_globals.now_string.size());
int64_t timestamp;
@ -192,9 +193,11 @@ Status RuntimeState::init(
_timestamp_ms = timestamp * 1000;
} else {
//Unit test may set into here
_timezone = TimezoneDatabase::default_time_zone;
_timezone = TimezoneUtils::default_time_zone;
_timestamp_ms = 0;
}
TimezoneUtils::find_cctz_time_zone(_timezone, _timezone_obj);
_exec_env = exec_env;
if (_query_options.max_errors <= 0) {