// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "runtime/datetime_value.h" #include #include #include #include #include #include "common/logging.h" namespace doris { const char* DateTimeValue::_s_llvm_class_name = "class.doris::DateTimeValue"; const uint64_t log_10_int[] = { 1, 10, 100, 1000, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL, 1000000000UL, 10000000000UL, 100000000000UL }; static int s_days_in_month[13] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; static const char* s_month_name[] = {"", "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", NULL}; static const char* s_ab_month_name[] = {"", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", NULL}; static const char* s_day_name[] = {"Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", NULL}; static const char* s_ab_day_name[] = {"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun", NULL}; uint8_t mysql_week_mode(uint32_t mode) { mode &= 7; if (!(mode & WEEK_MONDAY_FIRST)) { mode ^= WEEK_FIRST_WEEKDAY; } return mode; } static bool is_leap(uint32_t year) { return ((year % 4) == 0) && ((year % 100 != 0) || ((year % 400) == 0 && year)); } static uint32_t calc_days_in_year(uint32_t year) { return is_leap(year) ? 366 : 365; } DateTimeValue DateTimeValue::_s_min_datetime_value(0, TIME_DATETIME, 0, 0, 0, 0, 0, 1, 1); DateTimeValue DateTimeValue::_s_max_datetime_value(0, TIME_DATETIME, 23, 59, 59, 0, 9999, 12, 31); // jint length_of_str(DateTimeValue& value) { // j if (_type == TIME_DATE) { // j return 10; // j } else { // j int extra_len = (_microsecond == 0) ? 0 : 7; // j if (_type == TIME_DATETIME) { // j return 19 + extra_len; // j } else { // j // TIME // j return 8 + extra_len + _neg // j + (_hour > 100) ? 1 : 0; // j } // j } // j} bool DateTimeValue::check_range() const { return _year > 9999 || _month > 12 || _day > 31 || _hour > (_type == TIME_TIME ? TIME_MAX_HOUR : 23) || _minute > 59 || _second > 59 || _microsecond > 999999; } bool DateTimeValue::check_date() const { if (_month == 0 || _day == 0) { return true; } if (_day > s_days_in_month[_month]) { // Feb 29 in leap year is valid. if (_month == 2 && _day == 29 && is_leap(_year)) { return false; } return true; } return false; } // The interval format is that with no delimiters // YYYY-MM-DD HH-MM-DD.FFFFFF AM in default format // 0 1 2 3 4 5 6 7 bool DateTimeValue::from_date_str(const char* date_str, int len) { const char* ptr = date_str; const char* end = date_str + len; // ONLY 2, 6 can follow by a sapce const static int allow_space_mask = 4 | 64; const static int MAX_DATE_PARTS = 8; uint32_t date_val[MAX_DATE_PARTS]; int32_t date_len[MAX_DATE_PARTS]; _neg = false; // Skip space character while (ptr < end && isspace(*ptr)) { ptr++; } if (ptr == end || !isdigit(*ptr)) { return false; } // Fix year length const char* pos = ptr; while (pos < end && (isdigit(*pos) || *pos == 'T')) { pos++; } int year_len = 4; int digits = pos - ptr; bool is_interval_format = false; // Compatible with MySQL. Shit!!! // For YYYYMMDD/YYYYMMDDHHMMSS is 4 digits years if (pos == end || *pos == '.') { if (digits == 4 || digits == 8 || digits >= 14) { year_len = 4; } else { year_len = 2; } is_interval_format = true; } int field_idx = 0; int field_len = year_len; while (ptr < end && isdigit(*ptr) && field_idx < MAX_DATE_PARTS - 1) { const char* start = ptr; int temp_val = 0; bool scan_to_delim = (!is_interval_format) && (field_idx != 6); while (ptr < end && isdigit(*ptr) && (scan_to_delim || field_len--)) { temp_val = temp_val * 10 + (*ptr++ - '0'); } // Imposible if (temp_val > 999999L) { return false; } date_val[field_idx] = temp_val; date_len[field_idx] = ptr - start; field_len = 2; if (ptr == end) { field_idx++; break; } if (field_idx == 2 && *ptr == 'T') { // YYYYMMDDTHHMMDD, skip 'T' and continue ptr++; field_idx++; continue; } // Second part if (field_idx == 5) { if (*ptr == '.') { ptr++; field_len = 6; } else if (isdigit(*ptr)) { field_idx++; break; } field_idx++; continue; } // escape separator while (ptr < end && (ispunct(*ptr) || isspace(*ptr))) { if (isspace(*ptr)) { if (((1 << field_idx) & allow_space_mask) == 0) { return false; } } ptr++; } field_idx++; } int num_field = field_idx; if (num_field <= 3) { _type = TIME_DATE; } else { _type = TIME_DATETIME; } if (!is_interval_format) { year_len = date_len[0]; } for (; field_idx < MAX_DATE_PARTS; ++field_idx) { date_len[field_idx] = 0; date_val[field_idx] = 0; } _year = date_val[0]; _month = date_val[1]; _day = date_val[2]; _hour = date_val[3]; _minute = date_val[4]; _second = date_val[5]; _microsecond = date_val[6]; if (_microsecond && date_len[6] < 6) { _microsecond *= log_10_int[6 - date_len[6]]; } if (year_len == 2) { if (_year < YY_PART_YEAR) { _year += 2000; } else { _year += 1900; } } if (num_field < 3 || check_range()) { return false; } if (check_date()) { return false; } return true; } // [0, 101) invalid // [101, (YY_PART_YEAR - 1) * 10000 + 1231] for two digits year 2000 ~ 2069 // [(YY_PART_YEAR - 1) * 10000 + 1231, YY_PART_YEAR * 10000L + 101) invalid // [YY_PART_YEAR * 10000L + 101, 991231] for two digits year 1970 ~1999 // (991231, 10000101) invalid, because support 1000-01-01 // [10000101, 99991231] for four digits year date value. // (99991231, 101000000) invalid, NOTE below this is datetime vaule hh:mm:ss must exist. // [101000000, (YY_PART_YEAR - 1)##1231235959] two digits year datetime value // ((YY_PART_YEAR - 1)##1231235959, YY_PART_YEAR##0101000000) invalid // ((YY_PART_YEAR)##1231235959, 99991231235959] two digits year datetime value 1970 ~ 1999 // (999991231235959, ~) valid int64_t DateTimeValue::standardlize_timevalue(int64_t value) { _type = TIME_DATE; if (value <= 0) { return 0; } if (value >= 10000101000000L) { // 9999-99-99 99:99:99 if (value > 99999999999999L) { return 0; } // between 1000-01-01 00:00:00L and 9999-99-99 99:99:99 // all digits exist. _type = TIME_DATETIME; return value; } // 2000-01-01 if (value < 101) { return 0; } // two digits year. 2000 ~ 2069 if (value <= (YY_PART_YEAR - 1) * 10000L + 1231L) { return (value + 20000000L) * 1000000L; } // two digits year, invalid date if (value < YY_PART_YEAR * 10000L + 101) { return 0; } // two digits year. 1970 ~ 1999 if (value <= 991231L) { return (value + 19000000L) * 1000000L; } // TODO(zhaochun): Don't allow year betwen 1000-01-01 if (value < 10000101) { return 0; } // four digits years without hour. if (value <= 99991231L) { return value * 1000000L; } // below 0000-01-01 if (value < 101000000) { return 0; } // below is with datetime, must have hh:mm:ss _type = TIME_DATETIME; // 2000 ~ 2069 if (value <= (YY_PART_YEAR - 1) * 10000000000L + 1231235959L) { return value + 20000000000000L; } if (value < YY_PART_YEAR * 10000000000L + 101000000L) { return 0; } // 1970 ~ 1999 if (value <= 991231235959L) { return value + 19000000000000L; } return value; } bool DateTimeValue::from_date_int64(int64_t value) { _neg = false; value = standardlize_timevalue(value); if (value <= 0) { return false; } uint64_t date = value / 1000000; uint64_t time = value % 1000000; _year = date / 10000; date %= 10000; _month = date / 100; _day = date % 100; _hour = time / 10000; time %= 10000; _minute = time / 100; _second = time % 100; _microsecond = 0; if (check_range() || check_date()) { return false; } return true; } void DateTimeValue::set_zero(int type) { memset(this, 0, sizeof(*this)); _type = type; } void DateTimeValue::set_type(int type) { _type = type; } void DateTimeValue::set_max_time(bool neg) { set_zero(TIME_TIME); _hour = TIME_MAX_HOUR; _minute = TIME_MAX_MINUTE; _second = TIME_MAX_SECOND; _neg = neg; } bool DateTimeValue::from_time_int64(int64_t value) { _type = TIME_TIME; if (value > TIME_MAX_VALUE) { // 0001-01-01 00:00:00 to convert to a datetime if (value > 10000000000L) { if (from_date_int64(value)) { return true; } } set_max_time(false); return false; } else if (value < -1 * TIME_MAX_VALUE) { set_max_time(true); return false; } if (value < 0) { _neg = 1; value = -value; } _hour = value / 10000; value %= 10000; _minute = value / 100; if (_minute > TIME_MAX_MINUTE) { return false; } _second = value % 100; if (_second > TIME_MAX_SECOND) { return false; } return true; } char* DateTimeValue::append_date_string(char *to) const { uint32_t temp; // Year temp = _year / 100; *to++ = (char) ('0' + (temp / 10)); *to++ = (char) ('0' + (temp % 10)); temp = _year % 100; *to++ = (char) ('0' + (temp / 10)); *to++ = (char) ('0' + (temp % 10)); *to++ = '-'; // Month *to++ = (char) ('0' + (_month / 10)); *to++ = (char) ('0' + (_month % 10)); *to++ = '-'; // Day *to++ = (char) ('0' + (_day / 10)); *to++ = (char) ('0' + (_day % 10)); return to; } char* DateTimeValue::append_time_string(char *to) const { if (_neg) { *to++ = '-'; } // Hour uint32_t temp = _hour; if (temp >= 100) { *to++ = (char) ('0' + (temp / 100)); temp %= 100; } *to++ = (char) ('0' + (temp / 10)); *to++ = (char) ('0' + (temp % 10)); *to++ = ':'; // Minute *to++ = (char) ('0' + (_minute / 10)); *to++ = (char) ('0' + (_minute % 10)); *to++ = ':'; /* Second */ *to++ = (char) ('0' + (_second / 10)); *to++ = (char) ('0' + (_second % 10)); if (_microsecond > 0) { *to++ = '.'; uint32_t first = _microsecond / 10000; uint32_t second = (_microsecond % 10000) / 100; uint32_t third = _microsecond % 100; *to++ = (char) ('0' + first / 10); *to++ = (char) ('0' + first % 10); *to++ = (char) ('0' + second / 10); *to++ = (char) ('0' + second % 10); *to++ = (char) ('0' + third / 10); *to++ = (char) ('0' + third % 10); } return to; } char* DateTimeValue::to_datetime_string(char* to) const { to = append_date_string(to); *to++ = ' '; to = append_time_string(to); *to++ = '\0'; return to; } char* DateTimeValue::to_date_string(char* to) const { to = append_date_string(to); *to++ = '\0'; return to; } char* DateTimeValue::to_time_string(char* to) const { to = append_time_string(to); *to++ = '\0'; return to; } char* DateTimeValue::to_string(char* to) const { switch (_type) { case TIME_TIME: to = to_time_string(to); break; case TIME_DATE: to = to_date_string(to); break; case TIME_DATETIME: to = to_datetime_string(to); break; default: *to++ = '\0'; break; } return to; } int64_t DateTimeValue::to_datetime_int64() const { return (_year * 10000L + _month * 100 + _day) * 1000000L + _hour * 10000 + _minute * 100 + _second; } int64_t DateTimeValue::to_date_int64() const { return _year * 10000 + _month * 100 + _day; } int64_t DateTimeValue::to_time_int64() const { int sign = _neg == 0 ? 1 : -1; return sign * (_hour * 10000 + _minute * 100 + _second); } int64_t DateTimeValue::to_int64() const { switch (_type) { case TIME_TIME: return to_time_int64(); case TIME_DATE: return to_date_int64(); case TIME_DATETIME: return to_datetime_int64(); default: return 0; } } bool DateTimeValue::get_date_from_daynr(uint64_t daynr) { if (daynr <= 0 || daynr > DATE_MAX_DAYNR) { return false; } _year = daynr / 365; uint32_t days_befor_year = 0; while (daynr < (days_befor_year = calc_daynr(_year, 1, 1))) { _year--; } uint32_t days_of_year = daynr - days_befor_year + 1; int leap_day = 0; if (is_leap(_year)) { if (days_of_year > 31 + 28) { days_of_year--; if (days_of_year == 31 + 28) { leap_day = 1; } } } _month = 1; while (days_of_year > s_days_in_month[_month]) { days_of_year -= s_days_in_month[_month]; _month++; } _day = days_of_year + leap_day; return true; } bool DateTimeValue::from_date_daynr(uint64_t daynr) { _neg = false; if (!get_date_from_daynr(daynr)) { return false; } _hour = 0; _minute = 0; _second = 0; _microsecond = 0; _type = TIME_DATE; return true; } // Following code is stolen from MySQL. uint64_t DateTimeValue::calc_daynr(uint32_t year, uint32_t month, uint32_t day) { uint64_t delsum = 0; int y = year; if (year == 0 && month == 0) { return 0; } /* Cast to int to be able to handle month == 0 */ delsum = 365 * y + 31 * (month - 1) + day; if (month <= 2) { // No leap year y--; } else { // This is great!!! // 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 // 0, 0, 3, 3, 4, 4, 5, 5, 5, 6, 7, 8 delsum -= (month * 4 + 23) / 10; } // Every 400 year has 97 leap year, 100, 200, 300 are not leap year. return delsum + y / 4 - y / 100 + y / 400; } static char* int_to_str(uint64_t val, char* to) { char buf[64]; char* ptr = buf; // Use do/while for 0 value do { *ptr++ = '0' + (val % 10); val /= 10; } while (val); while (ptr > buf) { *to++ = *--ptr; } return to; } static char* append_string(const char* from, char* to) { while (*from) { *to++ = *from++; } return to; } static char* append_with_prefix(const char* str, int str_len, char prefix, int full_len, char* to) { int len = (str_len > full_len) ? str_len : full_len; len -= str_len; while (len-- > 0) { // push prefix; *to++ = prefix; } while (str_len-- > 0) { *to++ = *str++; } return to; } int DateTimeValue::compute_format_len(const char* format, int len) const { int size = 0; const char* ptr = format; const char* end = format + len; while (ptr < end) { if (*ptr != '%' || (ptr + 1) < end) { size++; ptr++; continue; } switch (*++ptr) { case 'M': case 'W': size += 10; break; case 'D': case 'Y': case 'x': case 'X': size += 4; break; case 'a': case 'b': size += 10; break; case 'j': size += 3; break; case 'u': case 'U': case 'v': case 'V': case 'y': case 'm': case 'd': case 'h': case 'i': case 'I': case 'l': case 'p': case 'S': case 's': case 'c': case 'e': size += 2; break; case 'k': case 'H': size += 7; break; case 'r': size += 11; break; case 'T': size += 8; break; case 'f': size += 6; break; case 'w': case '%': default: size++; break; } } return size; } bool DateTimeValue::to_format_string(const char* format, int len, char* to) const { char buf[64]; char* pos = NULL; const char* ptr = format; const char* end = format + len; char ch = '\0'; while (ptr < end) { if (*ptr != '%' || (ptr + 1) == end) { *to++ = *ptr++; continue; } // Skip '%' ptr++; switch (ch = *ptr++) { case 'a': // Abbreviated weekday name if (_type == TIME_TIME || (_year == 0 && _month == 0)) { return false; } to = append_string(s_ab_day_name[weekday()], to); break; case 'b': // Abbreviated month name if (_month == 0) { return false; } to = append_string(s_ab_month_name[_month], to); break; case 'c': // Month, numeric (0...12) pos = int_to_str(_month, buf); to = append_with_prefix(buf, pos - buf, '0', 1, to); break; case 'd': // Day of month (00...31) pos = int_to_str(_day, buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'D': // Day of the month with English suffix (0th, 1st, ...) pos = int_to_str(_day, buf); to = append_with_prefix(buf, pos - buf, '0', 1, to); if (_day >= 10 && _day <= 19) { to = append_string("th", to); } else { switch (_day % 10) { case 1: to = append_string("st", to); break; case 2: to = append_string("nd", to); break; case 3: to = append_string("rd", to); break; default: to = append_string("th", to); break; } } break; case 'e': // Day of the month, numeric (0..31) pos = int_to_str(_day, buf); to = append_with_prefix(buf, pos - buf, '0', 1, to); break; case 'f': // Microseconds (000000..999999) pos = int_to_str(_microsecond, buf); to = append_with_prefix(buf, pos - buf, '0', 6, to); break; case 'h': case 'I': // Hour (01..12) pos = int_to_str((_hour % 24 + 11) % 12 + 1, buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'H': // Hour (00..23) pos = int_to_str(_hour, buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'i': // Minutes, numeric (00..59) pos = int_to_str(_minute, buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'j': // Day of year (001..366) pos = int_to_str(daynr() - calc_daynr(_year, 1, 1) + 1, buf); to = append_with_prefix(buf, pos - buf, '0', 3, to); break; case 'k': // Hour (0..23) pos = int_to_str(_hour, buf); to = append_with_prefix(buf, pos - buf, '0', 1, to); break; case 'l': // Hour (1..12) pos = int_to_str((_hour % 24 + 11) % 12 + 1, buf); to = append_with_prefix(buf, pos - buf, '0', 1, to); break; case 'm': // Month, numeric (00..12) pos = int_to_str(_month, buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'M': // Month name (January..December) if (_month == 0) { return false; } to = append_string(s_month_name[_month], to); break; case 'p': // AM or PM if ((_hour % 24) >= 12) { to = append_string("PM", to); } else { to = append_string("AM", to); } break; case 'r': // Time, 12-hour (hh:mm:ss followed by AM or PM) *to++ = (char) ('0' + (((_hour + 11) % 12 + 1) / 10)); *to++ = (char) ('0' + (((_hour + 11) % 12 + 1) % 10)); *to++ = ':'; // Minute *to++ = (char) ('0' + (_minute / 10)); *to++ = (char) ('0' + (_minute % 10)); *to++ = ':'; /* Second */ *to++ = (char) ('0' + (_second / 10)); *to++ = (char) ('0' + (_second % 10)); if ((_hour % 24) >= 12) { to = append_string(" PM", to); } else { to = append_string(" AM", to); } break; case 's': case 'S': // Seconds (00..59) pos = int_to_str(_second, buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'T': // Time, 24-hour (hh:mm:ss) *to++ = (char) ('0' + ((_hour % 24) / 10)); *to++ = (char) ('0' + ((_hour % 24) % 10)); *to++ = ':'; // Minute *to++ = (char) ('0' + (_minute / 10)); *to++ = (char) ('0' + (_minute % 10)); *to++ = ':'; /* Second */ *to++ = (char) ('0' + (_second / 10)); *to++ = (char) ('0' + (_second % 10)); break; case 'u': // Week (00..53), where Monday is the first day of the week; // WEEK() mode 1 if (_type == TIME_TIME) { return false; } pos = int_to_str(week(mysql_week_mode(1)), buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'U': // Week (00..53), where Sunday is the first day of the week; // WEEK() mode 0 if (_type == TIME_TIME) { return false; } pos = int_to_str(week(mysql_week_mode(0)), buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'v': // Week (01..53), where Monday is the first day of the week; // WEEK() mode 3; used with %x if (_type == TIME_TIME) { return false; } pos = int_to_str(week(mysql_week_mode(3)), buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'V': // Week (01..53), where Sunday is the first day of the week; // WEEK() mode 2; used with %X if (_type == TIME_TIME) { return false; } pos = int_to_str(week(mysql_week_mode(2)), buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'w': // Day of the week (0=Sunday..6=Saturday) if (_type == TIME_TIME || (_month == 0 && _year == 0)) { return false; } pos = int_to_str(calc_weekday(daynr(), true), buf); to = append_with_prefix(buf, pos - buf, '0', 1, to); break; case 'W': // Weekday name (Sunday..Saturday) to = append_string(s_day_name[weekday()], to); break; case 'x': { // Year for the week, where Monday is the first day of the week, // numeric, four digits; used with %v if (_type == TIME_TIME) { return false; } uint32_t year = 0; calc_week(*this, mysql_week_mode(3), &year); pos = int_to_str(year, buf); to = append_with_prefix(buf, pos - buf, '0', 4, to); break; } case 'X': { // Year for the week where Sunday is the first day of the week, // numeric, four digits; used with %V if (_type == TIME_TIME) { return false; } uint32_t year = 0; calc_week(*this, mysql_week_mode(2), &year); pos = int_to_str(year, buf); to = append_with_prefix(buf, pos - buf, '0', 4, to); break; } case 'y': // Year, numeric (two digits) pos = int_to_str(_year % 100, buf); to = append_with_prefix(buf, pos - buf, '0', 2, to); break; case 'Y': // Year, numeric, four digits pos = int_to_str(_year, buf); to = append_with_prefix(buf, pos - buf, '0', 4, to); break; default: *to++ = ch; break; } } *to++ = '\0'; return true; } uint8_t DateTimeValue::calc_week(const DateTimeValue& value, uint8_t mode, uint32_t *year) { bool monday_first = mode & WEEK_MONDAY_FIRST; bool week_year = mode & WEEK_YEAR; bool first_weekday = mode & WEEK_FIRST_WEEKDAY; uint64_t day_nr = value.daynr(); uint64_t daynr_first_day = calc_daynr(value._year, 1, 1); uint8_t weekday_first_day = calc_weekday(daynr_first_day, !monday_first); int days = 0; *year = value._year; // Check wether the first days of this year belongs to last year if (value._month == 1 && value._day <= (7 - weekday_first_day)) { if (!week_year && ((first_weekday && weekday_first_day != 0) || (!first_weekday && weekday_first_day > 3))) { return 0; } (*year)--; week_year = true; daynr_first_day -= (days = calc_days_in_year(*year)); weekday_first_day = (weekday_first_day + 53 * 7 - days) % 7; } // How many days since first week if ((first_weekday && weekday_first_day != 0) || (!first_weekday && weekday_first_day > 3)) { // days in new year belongs to last year. days = day_nr - (daynr_first_day + (7 - weekday_first_day)); } else { // days in new year belongs to this year. days = day_nr - (daynr_first_day - weekday_first_day); } if (week_year && days >= 52 * 7) { weekday_first_day = (weekday_first_day + calc_days_in_year(*year)) % 7; if ((first_weekday && weekday_first_day == 0) || (!first_weekday && weekday_first_day <= 3)) { // Belong to next year. (*year)++; return 1; } } return days / 7 + 1; } uint8_t DateTimeValue::week(uint8_t mode) const { uint32_t year = 0; return calc_week(*this, mode, &year); } uint8_t DateTimeValue::calc_weekday(uint64_t day_nr, bool is_sunday_first_day) { return (day_nr + 5L + (is_sunday_first_day ? 1L : 0L)) % 7; } // TODO(zhaochun): Think endptr is NULL // Return true if convert to a integer success. Otherwise false. static bool str_to_int64(const char* ptr, const char** endptr, int64_t *ret) { const static uint64_t MAX_NEGATIVE_NUMBER = 0x8000000000000000; const static uint64_t ULONGLONG_MAX = ~0; const static uint64_t LFACTOR2 = 100000000000ULL; const char* end = *endptr; uint64_t cutoff_1 = 0; uint64_t cutoff_2 = 0; uint64_t cutoff_3 = 0; // Skip space while (ptr < end && (*ptr == ' ' || *ptr == '\t')) { ptr++; } if (ptr >= end) { return false; } // Sign bool neg = false; if (*ptr == '-') { neg = true; ptr++; cutoff_1 = MAX_NEGATIVE_NUMBER / LFACTOR2; cutoff_2 = (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100; cutoff_3 = (MAX_NEGATIVE_NUMBER % LFACTOR2) % 100; } else { if (*ptr == '+') { ptr++; } cutoff_1 = ULONGLONG_MAX / LFACTOR2; cutoff_2 = (ULONGLONG_MAX % LFACTOR2) / 100; cutoff_3 = (ULONGLONG_MAX % LFACTOR2) % 100; } if (ptr >= end) { return false; } // Skip '0' while (ptr < end && *ptr == '0') { ptr++; } const char* n_end = ptr + 9; if (n_end > end) { n_end = end; } uint64_t value_1 = 0; while (ptr < n_end && isdigit(*ptr)) { value_1 *= 10; value_1 += *ptr++ - '0'; } if (ptr == end || !isdigit(*ptr)) { *endptr = ptr; *ret = neg ? -value_1 : value_1; return true; } // TODO uint64_t value_2 = 0; uint64_t value_3 = 0; // Check overflow. if (value_1 > cutoff_1 || (value_1 == cutoff_1 && (value_2 > cutoff_2 || (value_2 == cutoff_2 && value_3 > cutoff_3)))) { return false; } return true; } static int min(int a, int b) { return a < b ? a : b; } static int find_in_lib(const char* lib[], const char* str, const char* end) { int pos = 0; int find_count = 0; int find_pos = 0; for (; lib[pos] != NULL; ++pos) { const char* i = str; const char* j = lib[pos]; while (i < end && *j) { if (toupper(*i) != toupper(*j)) { break; } ++i; ++j; } if (i == end) { if (*j == '\0') { return pos; } else { find_count++; find_pos = pos; } } } return find_count == 1 ? find_pos : -1; } static int check_word(const char* lib[], const char* str, const char* end, const char** endptr) { const char* ptr = str; while (ptr < end && isalpha(*ptr)) { ptr++; } int pos = find_in_lib(lib, str, ptr); if (pos >= 0) { *endptr = ptr; } return pos; } bool DateTimeValue::from_date_format_str( const char* format, int format_len, const char* value, int value_len, const char** sub_val_end) { const char* ptr = format; const char* end = format + format_len; const char* val = value; const char* val_end = value + value_len; bool date_part_used = false; bool time_part_used = false; bool frac_part_used = false; int day_part = 0; int weekday = -1; int yearday = -1; int week_num = -1; bool strict_week_number = false; bool sunday_first = false; bool strict_week_number_year_type = false; int strict_week_number_year = -1; bool usa_time = false; while (ptr < end && val < val_end) { // Skip space character while (val < val_end && isspace(*val)) { val++; } if (val >= val_end) { break; } // Check switch if (*ptr == '%' && ptr + 1 < end) { const char* tmp = NULL; int64_t int_value = 0; ptr++; switch (*ptr++) { // Year case 'y': // Year, numeric (two digits) tmp = val + min(2, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } int_value += int_value >= 70 ? 1900 : 2000; _year = int_value; val = tmp; date_part_used = true; break; case 'Y': // Year, numeric, four digits tmp = val + min(4, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } if (tmp - val <= 2) { int_value += int_value >= 70 ? 1900 : 2000; } _year = int_value; val = tmp; date_part_used = true; break; // Month case 'm': case 'c': tmp = val + min(2, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } _month = int_value; val = tmp; date_part_used = true; break; case 'M': int_value = check_word(s_month_name, val, val_end, &val); if (int_value < 0) { return false; } _month = int_value; break; case 'b': int_value = check_word(s_ab_month_name, val, val_end, &val); if (int_value < 0) { return false; } _month = int_value; break; // Day case 'd': case 'e': tmp = val + min(2, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } _day = int_value; val = tmp; date_part_used = true; break; case 'D': tmp = val + min(2, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } _day = int_value; val = tmp + min(2, val_end - tmp); date_part_used = true; break; // Hour case 'h': case 'I': case 'l': usa_time = true; // Fall through case 'k': case 'H': tmp = val + min(2, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } _hour = int_value; val = tmp; time_part_used = true; break; // Minute case 'i': tmp = val + min(2, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } _minute = int_value; val = tmp; time_part_used = true; break; // Second case 's': case 'S': tmp = val + min(2, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } _second = int_value; val = tmp; time_part_used = true; break; // Micro second case 'f': tmp = val + min(6, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } int_value *= log_10_int[6 - (tmp - val)]; _microsecond = int_value; val = tmp; frac_part_used = true; break; // AM/PM case 'p': if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !usa_time) { return false; } if (toupper(*val) == 'P') { // PM day_part = 12; } time_part_used = true; val += 2; break; // Weekday case 'W': int_value = check_word(s_day_name, val, val_end, &val); if (int_value < 0) { return false; } int_value++; weekday = int_value; date_part_used = true; break; case 'a': int_value = check_word(s_ab_day_name, val, val_end, &val); if (int_value < 0) { return false; } int_value++; weekday = int_value; date_part_used = true; break; case 'w': tmp = val + min(1, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } if (int_value >= 7) { return false; } if (int_value == 0) { int_value = 7; } weekday = int_value; val = tmp; date_part_used = true; break; case 'j': tmp = val + min(3, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } yearday = int_value; val = tmp; date_part_used = true; break; case 'u': case 'v': case 'U': case 'V': sunday_first = (*(ptr - 1) == 'U' || *(ptr - 1) == 'V'); // Used to check if there is %x or %X strict_week_number = (*(ptr - 1) == 'V' || *(ptr - 1) == 'v'); tmp = val + min(2, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } week_num = int_value; if (week_num > 53 || (strict_week_number && week_num == 0)) { return false; } val = tmp; date_part_used = true; break; // strict week number, must be used with %V or %v case 'x': case 'X': strict_week_number_year_type = (*(ptr - 1) == 'X'); tmp = val + min(4, val_end - val); if (!str_to_int64(val, &tmp, &int_value)) { return false; } strict_week_number_year = int_value; val = tmp; date_part_used = true; break; case 'r': if (from_date_format_str("%I:%i:%S %p", 11, val, val_end - val, &tmp)) { return false; } val = tmp; time_part_used = true; break; case 'T': if (from_date_format_str("%H:%i:%S", 8, val, val_end - val, &tmp)) { return false; } time_part_used = true; val = tmp; break; case '.': while (val < val_end && ispunct(*val)) { val++; } break; case '@': while (val < val_end && isalpha(*val)) { val++; } break; case '#': while (val < val_end && isdigit(*val)) { val++; } break; default: return false; } } else if (!isspace(*ptr)) { if (*ptr != *val) { return false; } ptr++; val++; } else { ptr++; } } if (usa_time) { if (_hour > 12 || _hour < 1) { return false; } _hour = (_hour % 12) + day_part; } if (sub_val_end) { *sub_val_end = val; return 0; } // Year day if (yearday > 0) { uint64_t days = calc_daynr(_year, 1, 1) + yearday - 1; if (!get_date_from_daynr(days)) { return false; } } // weekday if (week_num >= 0 && weekday > 0) { // Check if ((strict_week_number && (strict_week_number_year < 0 || strict_week_number_year_type != sunday_first)) || (!strict_week_number && strict_week_number_year >= 0)) { return false; } uint64_t days = calc_daynr(strict_week_number ? strict_week_number_year : _year, 1, 1); uint8_t weekday_b = calc_weekday(days, sunday_first); if (sunday_first) { days += ((weekday_b == 0) ? 0 : 7) - weekday_b + (week_num - 1) * 7 + weekday % 7; } else { days += ((weekday_b <= 3) ? 0 : 7) - weekday_b + (week_num - 1) * 7 + weekday - 1; } if (!get_date_from_daynr(days)) { return false; } } // Compute timestamp type if (frac_part_used) { if (date_part_used) { _type = TIME_DATETIME; } else { _type = TIME_TIME; } } else { if (date_part_used) { if (time_part_used) { _type = TIME_DATETIME; } else { _type = TIME_DATE; } } else { _type = TIME_TIME; } } if (check_range() || check_date()) { return false; } _neg = false; return true; } bool DateTimeValue::date_add_interval(const TimeInterval& interval, TimeUnit unit) { int sign = interval.is_neg ? -1 : 1; switch (unit) { case MICROSECOND: case SECOND: case MINUTE: case HOUR: case SECOND_MICROSECOND: case MINUTE_MICROSECOND: case MINUTE_SECOND: case HOUR_MICROSECOND: case HOUR_SECOND: case HOUR_MINUTE: case DAY_MICROSECOND: case DAY_SECOND: case DAY_MINUTE: case DAY_HOUR: { // This may change the day information int64_t microseconds = _microsecond + sign * interval.microsecond; int64_t extra_second = microseconds / 1000000L; microseconds %= 1000000L; int64_t seconds = (_day - 1) * 86400L + _hour * 3600L + _minute * 60 + _second + sign * (interval.day * 86400 + interval.hour * 3600 + interval.minute * 60 + interval.second) + extra_second; if (microseconds < 0) { seconds--; microseconds += 1000000L; } int64_t days = seconds / 86400; seconds %= 86400L; if (seconds < 0) { seconds += 86400L; days--; } _microsecond = microseconds; _second = seconds % 60; _minute = (seconds / 60) % 60; _hour = seconds / 3600; int64_t day_nr = calc_daynr(_year, _month, 1) + days; if (!get_date_from_daynr(day_nr)) { return false; } _type = TIME_DATETIME; break; } case DAY: case WEEK: { // This only change day information, not change second information int64_t day_nr = daynr() + interval.day * sign; if (!get_date_from_daynr(day_nr)) { return false; } break; } case YEAR: { // This only change year information _year += sign * interval.year; if (_year > 9999) { return false; } if (_month == 2 && _day == 29 && !is_leap(_year)) { _day = 28; } break; } case MONTH: case QUARTER: case YEAR_MONTH: { // This will change month and year information, maybe date. int64_t months = _year * 12 + _month - 1 + sign * (12 * interval.year + interval.month); _year = months / 12; if (_year > 9999) { return false; } _month = (months % 12) + 1; if (_day > s_days_in_month[_month]) { _day = s_days_in_month[_month]; if (_month == 2 && is_leap(_year)) { _day++; } } break; } } return true; } bool DateTimeValue::unix_timestamp(int64_t* timestamp, const std::string& timezone) const{ boost::local_time::time_zone_ptr local_time_zone = TimezoneDatabase::find_timezone(timezone); if (local_time_zone == nullptr) { return false; } char buf[64]; char* to = to_datetime_string(buf); boost::posix_time::ptime pt = boost::posix_time::time_from_string(std::string(buf, to - buf -1)); boost::local_time::local_date_time lt(pt.date(), pt.time_of_day(), local_time_zone, boost::local_time::local_date_time::NOT_DATE_TIME_ON_ERROR); boost::posix_time::ptime utc_ptime = lt.utc_time(); boost::posix_time::ptime utc_start(boost::gregorian::date(1970, 1, 1)); boost::posix_time::time_duration dur = utc_ptime - utc_start; *timestamp = dur.total_milliseconds() / 1000; return true; } bool DateTimeValue::from_unixtime(int64_t timestamp, const std::string& timezone) { boost::local_time::time_zone_ptr local_time_zone = TimezoneDatabase::find_timezone(timezone); if (local_time_zone == nullptr) { return false; } boost::local_time::local_date_time lt(boost::posix_time::from_time_t(timestamp), local_time_zone); boost::posix_time::ptime local_ptime = lt.local_time(); _neg = 0; _type = TIME_DATETIME; _year = local_ptime.date().year(); _month = local_ptime.date().month(); _day = local_ptime.date().day(); _hour = local_ptime.time_of_day().hours(); _minute = local_ptime.time_of_day().minutes(); _second = local_ptime.time_of_day().seconds(); _microsecond = 0; return true; } const char* DateTimeValue::month_name() const { if (_month < 1 || _month > 12) { return NULL; } return s_month_name[_month]; } const char* DateTimeValue::day_name() const { int day = weekday(); if (day < 0 || day >= 7) { return NULL; } return s_day_name[day]; } DateTimeValue DateTimeValue::local_time() { DateTimeValue value; value.from_unixtime(time(NULL), TimezoneDatabase::default_time_zone); return value; } std::ostream& operator<<(std::ostream& os, const DateTimeValue& value) { char buf[64]; value.to_string(buf); return os << buf; } // NOTE: // only support DATE - DATE (no support DATETIME - DATETIME) std::size_t operator-(const DateTimeValue& v1, const DateTimeValue& v2) { return v1.daynr() - v2.daynr(); } std::size_t hash_value(DateTimeValue const& value) { return HashUtil::hash(&value, sizeof(DateTimeValue), 0); } }