From f1864d9fcf7476b889c9587258ee4215d09bd3f1 Mon Sep 17 00:00:00 2001
From: Mryange <59914473+Mryange@users.noreply.github.com>
Date: Tue, 15 Aug 2023 15:30:48 +0800
Subject: [PATCH] [fix](function) fix str_to_date with specific format #22981

---
Reviewer notes (kept after the "---" separator so that the commit message and
the diff content below are left exactly as they were):

* rewrite_specific_format() compares the incoming format string against three
  exact spellings ("yyyyMMdd", "yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss") and returns
  the corresponding %-specifier form ("%Y%m%d", "%Y-%m-%d",
  "%Y-%m-%d %H:%i:%s"). Any other input is returned unchanged. The returned
  StringRef refers either to the function-local static std::string tables or
  to the caller's own buffer; no characters are copied.
* In the loop that slices a format out of `rdata` via `roffsets` for every row,
  the rewrite is applied per row. In the loop that receives one `rdata`
  StringRef, the rewrite is computed once before the loop and reused.
* The two new regression files run the three spellings through STR_TO_DATE
  both from table columns and from literals, with enable_nereids_planner set
  to true and then to false.
* NOTE(review): line breaks and indentation of this patch were reconstructed
  from the "+"/"-" markers and the hunk line counts (including the blank
  context lines in the three C++ hunks). Column separators in the .out rows
  are shown as single spaces, as found - confirm against the original commit.
* NOTE(review): several C++ lines appear to have lost their template argument
  lists (`std::make_shared()`, `reinterpret_cast(...)`,
  `_execute_inner_loop(...)`, bare `template`). They are reproduced as found;
  the hunks will presumably not apply until those are restored - TODO confirm.

 be/src/vec/functions/function_timestamp.cpp   | 28 +++++--
 .../data/correctness/test_str_to_date.out     | 29 +++++++
 .../correctness/test_str_to_date.groovy       | 78 +++++++++++++++++++
 3 files changed, 130 insertions(+), 5 deletions(-)
 create mode 100644 regression-test/data/correctness/test_str_to_date.out
 create mode 100644 regression-test/suites/correctness/test_str_to_date.groovy

diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp
index a71062c483..1bca282f2e 100644
--- a/be/src/vec/functions/function_timestamp.cpp
+++ b/be/src/vec/functions/function_timestamp.cpp
@@ -74,6 +74,21 @@ struct StrToDate {
         return make_nullable(std::make_shared());
     }
 
+    static StringRef rewrite_specific_format(const char* raw_str, size_t str_size) {
+        const static std::string specific_format_strs[3] = {"yyyyMMdd", "yyyy-MM-dd",
+                                                            "yyyy-MM-dd HH:mm:ss"};
+        const static std::string specific_format_rewrite[3] = {"%Y%m%d", "%Y-%m-%d",
+                                                               "%Y-%m-%d %H:%i:%s"};
+        for (int i = 0; i < 3; i++) {
+            const StringRef specific_format {specific_format_strs[i].data(),
+                                             specific_format_strs[i].size()};
+            if (specific_format == StringRef {raw_str, str_size}) {
+                return {specific_format_rewrite[i].data(), specific_format_rewrite[i].size()};
+            }
+        }
+        return {raw_str, str_size};
+    }
+
     static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                           size_t result, size_t input_rows_count) {
         auto null_map = ColumnUInt8::create(input_rows_count, 0);
@@ -161,9 +176,10 @@ private:
 
             const char* r_raw_str = reinterpret_cast(&rdata[roffsets[i - 1]]);
             size_t r_str_size = roffsets[i] - roffsets[i - 1];
-
-            _execute_inner_loop(l_raw_str, l_str_size, r_raw_str,
-                                r_str_size, context, res, null_map, i);
+            const StringRef format_str = rewrite_specific_format(r_raw_str, r_str_size);
+            _execute_inner_loop(l_raw_str, l_str_size, format_str.data,
+                                format_str.size, context, res, null_map,
+                                i);
         }
     }
     template
@@ -173,12 +189,14 @@ private:
                                         NullMap& null_map) {
         size_t size = loffsets.size();
         res.resize(size);
+        const StringRef format_str = rewrite_specific_format(rdata.data, rdata.size);
         for (size_t i = 0; i < size; ++i) {
             const char* l_raw_str = reinterpret_cast(&ldata[loffsets[i - 1]]);
             size_t l_str_size = loffsets[i] - loffsets[i - 1];
 
-            _execute_inner_loop(l_raw_str, l_str_size, rdata.data,
-                                rdata.size, context, res, null_map, i);
+            _execute_inner_loop(l_raw_str, l_str_size, format_str.data,
+                                format_str.size, context, res, null_map,
+                                i);
         }
     }
     template
diff --git a/regression-test/data/correctness/test_str_to_date.out b/regression-test/data/correctness/test_str_to_date.out
new file mode 100644
index 0000000000..af6342ecf6
--- /dev/null
+++ b/regression-test/data/correctness/test_str_to_date.out
@@ -0,0 +1,29 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select1 --
+2019-12-01 yyyy-MM-dd 2019-12-01T00:00
+20201203 yyyyMMdd 2020-12-03T00:00
+2020-12-03 11:45:14 yyyy-MM-dd HH:mm:ss 2020-12-03T11:45:14
+
+-- !select2 --
+2019-12-01
+
+-- !select3 --
+2020-12-03
+
+-- !select4 --
+2020-12-03T11:45:14
+
+-- !select5 --
+2019-12-01 yyyy-MM-dd 2019-12-01T00:00
+20201203 yyyyMMdd 2020-12-03T00:00
+2020-12-03 11:45:14 yyyy-MM-dd HH:mm:ss 2020-12-03T11:45:14
+
+-- !select6 --
+2019-12-01
+
+-- !select7 --
+2020-12-03
+
+-- !select8 --
+2020-12-03T11:45:14
+
diff --git a/regression-test/suites/correctness/test_str_to_date.groovy b/regression-test/suites/correctness/test_str_to_date.groovy
new file mode 100644
index 0000000000..6c26a8db24
--- /dev/null
+++ b/regression-test/suites/correctness/test_str_to_date.groovy
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_str_to_date") {
+    sql """ DROP TABLE IF EXISTS test_str_to_date_db """
+    // Scratch table: s1 is the text to parse, s2 is the format handed to STR_TO_DATE.
+    sql """
+            CREATE TABLE IF NOT EXISTS test_str_to_date_db (
+              `id` INT NULL COMMENT "",
+              `s1` String NULL COMMENT "",
+              `s2` String NULL COMMENT ""
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+            );
+    """
+    // One row per format spelling under test: yyyy-MM-dd, yyyyMMdd, yyyy-MM-dd HH:mm:ss.
+    sql """ INSERT INTO test_str_to_date_db VALUES(1,'2019-12-01', 'yyyy-MM-dd');"""
+    sql """ INSERT INTO test_str_to_date_db VALUES(2,'20201203', 'yyyyMMdd');"""
+    sql """ INSERT INTO test_str_to_date_db VALUES(3,'2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss');"""
+    // Pass 1: Nereids planner on, fallback to the original planner off.
+    sql """ set enable_nereids_planner=true , enable_fallback_to_original_planner=false;"""
+
+    // select1 takes the format from column s2; select2-4 pass both arguments as literals.
+    qt_select1 """
+        select s1,s2,STR_TO_DATE(s1,s2) from test_str_to_date_db order by id;
+    """
+
+    qt_select2 """
+        SELECT STR_TO_DATE('2019-12-01', 'yyyy-MM-dd');
+    """
+
+    qt_select3 """
+        SELECT STR_TO_DATE('20201203', 'yyyyMMdd');
+    """
+
+    qt_select4 """
+        SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss');
+    """
+
+    // Pass 2: the same four queries (select5-8) with enable_nereids_planner=false.
+    sql """ set enable_nereids_planner=false;"""
+
+    qt_select5 """
+        select s1,s2,STR_TO_DATE(s1,s2) from test_str_to_date_db order by id;
+    """
+
+    qt_select6 """
+        SELECT STR_TO_DATE('2019-12-01', 'yyyy-MM-dd');
+    """
+
+    qt_select7 """
+        SELECT STR_TO_DATE('20201203', 'yyyyMMdd');
+    """
+
+    qt_select8 """
+        SELECT STR_TO_DATE('2020-12-03 11:45:14', 'yyyy-MM-dd HH:mm:ss');
+    """
+
+
+}