Cherry-picked from #50149 Co-authored-by: lw112 <131352377+felixwluo@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
af0bca698a
commit
98f5d95fba
@ -2205,24 +2205,42 @@ public:
|
||||
|
||||
const auto* str_col = assert_cast<const ColumnString*>(content_column.get());
|
||||
|
||||
[[maybe_unused]] const auto& [delimiter_col, delimiter_const] =
|
||||
// Handle both constant and non-constant delimiter parameters
|
||||
ColumnPtr delimiter_column_ptr;
|
||||
bool delimiter_const = false;
|
||||
std::tie(delimiter_column_ptr, delimiter_const) =
|
||||
unpack_if_const(block.get_by_position(arguments[1]).column);
|
||||
auto delimiter = delimiter_col->get_data_at(0);
|
||||
int32_t delimiter_size = delimiter.size;
|
||||
const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get());
|
||||
|
||||
[[maybe_unused]] const auto& [part_num_col, part_const] =
|
||||
ColumnPtr part_num_column_ptr;
|
||||
bool part_num_const = false;
|
||||
std::tie(part_num_column_ptr, part_num_const) =
|
||||
unpack_if_const(block.get_by_position(arguments[2]).column);
|
||||
auto part_number = *((int*)part_num_col->get_data_at(0).data);
|
||||
const ColumnVector<Int32>* part_num_col =
|
||||
assert_cast<const ColumnVector<Int32>*>(part_num_column_ptr.get());
|
||||
|
||||
if (part_number == 0 || delimiter_size == 0) {
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
// For constant multi-character delimiters, create StringRef and StringSearch only once
|
||||
std::optional<StringRef> const_delimiter_ref;
|
||||
std::optional<StringSearch> const_search;
|
||||
if (delimiter_const && delimiter_col->get_data_at(0).size > 1) {
|
||||
const_delimiter_ref.emplace(delimiter_col->get_data_at(0));
|
||||
const_search.emplace(&const_delimiter_ref.value());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
auto str = str_col->get_data_at(i);
|
||||
auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i);
|
||||
int32_t delimiter_size = delimiter.size;
|
||||
|
||||
auto part_number = part_num_col->get_element(part_num_const ? 0 : i);
|
||||
|
||||
if (part_number == 0 || delimiter_size == 0) {
|
||||
StringOP::push_empty_string(i, res_chars, res_offsets);
|
||||
continue;
|
||||
}
|
||||
} else if (part_number > 0) {
|
||||
if (delimiter_size == 1) {
|
||||
// If delimiter is a char, use memchr to split
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
auto str = str_col->get_data_at(i);
|
||||
|
||||
if (part_number > 0) {
|
||||
if (delimiter_size == 1) {
|
||||
int32_t offset = -1;
|
||||
int32_t num = 0;
|
||||
while (num < part_number) {
|
||||
@ -2248,18 +2266,23 @@ public:
|
||||
StringOP::push_value_string(std::string_view(str.data, str.size), i,
|
||||
res_chars, res_offsets);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
StringRef delimiter_ref(delimiter);
|
||||
StringSearch search(&delimiter_ref);
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
auto str = str_col->get_data_at(i);
|
||||
} else {
|
||||
// For multi-character delimiters
|
||||
// Use pre-created StringRef and StringSearch for constant delimiters
|
||||
StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value()
|
||||
: StringRef(delimiter);
|
||||
const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr;
|
||||
StringSearch local_search(&delimiter_ref);
|
||||
if (!search_ptr) {
|
||||
search_ptr = &local_search;
|
||||
}
|
||||
|
||||
int32_t offset = -delimiter_size;
|
||||
int32_t num = 0;
|
||||
while (num < part_number) {
|
||||
size_t n = str.size - offset - delimiter_size;
|
||||
// search first match delimter_ref index from src string among str_offset to end
|
||||
const char* pos = search.search(str.data + offset + delimiter_size, n);
|
||||
const char* pos = search_ptr->search(str.data + offset + delimiter_size, n);
|
||||
if (pos < str.data + str.size) {
|
||||
offset = pos - str.data;
|
||||
num++;
|
||||
@ -2280,21 +2303,25 @@ public:
|
||||
res_chars, res_offsets);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// if part_number is negative
|
||||
part_number = -part_number;
|
||||
for (size_t i = 0; i < input_rows_count; ++i) {
|
||||
auto str = str_col->get_data_at(i);
|
||||
} else {
|
||||
int neg_part_number = -part_number;
|
||||
auto str_str = str.to_string();
|
||||
int32_t offset = str.size;
|
||||
int32_t pre_offset = offset;
|
||||
int32_t num = 0;
|
||||
auto substr = str_str;
|
||||
while (num <= part_number && offset >= 0) {
|
||||
offset = (int)substr.rfind(delimiter, offset);
|
||||
|
||||
// Use pre-created StringRef for constant delimiters
|
||||
StringRef delimiter_str =
|
||||
const_delimiter_ref
|
||||
? const_delimiter_ref.value()
|
||||
: StringRef(reinterpret_cast<const char*>(delimiter.data),
|
||||
delimiter.size);
|
||||
|
||||
while (num <= neg_part_number && offset >= 0) {
|
||||
offset = (int)substr.rfind(delimiter_str, offset);
|
||||
if (offset != -1) {
|
||||
if (++num == part_number) {
|
||||
if (++num == neg_part_number) {
|
||||
break;
|
||||
}
|
||||
pre_offset = offset;
|
||||
@ -2306,7 +2333,7 @@ public:
|
||||
}
|
||||
num = (offset == -1 && num != 0) ? num + 1 : num;
|
||||
|
||||
if (num == part_number) {
|
||||
if (num == neg_part_number) {
|
||||
if (offset == -1) {
|
||||
StringOP::push_value_string(std::string_view(str.data, str.size), i,
|
||||
res_chars, res_offsets);
|
||||
|
||||
@ -18,7 +18,6 @@
|
||||
package org.apache.doris.nereids.trees.expressions.functions.scalar;
|
||||
|
||||
import org.apache.doris.catalog.FunctionSignature;
|
||||
import org.apache.doris.nereids.exceptions.AnalysisException;
|
||||
import org.apache.doris.nereids.trees.expressions.Expression;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
|
||||
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
|
||||
@ -53,16 +52,6 @@ public class SubstringIndex extends ScalarFunction
|
||||
super("substring_index", arg0, arg1, arg2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkLegalityBeforeTypeCoercion() {
|
||||
for (int i = 1; i < children.size(); ++i) {
|
||||
if (!getArgument(i).isConstant()) {
|
||||
throw new AnalysisException(getName()
|
||||
+ " function except for the first argument, other parameter must be a constant.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* withChildren.
|
||||
*/
|
||||
|
||||
48
regression-test/data/function_p0/test_substring_index.out
Normal file
48
regression-test/data/function_p0/test_substring_index.out
Normal file
@ -0,0 +1,48 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
1 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 BBB -1 _02|CCC_03|DDD_04|EEE_05|FFF_06
|
||||
2 zyz_01|zyz_02|CCC_03|qwe_04|qwe_05|qwe_06 ccc -1 zyz_01|zyz_02|CCC_03|qwe_04|qwe_05|qwe_06
|
||||
3 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 DDD -1 _04|EEE_05|FFF_06
|
||||
4 sgr_01|wsc_02|CCC_03|DDD_04|rfv_05|rgb_06 DDD -1 _04|rfv_05|rgb_06
|
||||
5 cdr_01|vfr_02|dfc_03|DDD_04|EEE_05|FFF_06 eee -1 cdr_01|vfr_02|dfc_03|DDD_04|EEE_05|FFF_06
|
||||
6 AAA_01|dsd_02|ert_03|bgt_04|fgh_05|hyb_06 A_01 -1 |dsd_02|ert_03|bgt_04|fgh_05|hyb_06
|
||||
7 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 BBB 1 AAA_01|
|
||||
8 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 | 2 AAA_01|BBB_02
|
||||
9 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 | -2 EEE_05|FFF_06
|
||||
10 ABC | 1 ABC
|
||||
11 ABC|DEF | 0
|
||||
12 ABC 1
|
||||
13 ABC|DEF|GHI 1
|
||||
|
||||
-- !sql --
|
||||
101 北京市|上海市|广州市|深圳市|成都市 | 2 北京市|上海市
|
||||
102 北京市|上海市|广州市|深圳市|成都市 | -2 深圳市|成都市
|
||||
103 北京市|上海市|广州市|深圳市|成都市 上海 -1 市|广州市|深圳市|成都市
|
||||
104 中国人民共和国 人民 1 中国
|
||||
105 中国人民共和国 人民 -1 共和国
|
||||
106 你好,世界!你好,朋友! 你好 1
|
||||
107 你好,世界!你好,朋友! 你好 -1 ,朋友!
|
||||
108 你好,世界!你好,朋友! 世界 -1 !你好,朋友!
|
||||
109 中文|测试|数据 测试 1 中文|
|
||||
110 中文|测试|数据 测试 -1 |数据
|
||||
|
||||
-- !sql --
|
||||
201 hello😀world😀example 😀 1 hello
|
||||
202 hello😀world😀example 😀 2 hello😀world
|
||||
203 hello😀world😀example 😀 -1 example
|
||||
204 👋👋hello👋world👋 👋 2 👋
|
||||
205 👋👋hello👋world👋 👋 -2 world👋
|
||||
|
||||
-- !sql --
|
||||
1 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 BBB -1 _02|CCC_03|DDD_04|EEE_05|FFF_06
|
||||
3 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 DDD -1 _04|EEE_05|FFF_06
|
||||
7 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 BBB 1 AAA_01|
|
||||
8 AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 | 2 AAA_01|BBB_02
|
||||
101 北京市|上海市|广州市|深圳市|成都市 | 2 北京市|上海市
|
||||
103 北京市|上海市|广州市|深圳市|成都市 上海 -1 市|广州市|深圳市|成都市
|
||||
201 hello😀world😀example 😀 1 hello
|
||||
203 hello😀world😀example 😀 -1 example
|
||||
|
||||
-- !sql --
|
||||
test|test test|test
|
||||
|
||||
@ -0,0 +1,23 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
1 BBB AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 _02|CCC_03|DDD_04|EEE_05|FFF_06
|
||||
2 ccc zyz_01|zyz_02|CCC_03|qwe_04|qwe_05|qwe_06 zyz_01|zyz_02|CCC_03|qwe_04|qwe_05|qwe_06
|
||||
3 DDD AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06 _04|EEE_05|FFF_06
|
||||
4 DDD sgr_01|wsc_02|CCC_03|DDD_04|rfv_05|rgb_06 _04|rfv_05|rgb_06
|
||||
5 eee cdr_01|vfr_02|dfc_03|DDD_04|EEE_05|FFF_06 cdr_01|vfr_02|dfc_03|DDD_04|EEE_05|FFF_06
|
||||
6 A_01 AAA_01|dsd_02|ert_03|bgt_04|fgh_05|hyb_06 |dsd_02|ert_03|bgt_04|fgh_05|hyb_06
|
||||
|
||||
-- !sql --
|
||||
7 市 北京市|上海市|广州市|深圳市 北京
|
||||
8 人民 中华人民共和国 中华 共和国
|
||||
9 分隔符 中文分隔符测试分隔符数据 中文 数据
|
||||
10 你好 你好,世界!你好,朋友! ,朋友!
|
||||
|
||||
-- !sql --
|
||||
1 field1,field2,field3,field4 , 2 field1,field2
|
||||
2 field1,field2,field3,field4 , -1 field4
|
||||
3 AAA_01|BBB_02|CCC_03 | 2 AAA_01|BBB_02
|
||||
4 AAA_01|BBB_02|CCC_03 | -2 BBB_02|CCC_03
|
||||
5 中文分隔符测试分隔符数据 分隔符 1 中文
|
||||
6 中文分隔符测试分隔符数据 分隔符 -1 数据
|
||||
|
||||
@ -0,0 +1,82 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
AAA_01
|
||||
|
||||
-- !sql --
|
||||
CCC_03
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02
|
||||
|
||||
-- !sql --
|
||||
BBB_02|CCC_03
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02|CCC_03
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02|CCC_03
|
||||
|
||||
-- !sql --
|
||||
|
||||
|
||||
-- !sql --
|
||||
|
||||
|
||||
-- !sql --
|
||||
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02|CCC_03
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02|CCC_03
|
||||
|
||||
-- !sql --
|
||||
AAA_01
|
||||
|
||||
-- !sql --
|
||||
CCC_03
|
||||
|
||||
-- !sql --
|
||||
_02|CCC_03
|
||||
|
||||
-- !sql --
|
||||
|
||||
|
||||
-- !sql --
|
||||
|
||||
|
||||
-- !sql --
|
||||
北京市|上海市
|
||||
|
||||
-- !sql --
|
||||
北京市
|
||||
|
||||
-- !sql --
|
||||
广州市
|
||||
|
||||
-- !sql --
|
||||
hello
|
||||
|
||||
-- !sql --
|
||||
example
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02
|
||||
|
||||
-- !sql --
|
||||
AAA_01|BBB_02
|
||||
|
||||
-- !sql --
|
||||
中文_
|
||||
|
||||
-- !sql --
|
||||
_02|CCC_03 AAA_01|BBB_02|CCC_03
|
||||
|
||||
137
regression-test/suites/function_p0/test_substring_index.groovy
Normal file
137
regression-test/suites/function_p0/test_substring_index.groovy
Normal file
@ -0,0 +1,137 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_substring_index") {
|
||||
sql "set enable_nereids_planner=true"
|
||||
sql "set enable_fallback_to_original_planner=false"
|
||||
|
||||
sql "DROP TABLE IF EXISTS test_substring_index"
|
||||
sql """
|
||||
CREATE TABLE test_substring_index (
|
||||
id INT,
|
||||
str VARCHAR(100),
|
||||
delimiter VARCHAR(10),
|
||||
count INT
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(id)
|
||||
DISTRIBUTED BY HASH(id) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO test_substring_index VALUES
|
||||
(1, 'AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06', 'BBB', -1),
|
||||
(2, 'zyz_01|zyz_02|CCC_03|qwe_04|qwe_05|qwe_06', 'ccc', -1),
|
||||
(3, 'AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06', 'DDD', -1),
|
||||
(4, 'sgr_01|wsc_02|CCC_03|DDD_04|rfv_05|rgb_06', 'DDD', -1),
|
||||
(5, 'cdr_01|vfr_02|dfc_03|DDD_04|EEE_05|FFF_06', 'eee', -1),
|
||||
(6, 'AAA_01|dsd_02|ert_03|bgt_04|fgh_05|hyb_06', 'A_01', -1),
|
||||
(7, 'AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06', 'BBB', 1),
|
||||
(8, 'AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06', '|', 2),
|
||||
(9, 'AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06', '|', -2),
|
||||
(10, 'ABC', '|', 1),
|
||||
(11, 'ABC|DEF', '|', 0),
|
||||
(12, '', 'ABC', 1),
|
||||
(13, 'ABC|DEF|GHI', '', 1)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO test_substring_index VALUES
|
||||
(101, '北京市|上海市|广州市|深圳市|成都市', '|', 2),
|
||||
(102, '北京市|上海市|广州市|深圳市|成都市', '|', -2),
|
||||
(103, '北京市|上海市|广州市|深圳市|成都市', '上海', -1),
|
||||
(104, '中国人民共和国', '人民', 1),
|
||||
(105, '中国人民共和国', '人民', -1),
|
||||
(106, '你好,世界!你好,朋友!', '你好', 1),
|
||||
(107, '你好,世界!你好,朋友!', '你好', -1),
|
||||
(108, '你好,世界!你好,朋友!', '世界', -1),
|
||||
(109, '中文|测试|数据', '测试', 1),
|
||||
(110, '中文|测试|数据', '测试', -1)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO test_substring_index VALUES
|
||||
(201, 'hello😀world😀example', '😀', 1),
|
||||
(202, 'hello😀world😀example', '😀', 2),
|
||||
(203, 'hello😀world😀example', '😀', -1),
|
||||
(204, '👋👋hello👋world👋', '👋', 2),
|
||||
(205, '👋👋hello👋world👋', '👋', -2)
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
id,
|
||||
str,
|
||||
delimiter,
|
||||
count,
|
||||
substring_index(str, delimiter, count) as result
|
||||
FROM test_substring_index
|
||||
WHERE id BETWEEN 1 AND 13
|
||||
ORDER BY id
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
id,
|
||||
str,
|
||||
delimiter,
|
||||
count,
|
||||
substring_index(str, delimiter, count) as result
|
||||
FROM test_substring_index
|
||||
WHERE id BETWEEN 101 AND 110
|
||||
ORDER BY id
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
id,
|
||||
str,
|
||||
delimiter,
|
||||
count,
|
||||
substring_index(str, delimiter, count) as result
|
||||
FROM test_substring_index
|
||||
WHERE id BETWEEN 201 AND 205
|
||||
ORDER BY id
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
a.id,
|
||||
a.str,
|
||||
a.delimiter,
|
||||
b.count,
|
||||
substring_index(a.str, a.delimiter, b.count) as result
|
||||
FROM test_substring_index a
|
||||
JOIN test_substring_index b ON a.id = b.id
|
||||
WHERE a.id IN (1, 3, 7, 8, 101, 103, 201, 203)
|
||||
ORDER BY a.id
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
substring_index('', '', 1) as empty_all,
|
||||
substring_index('test', '', 1) as empty_delimiter,
|
||||
substring_index('', 'test', 1) as empty_string,
|
||||
substring_index('test', 'test', 0) as zero_count,
|
||||
substring_index('test|test', '|', 999) as large_count,
|
||||
substring_index('test|test', '|', -999) as large_negative_count
|
||||
"""
|
||||
|
||||
sql "DROP TABLE IF EXISTS test_substring_index"
|
||||
}
|
||||
@ -0,0 +1,114 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_substring_index_columns") {
|
||||
sql "set enable_nereids_planner=true"
|
||||
sql "set enable_fallback_to_original_planner=false"
|
||||
|
||||
sql "DROP TABLE IF EXISTS test_substring_index_compat"
|
||||
sql """
|
||||
CREATE TABLE test_substring_index_compat (
|
||||
no INT,
|
||||
sub_str VARCHAR(50),
|
||||
str VARCHAR(100)
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(no)
|
||||
DISTRIBUTED BY HASH(no) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO test_substring_index_compat VALUES
|
||||
(1, 'BBB', 'AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06'),
|
||||
(2, 'ccc', 'zyz_01|zyz_02|CCC_03|qwe_04|qwe_05|qwe_06'),
|
||||
(3, 'DDD', 'AAA_01|BBB_02|CCC_03|DDD_04|EEE_05|FFF_06'),
|
||||
(4, 'DDD', 'sgr_01|wsc_02|CCC_03|DDD_04|rfv_05|rgb_06'),
|
||||
(5, 'eee', 'cdr_01|vfr_02|dfc_03|DDD_04|EEE_05|FFF_06'),
|
||||
(6, 'A_01', 'AAA_01|dsd_02|ert_03|bgt_04|fgh_05|hyb_06')
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
no,
|
||||
sub_str AS '分隔符字符串',
|
||||
str AS '需要截取的字符串',
|
||||
substring_index(str, sub_str, -1) AS '动态分隔符结果'
|
||||
FROM test_substring_index_compat
|
||||
ORDER BY no
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO test_substring_index_compat VALUES
|
||||
(7, '市', '北京市|上海市|广州市|深圳市'),
|
||||
(8, '人民', '中华人民共和国'),
|
||||
(9, '分隔符', '中文分隔符测试分隔符数据'),
|
||||
(10, '你好', '你好,世界!你好,朋友!')
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
no,
|
||||
sub_str AS '分隔符字符串',
|
||||
str AS '需要截取的字符串',
|
||||
substring_index(str, sub_str, 1) AS '正向截取',
|
||||
substring_index(str, sub_str, -1) AS '反向截取'
|
||||
FROM test_substring_index_compat
|
||||
WHERE no > 6
|
||||
ORDER BY no
|
||||
"""
|
||||
|
||||
sql "DROP TABLE IF EXISTS test_dynamic_params"
|
||||
sql """
|
||||
CREATE TABLE test_dynamic_params (
|
||||
id INT,
|
||||
source_str VARCHAR(100),
|
||||
delimiter VARCHAR(20),
|
||||
count_val INT
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(id)
|
||||
DISTRIBUTED BY HASH(id) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
)
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO test_dynamic_params VALUES
|
||||
(1, 'field1,field2,field3,field4', ',', 2),
|
||||
(2, 'field1,field2,field3,field4', ',', -1),
|
||||
(3, 'AAA_01|BBB_02|CCC_03', '|', 2),
|
||||
(4, 'AAA_01|BBB_02|CCC_03', '|', -2),
|
||||
(5, '中文分隔符测试分隔符数据', '分隔符', 1),
|
||||
(6, '中文分隔符测试分隔符数据', '分隔符', -1)
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
id,
|
||||
source_str,
|
||||
delimiter,
|
||||
count_val,
|
||||
substring_index(source_str, delimiter, count_val) AS result
|
||||
FROM test_dynamic_params
|
||||
ORDER BY id
|
||||
"""
|
||||
|
||||
sql "DROP TABLE IF EXISTS test_substring_index_compat"
|
||||
sql "DROP TABLE IF EXISTS test_dynamic_params"
|
||||
}
|
||||
@ -0,0 +1,89 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_substring_index_simple") {
|
||||
sql "set enable_nereids_planner=true"
|
||||
sql "set enable_fallback_to_original_planner=false"
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', '|', 1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', '|', -1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', '|', 2) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', '|', -2) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', 'XYZ', 1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', 'XYZ', -1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('', '|', 1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', '', 1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', '|', 0) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', '|', 10) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', '|', -10) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01||BBB_02||CCC_03', '||', 1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01||BBB_02||CCC_03', '||', -1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03', 'BBB', -1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('|AAA_01|BBB_02|CCC_03', '|', 1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('AAA_01|BBB_02|CCC_03|', '|', -1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('北京市|上海市|广州市', '|', 2) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('北京市分隔符上海市分隔符广州市', '分隔符', 1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('北京市分隔符上海市分隔符广州市', '分隔符', -1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('hello😀world😀example', '😀', 1) as result"""
|
||||
|
||||
qt_sql """SELECT substring_index('hello😀world😀example', '😀', -1) as result"""
|
||||
|
||||
qt_sql """
|
||||
SELECT substring_index('AAA_01|BBB_02|CCC_03', (SELECT '|'), 2) as result
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT substring_index('AAA_01|BBB_02|CCC_03', '|', (SELECT 2)) as result
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT substring_index('AAA_01|BBB_02|CCC_03', (SELECT '|'), (SELECT 2)) as result
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT substring_index('AAA_01|BBB_02|CCC_03', concat('|'), 2) as result
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT substring_index('中文_分隔符_测试_分隔符_数据', concat('分', '隔', '符'), 1) as result
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
substring_index('AAA_01|BBB_02|CCC_03', 'BBB', -1) as result1,
|
||||
substring_index('AAA_01|BBB_02|CCC_03', 'bbb', -1) as result2
|
||||
"""
|
||||
}
|
||||
Reference in New Issue
Block a user