[feature](function)Support negative index for function split_part (#13914)

This commit is contained in:
Yulei-Yang
2022-12-12 09:56:09 +08:00
committed by GitHub
parent 614d7273f5
commit 33349c3419
8 changed files with 384 additions and 130 deletions

View File

@ -920,35 +920,98 @@ static int index_of(const uint8_t* source, int source_offset, int source_count,
return -1;
}
StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& content,
const StringVal& delimiter, const IntVal& field) {
if (content.is_null || delimiter.is_null || field.is_null || field.val <= 0) {
return StringVal::null();
static int last_index_of(const uint8_t* source, int source_len, const uint8_t* target,
int target_len, int to_index) {
if (to_index < 0) {
return -1;
}
std::vector<int> find(field.val, -1); //store substring position
int from = 0;
for (int i = 1; i <= field.val; i++) { // find
int last_index = i - 1;
find[last_index] =
index_of(content.ptr, 0, content.len, delimiter.ptr, 0, delimiter.len, from);
from = find[last_index] + delimiter.len;
if (find[last_index] == -1) {
break;
if (to_index >= source_len) {
to_index = source_len - 1;
}
if (target_len == 0) {
return to_index;
}
const uint8_t last = target[target_len - 1];
int min = target_len;
for (int i = to_index; i >= min; i--) {
while (i >= min && source[i] != last) {
i--; // Look for last character
}
if (i >= min) { // Found first character, now look at the rest of v2
int j = i - 1;
int end = j - target_len + 1;
for (int k = target_len - 2; j > end && source[j] == target[k];) {
j--;
k--;
}
if (j == end) {
return i - target_len + 1;
}
}
}
if ((field.val > 1 && find[field.val - 2] == -1) ||
(field.val == 1 && find[field.val - 1] == -1)) {
// field not find return null
return -1;
}
StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& content,
const StringVal& delimiter, const IntVal& field) {
if (content.is_null || delimiter.is_null || field.is_null || field.val == 0) {
return StringVal::null();
}
int start_pos;
if (field.val == 1) { // find need split first part
start_pos = 0;
if (field.val > 0) {
int from = 0;
std::vector<int> find(field.val, -1); //store substring position
for (int i = 1; i <= field.val; i++) { // find
int last_index = i - 1;
find[last_index] =
index_of(content.ptr, 0, content.len, delimiter.ptr, 0, delimiter.len, from);
from = find[last_index] + delimiter.len;
if (find[last_index] == -1) {
break;
}
}
if ((field.val > 1 && find[field.val - 2] == -1) ||
(field.val == 1 && find[field.val - 1] == -1)) {
// field not find return null
return StringVal::null();
}
int start_pos;
if (field.val == 1) { // find need split first part
start_pos = 0;
} else {
start_pos = find[field.val - 2] + delimiter.len;
}
int len = (find[field.val - 1] == -1 ? content.len : find[field.val - 1]) - start_pos;
return StringVal(content.ptr + start_pos, len);
} else {
start_pos = find[field.val - 2] + delimiter.len;
int to = content.len;
int abs_field = -field.val;
std::vector<int> find(abs_field, -1); //store substring position
for (int i = 1; i <= abs_field; i++) { // find
int last_index = i - 1;
find[last_index] =
last_index_of(content.ptr, content.len, delimiter.ptr, delimiter.len, to);
to = find[last_index] - delimiter.len;
if (find[last_index] == -1) {
break;
}
}
if ((abs_field > 1 && find[abs_field - 2] == -1) ||
(abs_field == 1 && find[abs_field - 1] == -1)) {
// field not find return null
return StringVal::null();
}
int end_pos;
if (abs_field == 1) { // find need split first part
end_pos = content.len - 1;
} else {
end_pos = find[abs_field - 2] - 1;
}
int len =
end_pos - (find[abs_field - 1] == -1 ? 0 : find[abs_field - 1] + delimiter.len) + 1;
return StringVal(content.ptr + end_pos - len + 1, len);
}
int len = (find[field.val - 1] == -1 ? content.len : find[field.val - 1]) - start_pos;
return StringVal(content.ptr + start_pos, len);
}
StringVal StringFunctions::replace(FunctionContext* context, const StringVal& origStr,