[opt](function) refactor extract_url to use StringValue (#13508)

Change extract_url to use StringValue instead of std::string to speed it up.
This commit is contained in:
HappenLee
2022-10-21 08:33:39 +08:00
committed by GitHub
parent 3dd00df24b
commit e62d3dd8e5
6 changed files with 38 additions and 33 deletions

View File

@ -173,6 +173,9 @@ struct StringValue {
// Trims leading and trailing spaces.
StringValue trim() const;
// Returns the position of the first occurrence of c, or -1 if not found.
int64_t find_first_of(char c) const;
// Stores into *sv a doris_udf::StringVal built from this value's ptr and len.
void to_string_val(doris_udf::StringVal* sv) const {
    auto* bytes = reinterpret_cast<uint8_t*>(ptr);
    *sv = doris_udf::StringVal(bytes, len);
}

View File

@ -51,4 +51,9 @@ inline StringValue StringValue::trim() const {
return StringValue(ptr + begin, end - begin + 1);
}
// Returns the zero-based offset of the first byte equal to `c` within the
// `len` bytes starting at `ptr`, or -1 when no such byte is present.
inline int64_t StringValue::find_first_of(char c) const {
    // memchr requires a valid pointer even for a zero-length scan, so an
    // empty or null-backed value is answered without calling it.
    if (ptr == nullptr || len == 0) {
        return -1;
    }
    const char* p = static_cast<const char*>(memchr(ptr, c, len));
    return p == nullptr ? -1 : p - ptr;
}
} // namespace doris

View File

@ -346,9 +346,8 @@ UrlParser::UrlPart UrlParser::get_url_part(const StringValue& part) {
}
}
std::string UrlParser::extract_url(const StringValue& url, const StringValue& name) {
std::string result;
std::string str_name = name.to_string();
StringValue UrlParser::extract_url(StringValue url, StringValue name) {
StringValue result("", 0);
// Remove leading and trailing spaces.
StringValue trimmed_url = url.trim();
// find '?'
@ -358,45 +357,45 @@ std::string UrlParser::extract_url(const StringValue& url, const StringValue& na
// Example: https://doris.apache.org/
return result;
}
// find '#'
int32_t hash_pos = _s_hash_search.search(&trimmed_url);
std::string sub_url = "";
StringValue sub_url;
if (hash_pos < 0) {
sub_url = trimmed_url.substring(question_pos + 1, trimmed_url.len - question_pos - 1)
.to_string();
sub_url = trimmed_url.substring(question_pos + 1, trimmed_url.len - question_pos - 1);
} else {
sub_url = trimmed_url.substring(question_pos + 1, hash_pos - question_pos - 1).to_string();
sub_url = trimmed_url.substring(question_pos + 1, hash_pos - question_pos - 1);
}
// find '&' and '=', and extract target parameter
// Example: k1=aa&k2=bb&k3=cc&test=dd
std::string::size_type and_pod;
std::string::size_type len = sub_url.length();
std::string key_url;
int64_t and_pod;
auto len = sub_url.len;
StringValue key_url;
while (true) {
if (len <= 0) {
break;
}
and_pod = sub_url.find_first_of('&');
if (and_pod != std::string::npos) {
key_url = sub_url.substr(0, and_pod);
sub_url = sub_url.substr(and_pod + 1, len - and_pod);
if (and_pod != -1) {
key_url = sub_url.substring(0, and_pod);
sub_url = sub_url.substring(and_pod + 1, len - and_pod);
} else {
key_url = sub_url;
sub_url = "";
auto end_pos = sub_url.find_first_of('#');
key_url = end_pos == -1 ? sub_url : sub_url.substring(0, end_pos);
sub_url = result;
}
len = sub_url.length();
len = sub_url.len;
std::string::size_type eq_pod = key_url.find_first_of('=');
if (eq_pod == std::string::npos) {
auto eq_pod = key_url.find_first_of('=');
if (eq_pod == -1) {
// invalid url. like: k1&k2=bb
continue;
}
int32_t key_len = key_url.length();
std::string key = key_url.substr(0, eq_pod);
if (str_name == key) {
result = key_url.substr(eq_pod + 1, key_len - eq_pod);
return result;
int32_t key_len = key_url.len;
auto key = key_url.substring(0, eq_pod);
if (name == key) {
return key_url.substring(eq_pod + 1, key_len - eq_pod - 1);
}
}
return result;

View File

@ -63,7 +63,7 @@ public:
// Extract parameter value from url
// Example for url:
// http://doris.apache.org?k1=aa&k2=bb&k3=cc&test=dd#999
static std::string extract_url(const StringValue& url, const StringValue& name);
static StringValue extract_url(StringValue url, StringValue name);
private:
// Constants representing parts of a URL.

View File

@ -1276,12 +1276,9 @@ public:
for (int i = 0; i < input_rows_count; ++i) {
auto source = url_col->get_data_at(i);
auto param = parameter_col->get_data_at(i);
StringValue url_str(const_cast<char*>(source.data), source.size);
StringValue parameter_str(const_cast<char*>(param.data), param.size);
auto res = extract_url(source, param);
std::string result = extract_url(url_str, parameter_str);
col_res->insert_data(result.data(), result.length());
col_res->insert_data(res.ptr, res.len);
}
block.replace_by_position(result, std::move(col_res));
@ -1289,11 +1286,11 @@ public:
}
private:
// Helper called once per row by the loop above. NOTE(review): the lines below
// appear to interleave the pre-change (std::string result, StringValue args)
// and post-change (StringValue result, StringRef args) versions of the same
// function, as rendered by the diff view.
std::string extract_url(StringValue url, StringValue parameter) {
if (url.len == 0 || parameter.len == 0) {
return "";
StringValue extract_url(StringRef url, StringRef parameter) {
// An empty URL or an empty parameter name yields an empty result.
if (url.size == 0 || parameter.size == 0) {
return StringValue("", 0);
}
return UrlParser::extract_url(url, parameter);
// Otherwise convert both arguments to StringValue and delegate to UrlParser.
return UrlParser::extract_url(StringValue(url), StringValue(parameter));
}
};

View File

@ -388,6 +388,7 @@
"sql-manual/sql-functions/string-functions/split_part",
"sql-manual/sql-functions/string-functions/money_format",
"sql-manual/sql-functions/string-functions/parse_url",
"sql-manual/sql-functions/string-functions/extract_url_parameter",
{
"type": "category",
"label": "Fuzzy Match",