diff --git a/be/src/util/url_parser.cpp b/be/src/util/url_parser.cpp index 149398b39a..ddf09b65cb 100644 --- a/be/src/util/url_parser.cpp +++ b/be/src/util/url_parser.cpp @@ -116,6 +116,11 @@ bool UrlParser::parse_url(const StringRef& url, UrlPart part, StringRef* result) } StringRef host_start = protocol_end.substring(start_pos); + // Cut at the first '?' so a ':' inside the query is not mistaken for the port separator. + int32_t query_start_pos = _s_question_search.search(&host_start); + if (query_start_pos > 0) { + host_start = host_start.substring(0, query_start_pos); + } // Find ':' to strip out port. int32_t end_pos = _s_colon_search.search(&host_start); diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp index 43bfb958cc..e5f4da64eb 100644 --- a/be/test/vec/function/function_string_test.cpp +++ b/be/test/vec/function/function_string_test.cpp @@ -1003,6 +1003,10 @@ TEST(function_string_test, function_parse_url_test) { {{std::string("facebook.com/path/p1"), std::string("HOST")}, {Null()}}, {{std::string("http://fb.com/path/p1.p?q=1#f"), std::string("HOST")}, {std::string("fb.com")}}, + {{std::string("https://www.facebook.com/aa/bb?returnpage=https://www.facebook.com/" + "aa/bb/cc"), + std::string("HOST")}, + {std::string("www.facebook.com")}}, {{std::string("http://facebook.com/path/p1.php?query=1"), std::string("AUTHORITY")}, {std::string("facebook.com")}}, {{std::string("http://facebook.com/path/p1.php?query=1"), std::string("authority")},