[bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656)

This commit is contained in:
Kang
2023-06-11 14:00:04 +08:00
committed by GitHub
parent ca1e2ddf43
commit bd9a9a32f5
3 changed files with 57 additions and 7 deletions

View File

@ -25,6 +25,9 @@
namespace doris {
// Scheme of a native S3 location, e.g. the "s3" in s3://bucket1/path/to/file.txt.
const std::string S3URI::_SCHEME_S3 = "s3";
// Schemes of locations of the form http(s)://host/bucket1/path/to/file.txt.
const std::string S3URI::_SCHEME_HTTP = "http";
const std::string S3URI::_SCHEME_HTTPS = "https";
// Separator between the scheme and the rest of the location.
const std::string S3URI::_SCHEME_DELIM = "://";
// Separator used to split the part after the scheme into host/bucket/key pieces.
const std::string S3URI::_PATH_DELIM = "/";
// NOTE(review): presumably marks the start of a query string; its use is not
// visible in this hunk -- confirm against the rest of parse().
const std::string S3URI::_QUERY_DELIM = "?";
@ -42,15 +45,30 @@ Status S3URI::parse() {
std::vector<std::string> scheme_split = strings::Split(_location, _SCHEME_DELIM);
std::string rest;
if (scheme_split.size() == 2) {
// has scheme, eg: s3://bucket1/path/to/file.txt
rest = scheme_split[1];
std::vector<std::string> authority_split =
strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
if (authority_split.size() != 2) {
if (scheme_split[0] == _SCHEME_S3) {
// has scheme, eg: s3://bucket1/path/to/file.txt
rest = scheme_split[1];
std::vector<std::string> authority_split =
strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
if (authority_split.size() != 2) {
return Status::InvalidArgument("Invalid S3 URI: {}", _location);
}
_bucket = authority_split[0];
_key = authority_split[1];
} else if (scheme_split[0] == _SCHEME_HTTP || scheme_split[0] == _SCHEME_HTTPS) {
// has scheme, eg: http(s)://host/bucket1/path/to/file.txt
rest = scheme_split[1];
std::vector<std::string> authority_split =
strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 2));
if (authority_split.size() != 3) {
return Status::InvalidArgument("Invalid S3 HTTP URI: {}", _location);
}
// authority_split[0] is host, authority_split[1] is bucket, authority_split[2] is key
_bucket = authority_split[1];
_key = authority_split[2];
} else {
return Status::InvalidArgument("Invalid S3 URI: {}", _location);
}
_bucket = authority_split[0];
_key = authority_split[1];
} else if (scheme_split.size() == 1) {
// no scheme, eg: path/to/file.txt
_bucket = ""; // unknown

View File

@ -41,6 +41,9 @@ public:
std::string to_string() const;
private:
// Scheme names compared against the part of the location before _SCHEME_DELIM.
static const std::string _SCHEME_S3;
static const std::string _SCHEME_HTTP;
static const std::string _SCHEME_HTTPS;
// Separator between the scheme and the rest of the location.
static const std::string _SCHEME_DELIM;
// Separator between the path pieces (host, bucket, key) of the location.
static const std::string _PATH_DELIM;
// NOTE(review): presumably the query-string separator; usage is not visible here.
static const std::string _QUERY_DELIM;

View File

@ -56,13 +56,42 @@ TEST_F(S3URITest, EncodedString) {
EXPECT_EQ("path%20to%20file", uri1.get_key());
}
TEST_F(S3URITest, HttpURI) {
    // Locations of the form scheme://host/bucket/key: the first path segment
    // after the host is expected as the bucket, everything after it as the key.

    // Plain http scheme.
    std::string http_location{"http://a.b.com/bucket/path/to/file"};
    S3URI http_uri(http_location);
    EXPECT_TRUE(http_uri.parse());
    EXPECT_EQ("bucket", http_uri.get_bucket());
    EXPECT_EQ("path/to/file", http_uri.get_key());

    // https scheme must yield the same bucket and key.
    std::string https_location{"https://a.b.com/bucket/path/to/file"};
    S3URI https_uri(https_location);
    EXPECT_TRUE(https_uri.parse());
    EXPECT_EQ("bucket", https_uri.get_bucket());
    EXPECT_EQ("path/to/file", https_uri.get_key());
}
TEST_F(S3URITest, InvalidSchema) {
    // A scheme other than s3, http or https must make parse() fail.
    std::string unknown_scheme_location{"xxx://a.b.com/bucket/path/to/file"};
    S3URI unknown_scheme_uri(unknown_scheme_location);
    EXPECT_FALSE(unknown_scheme_uri.parse());
}
TEST_F(S3URITest, MissingKey) {
    // Every location below lacks a usable object key, so parse() must fail.

    // https with a single segment and a trailing slash: nothing follows it.
    std::string https_single_segment{"https://bucket/"};
    S3URI https_single_segment_uri(https_single_segment);
    EXPECT_FALSE(https_single_segment_uri.parse());

    // s3 scheme with a bucket but an empty key.
    std::string s3_empty_key{"s3://bucket/"};
    S3URI s3_empty_key_uri(s3_empty_key);
    EXPECT_FALSE(s3_empty_key_uri.parse());

    // http with host and bucket but an empty key.
    std::string http_empty_key{"http://a.b.com/bucket/"};
    S3URI http_empty_key_uri(http_empty_key);
    EXPECT_FALSE(http_empty_key_uri.parse());

    // http with a host only: neither bucket nor key.
    std::string http_host_only{"http://a.b.com/"};
    S3URI http_host_only_uri(http_host_only);
    EXPECT_FALSE(http_host_only_uri.parse());
}
TEST_F(S3URITest, RelativePathing) {