[bugfix](s3 fs) fix s3 uri parsing for http/https uri (#20656)
This commit is contained in:
@ -25,6 +25,9 @@
|
||||
|
||||
namespace doris {
|
||||
|
||||
const std::string S3URI::_SCHEME_S3 = "s3";
|
||||
const std::string S3URI::_SCHEME_HTTP = "http";
|
||||
const std::string S3URI::_SCHEME_HTTPS = "https";
|
||||
const std::string S3URI::_SCHEME_DELIM = "://";
|
||||
const std::string S3URI::_PATH_DELIM = "/";
|
||||
const std::string S3URI::_QUERY_DELIM = "?";
|
||||
@ -42,15 +45,30 @@ Status S3URI::parse() {
|
||||
std::vector<std::string> scheme_split = strings::Split(_location, _SCHEME_DELIM);
|
||||
std::string rest;
|
||||
if (scheme_split.size() == 2) {
|
||||
// has scheme, eg: s3://bucket1/path/to/file.txt
|
||||
rest = scheme_split[1];
|
||||
std::vector<std::string> authority_split =
|
||||
strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
|
||||
if (authority_split.size() != 2) {
|
||||
if (scheme_split[0] == _SCHEME_S3) {
|
||||
// has scheme, eg: s3://bucket1/path/to/file.txt
|
||||
rest = scheme_split[1];
|
||||
std::vector<std::string> authority_split =
|
||||
strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 1));
|
||||
if (authority_split.size() != 2) {
|
||||
return Status::InvalidArgument("Invalid S3 URI: {}", _location);
|
||||
}
|
||||
_bucket = authority_split[0];
|
||||
_key = authority_split[1];
|
||||
} else if (scheme_split[0] == _SCHEME_HTTP || scheme_split[0] == _SCHEME_HTTPS) {
|
||||
// has scheme, eg: http(s)://host/bucket1/path/to/file.txt
|
||||
rest = scheme_split[1];
|
||||
std::vector<std::string> authority_split =
|
||||
strings::Split(rest, strings::delimiter::Limit(_PATH_DELIM, 2));
|
||||
if (authority_split.size() != 3) {
|
||||
return Status::InvalidArgument("Invalid S3 HTTP URI: {}", _location);
|
||||
}
|
||||
// authority_split[1] is host
|
||||
_bucket = authority_split[1];
|
||||
_key = authority_split[2];
|
||||
} else {
|
||||
return Status::InvalidArgument("Invalid S3 URI: {}", _location);
|
||||
}
|
||||
_bucket = authority_split[0];
|
||||
_key = authority_split[1];
|
||||
} else if (scheme_split.size() == 1) {
|
||||
// no scheme, eg: path/to/file.txt
|
||||
_bucket = ""; // unknown
|
||||
|
||||
@ -41,6 +41,9 @@ public:
|
||||
std::string to_string() const;
|
||||
|
||||
private:
|
||||
static const std::string _SCHEME_S3;
|
||||
static const std::string _SCHEME_HTTP;
|
||||
static const std::string _SCHEME_HTTPS;
|
||||
static const std::string _SCHEME_DELIM;
|
||||
static const std::string _PATH_DELIM;
|
||||
static const std::string _QUERY_DELIM;
|
||||
|
||||
@ -56,13 +56,42 @@ TEST_F(S3URITest, EncodedString) {
|
||||
EXPECT_EQ("path%20to%20file", uri1.get_key());
|
||||
}
|
||||
|
||||
TEST_F(S3URITest, HttpURI) {
|
||||
std::string p1 = "http://a.b.com/bucket/path/to/file";
|
||||
S3URI uri1(p1);
|
||||
EXPECT_TRUE(uri1.parse());
|
||||
EXPECT_EQ("bucket", uri1.get_bucket());
|
||||
EXPECT_EQ("path/to/file", uri1.get_key());
|
||||
|
||||
std::string p2 = "https://a.b.com/bucket/path/to/file";
|
||||
S3URI uri2(p2);
|
||||
EXPECT_TRUE(uri2.parse());
|
||||
EXPECT_EQ("bucket", uri2.get_bucket());
|
||||
EXPECT_EQ("path/to/file", uri2.get_key());
|
||||
}
|
||||
|
||||
TEST_F(S3URITest, InvalidSchema) {
|
||||
std::string p1 = "xxx://a.b.com/bucket/path/to/file";
|
||||
S3URI uri1(p1);
|
||||
EXPECT_FALSE(uri1.parse());
|
||||
}
|
||||
|
||||
TEST_F(S3URITest, MissingKey) {
|
||||
std::string p1 = "https://bucket/";
|
||||
S3URI uri1(p1);
|
||||
EXPECT_FALSE(uri1.parse());
|
||||
|
||||
std::string p2 = "s3://bucket/";
|
||||
S3URI uri2(p2);
|
||||
EXPECT_FALSE(uri2.parse());
|
||||
|
||||
std::string p3 = "http://a.b.com/bucket/";
|
||||
S3URI uri3(p3);
|
||||
EXPECT_FALSE(uri3.parse());
|
||||
|
||||
std::string p4 = "http://a.b.com/";
|
||||
S3URI uri4(p4);
|
||||
EXPECT_FALSE(uri4.parse());
|
||||
}
|
||||
|
||||
TEST_F(S3URITest, RelativePathing) {
|
||||
|
||||
Reference in New Issue
Block a user