[Fix](file system) Make the constructor of XxxFileSystem a private method (#15889)
Since FileSystem inherits from std::enable_shared_from_this, it is dangerous to create a FileSystem through a raw pointer: shared_from_this() may only be called on an object that is already owned by a std::shared_ptr. To avoid this, the constructor of each XxxFileSystem is made private and the static method create(...) is used to obtain a new FileSystem object.
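The gist of the pattern, as a minimal standalone sketch (the class and member names below are illustrative, not the exact Doris declarations):

```cpp
#include <memory>
#include <string>
#include <utility>

// Minimal sketch of the factory pattern this PR applies; the real Doris classes
// carry many more members and methods.
class FileSystem : public std::enable_shared_from_this<FileSystem> {
public:
    virtual ~FileSystem() = default;

protected:
    // Only subclasses (and their create() factories) may construct a FileSystem.
    explicit FileSystem(std::string root_path) : _root_path(std::move(root_path)) {}

    std::string _root_path;
};

class LocalFileSystem final : public FileSystem {
public:
    // The only way to obtain an instance. The object is always owned by a
    // std::shared_ptr, so calling shared_from_this() later is well defined.
    static std::shared_ptr<LocalFileSystem> create(std::string root_path) {
        // std::make_shared cannot reach the private constructor, hence new + shared_ptr.
        return std::shared_ptr<LocalFileSystem>(new LocalFileSystem(std::move(root_path)));
    }

private:
    explicit LocalFileSystem(std::string root_path) : FileSystem(std::move(root_path)) {}
};

int main() {
    // LocalFileSystem fs("/tmp");              // no longer compiles: constructor is private
    auto fs = LocalFileSystem::create("/tmp");  // always shared_ptr-owned
    return 0;
}
```

In the actual patch, the FileSystem base-class constructor moves from public to protected, each concrete XxxFileSystem gets a private constructor plus a static create(), and every `new io::XxxFileSystem(...)` / `std::make_shared<io::XxxFileSystem>(...)` call site is switched to `io::XxxFileSystem::create(...)`, as the hunks below show.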
@@ -210,7 +210,7 @@ Status FileFactory::create_hdfs_reader(const THdfsParams& hdfs_params, const std
                                       io::FileReaderSPtr* reader,
                                       const io::FileReaderOptions& reader_options,
                                       IOContext* io_ctx) {
-    hdfs_file_system->reset(new io::HdfsFileSystem(hdfs_params, ""));
+    *hdfs_file_system = io::HdfsFileSystem::create(hdfs_params, "");
    RETURN_IF_ERROR((std::static_pointer_cast<io::HdfsFileSystem>(*hdfs_file_system))->connect());
    RETURN_IF_ERROR((*hdfs_file_system)->open_file(path, reader_options, reader, io_ctx));
    return Status::OK();
@@ -235,7 +235,7 @@ Status FileFactory::create_s3_reader(const std::map<std::string, std::string>& p
    }
    S3Conf s3_conf;
    RETURN_IF_ERROR(ClientFactory::convert_properties_to_s3_conf(prop, s3_uri, &s3_conf));
-    s3_file_system->reset(new io::S3FileSystem(s3_conf, ""));
+    *s3_file_system = io::S3FileSystem::create(s3_conf, "");
    RETURN_IF_ERROR((std::static_pointer_cast<io::S3FileSystem>(*s3_file_system))->connect());
    RETURN_IF_ERROR((*s3_file_system)->open_file(s3_uri.get_key(), reader_options, reader, io_ctx));
    return Status::OK();
@@ -248,8 +248,8 @@ Status FileFactory::create_broker_reader(const TNetworkAddress& broker_addr,
                                         io::FileReaderSPtr* reader,
                                         const io::FileReaderOptions& reader_options,
                                         IOContext* io_ctx) {
-    broker_file_system->reset(
-            new io::BrokerFileSystem(broker_addr, prop, file_description.file_size));
+    *broker_file_system =
+            io::BrokerFileSystem::create(broker_addr, prop, file_description.file_size);
    RETURN_IF_ERROR(
            (std::static_pointer_cast<io::BrokerFileSystem>(*broker_file_system))->connect());
    RETURN_IF_ERROR((*broker_file_system)
@@ -56,6 +56,13 @@ inline const std::string& client_id(const TNetworkAddress& addr) {
}
#endif

+std::shared_ptr<BrokerFileSystem> BrokerFileSystem::create(
+        const TNetworkAddress& broker_addr, const std::map<std::string, std::string>& broker_prop,
+        size_t file_size) {
+    return std::shared_ptr<BrokerFileSystem>(
+            new BrokerFileSystem(broker_addr, broker_prop, file_size));
+}
+
BrokerFileSystem::BrokerFileSystem(const TNetworkAddress& broker_addr,
                                   const std::map<std::string, std::string>& broker_prop,
                                   size_t file_size)
@@ -24,8 +24,9 @@ namespace doris {
namespace io {
class BrokerFileSystem final : public RemoteFileSystem {
public:
-    BrokerFileSystem(const TNetworkAddress& broker_addr,
-                     const std::map<std::string, std::string>& broker_prop, size_t file_size);
+    static std::shared_ptr<BrokerFileSystem> create(
+            const TNetworkAddress& broker_addr,
+            const std::map<std::string, std::string>& broker_prop, size_t file_size);

    ~BrokerFileSystem() override = default;

@@ -66,6 +67,9 @@ public:
    Status get_client(std::shared_ptr<BrokerServiceConnection>* client) const;

private:
+    BrokerFileSystem(const TNetworkAddress& broker_addr,
+                     const std::map<std::string, std::string>& broker_prop, size_t file_size);
+
    const TNetworkAddress& _broker_addr;
    const std::map<std::string, std::string>& _broker_prop;
    size_t _file_size;
@@ -44,9 +44,6 @@ enum class FileSystemType : uint8_t {

class FileSystem : public std::enable_shared_from_this<FileSystem> {
public:
-    FileSystem(Path&& root_path, ResourceId&& resource_id, FileSystemType type)
-            : _root_path(std::move(root_path)), _resource_id(std::move(resource_id)), _type(type) {}
-
    virtual ~FileSystem() = default;

    DISALLOW_COPY_AND_ASSIGN(FileSystem);
@@ -81,6 +78,9 @@ public:
    const FileSystemType type() const { return _type; }

protected:
+    FileSystem(Path&& root_path, ResourceId&& resource_id, FileSystemType type)
+            : _root_path(std::move(root_path)), _resource_id(std::move(resource_id)), _type(type) {}
+
    Path _root_path;
    ResourceId _resource_id;
    FileSystemType _type;
@@ -61,6 +61,11 @@ private:
    void _clean_oldest();
};

+std::shared_ptr<HdfsFileSystem> HdfsFileSystem::create(const THdfsParams& hdfs_params,
+                                                       const std::string& path) {
+    return std::shared_ptr<HdfsFileSystem>(new HdfsFileSystem(hdfs_params, path));
+}
+
HdfsFileSystem::HdfsFileSystem(const THdfsParams& hdfs_params, const std::string& path)
        : RemoteFileSystem(path, "", FileSystemType::HDFS),
          _hdfs_params(hdfs_params),
@@ -81,7 +81,9 @@ private:

class HdfsFileSystem final : public RemoteFileSystem {
public:
-    HdfsFileSystem(const THdfsParams& hdfs_params, const std::string& path);
+    static std::shared_ptr<HdfsFileSystem> create(const THdfsParams& hdfs_params,
+                                                  const std::string& path);

    ~HdfsFileSystem() override;

    Status create_file(const Path& path, FileWriterPtr* writer) override;
@@ -119,6 +121,8 @@ public:
    HdfsFileSystemHandle* get_handle();

private:
+    HdfsFileSystem(const THdfsParams& hdfs_params, const std::string& path);
+
    Path _covert_path(const Path& path) const;
    const THdfsParams& _hdfs_params;
    std::string _namenode;
@@ -24,6 +24,11 @@
namespace doris {
namespace io {

+std::shared_ptr<LocalFileSystem> LocalFileSystem::create(Path path, ResourceId resource_id) {
+    return std::shared_ptr<LocalFileSystem>(
+            new LocalFileSystem(std::move(path), std::move(resource_id)));
+}
+
LocalFileSystem::LocalFileSystem(Path root_path, ResourceId resource_id)
        : FileSystem(std::move(root_path), std::move(resource_id), FileSystemType::LOCAL) {}
@@ -144,7 +149,7 @@ Status LocalFileSystem::list(const Path& path, std::vector<Path>* files) {
    return Status::OK();
}

-static FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+static FileSystemSPtr local_fs = io::LocalFileSystem::create("");

const FileSystemSPtr& global_local_filesystem() {
    return local_fs;
@@ -25,7 +25,8 @@ namespace io {

class LocalFileSystem final : public FileSystem {
public:
-    LocalFileSystem(Path root_path, ResourceId resource_id = ResourceId());
+    static std::shared_ptr<LocalFileSystem> create(Path path, ResourceId resource_id = "");

    ~LocalFileSystem() override;

    Status create_file(const Path& path, FileWriterPtr* writer) override;
@@ -52,6 +53,8 @@ public:
    Status list(const Path& path, std::vector<Path>* files) override;

private:
+    LocalFileSystem(Path root_path, ResourceId resource_id = ResourceId());
+
    Path absolute_path(const Path& path) const;
};
@@ -28,11 +28,5 @@ inline Path operator/(Path&& lhs, const Path& rhs) {
    return std::move(lhs /= rhs);
}

-struct PathHasher {
-    std::size_t operator()(const doris::io::Path& k) const {
-        return std::hash<std::string>()(k.filename().native());
-    }
-};
-
} // namespace io
} // namespace doris
@@ -49,6 +49,11 @@ namespace io {
}
#endif

+std::shared_ptr<S3FileSystem> S3FileSystem::create(S3Conf s3_conf, ResourceId resource_id) {
+    return std::shared_ptr<S3FileSystem>(
+            new S3FileSystem(std::move(s3_conf), std::move(resource_id)));
+}
+
S3FileSystem::S3FileSystem(S3Conf s3_conf, ResourceId resource_id)
        : RemoteFileSystem(
                  fmt::format("{}/{}/{}", s3_conf.endpoint, s3_conf.bucket, s3_conf.prefix),
@@ -35,7 +35,8 @@ namespace io {
// This class is thread-safe.(Except `set_xxx` method)
class S3FileSystem final : public RemoteFileSystem {
public:
-    S3FileSystem(S3Conf s3_conf, ResourceId resource_id);
+    static std::shared_ptr<S3FileSystem> create(S3Conf s3_conf, ResourceId resource_id);

    ~S3FileSystem() override;

    Status create_file(const Path& path, FileWriterPtr* writer) override;
@@ -78,6 +79,8 @@ public:
    std::string get_key(const Path& path) const;

private:
+    S3FileSystem(S3Conf s3_conf, ResourceId resource_id);
+
    S3Conf _s3_conf;

    // FIXME(cyx): We can use std::atomic<std::shared_ptr> since c++20.
@@ -73,7 +73,7 @@ DataDir::DataDir(const std::string& path, int64_t capacity_bytes,
                 TStorageMedium::type storage_medium, TabletManager* tablet_manager,
                 TxnManager* txn_manager)
        : _path(path),
-          _fs(std::make_shared<io::LocalFileSystem>(path)),
+          _fs(io::LocalFileSystem::create(path)),
          _capacity_bytes(capacity_bytes),
          _available_bytes(0),
          _disk_capacity_bytes(0),
@@ -67,7 +67,7 @@ void StoragePolicyMgr::periodic_put(const std::string& name, const StoragePolicy
        s3_conf.connect_timeout_ms = policy->s3_conn_timeout_ms;
        s3_conf.bucket = policy->bucket;
        s3_conf.prefix = policy->root_path;
-        s3_fs = std::make_shared<io::S3FileSystem>(std::move(s3_conf), name);
+        s3_fs = io::S3FileSystem::create(std::move(s3_conf), name);
        io::FileSystemMap::instance()->insert(name, s3_fs);
        _policy_map.emplace(name, policy);
    } else if (it->second->md5_sum != policy->md5_sum) {
@@ -1,48 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-// This file is used to fixed macro conflict between butil and gutil
-// and this file must put the first include in source file
-
-#include "gutil/macros.h"
-// Macros in the guti/macros.h, use butil's define
-#ifdef DISALLOW_IMPLICIT_CONSTRUCTORS
-#undef DISALLOW_IMPLICIT_CONSTRUCTORS
-#endif
-
-#ifdef arraysize
-#undef arraysize
-#endif
-
-#ifdef ARRAY_SIZE
-#undef ARRAY_SIZE
-#endif
-
-#undef OVERRIDE
-#undef FINAL
-
-// use be/src/gutil/integral_types.h override butil/basictypes.h
-#include "gutil/integral_types.h"
-#ifdef BASE_INTEGRAL_TYPES_H_
-#define BUTIL_BASICTYPES_H_
-#endif
-
-#ifdef DEBUG_MODE
-#undef DEBUG_MODE
-#endif
be/test/io/cache/remote_file_cache_test.cpp (2 changes, vendored)
@@ -163,7 +163,7 @@ protected:
        // just use to create s3 filesystem, otherwise won't use cache
        S3Conf s3_conf;
        std::shared_ptr<io::S3FileSystem> fs =
-                std::make_shared<io::S3FileSystem>(std::move(s3_conf), resource_id);
+                io::S3FileSystem::create(std::move(s3_conf), resource_id);
        rowset.rowset_meta()->set_resource_id(resource_id);
        rowset.rowset_meta()->set_num_segments(1);
        rowset.rowset_meta()->set_fs(fs);
@@ -52,7 +52,7 @@ public:
        s3_conf.region = config::test_s3_region;
        s3_conf.bucket = config::test_s3_bucket;
        s3_conf.prefix = "remote_rowset_gc_test";
-        auto s3_fs = std::make_shared<io::S3FileSystem>(std::move(s3_conf), kResourceId);
+        auto s3_fs = io::S3FileSystem::create(std::move(s3_conf), kResourceId);
        ASSERT_TRUE(s3_fs->connect().ok());
        io::FileSystemMap::instance()->insert(kResourceId, s3_fs);
@@ -232,7 +232,7 @@ TEST_F(BetaRowsetTest, ReadTest) {
    s3_conf.prefix = "prefix";
    io::ResourceId resource_id = "test_resourse_id";
    std::shared_ptr<io::S3FileSystem> fs =
-            std::make_shared<io::S3FileSystem>(std::move(s3_conf), resource_id);
+            io::S3FileSystem::create(std::move(s3_conf), resource_id);
    Aws::SDKOptions aws_options = Aws::SDKOptions {};
    Aws::InitAPI(aws_options);
    // failed to head object
@@ -51,7 +51,7 @@ public:
        s3_conf.region = config::test_s3_region;
        s3_conf.bucket = config::test_s3_bucket;
        s3_conf.prefix = "tablet_cooldown_test";
-        auto s3_fs = std::make_shared<io::S3FileSystem>(std::move(s3_conf), kResourceId);
+        auto s3_fs = io::S3FileSystem::create(std::move(s3_conf), kResourceId);
        ASSERT_TRUE(s3_fs->connect().ok());
        io::FileSystemMap::instance()->insert(kResourceId, s3_fs);
@@ -89,7 +89,7 @@ TEST_F(ParquetReaderTest, normal) {
    DescriptorTbl::create(&obj_pool, t_desc_table, &desc_tbl);

    auto slot_descs = desc_tbl->get_tuple_descriptor(0)->slots();
-    io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+    io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
    io::FileReaderSPtr reader;
    local_fs->open_file("./be/test/exec/test_data/parquet_scanner/type-decoder.parquet", &reader,
                        nullptr);
@@ -47,7 +47,7 @@ public:
};

TEST_F(ParquetThriftReaderTest, normal) {
-    io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+    io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
    io::FileReaderSPtr reader;
    auto st = local_fs->open_file("./be/test/exec/test_data/parquet_scanner/localfile.parquet",
                                  &reader, nullptr);
@@ -79,7 +79,7 @@ TEST_F(ParquetThriftReaderTest, complex_nested_file) {
    // `friend` map<string,string>,
    // `mark` struct<math:int,english:int>)

-    io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+    io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
    io::FileReaderSPtr reader;
    auto st = local_fs->open_file("./be/test/exec/test_data/parquet_scanner/hive-complex.parquet",
                                  &reader, nullptr);
@@ -283,7 +283,7 @@ static void read_parquet_data_and_check(const std::string& parquet_file,
    * `list_string` array<string>) // 14
    */

-    io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+    io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
    io::FileReaderSPtr reader;
    auto st = local_fs->open_file(parquet_file, &reader, nullptr);
    EXPECT_TRUE(st.ok());
@@ -405,7 +405,7 @@ TEST_F(ParquetThriftReaderTest, group_reader) {
        lazy_read_ctx.all_read_columns.emplace_back(slot->col_name());
        read_columns.emplace_back(ParquetReadColumn(7, slot->col_name()));
    }
-    io::FileSystemSPtr local_fs = std::make_shared<io::LocalFileSystem>("");
+    io::FileSystemSPtr local_fs = io::LocalFileSystem::create("");
    io::FileReaderSPtr file_reader;
    auto st = local_fs->open_file("./be/test/exec/test_data/parquet_scanner/type-decoder.parquet",
                                  &file_reader, nullptr);
@@ -57,6 +57,12 @@ Related parameters for accessing S3:
- `secret_key`: (required)
- `use_path_style`: (optional) default `false`. The S3 SDK uses the virtual-hosted style by default. However, some object storage systems may not enable or support virtual-hosted style access; in that case, the `use_path_style` parameter can be added to force path style access.

+> Note: The URI currently supports three schemas: http://, https:// and s3://.
+> 1. If you use http:// or https://, whether S3 is accessed in 'path style' is decided by the 'use_path_style' parameter.
+> 2. If you use s3://, S3 is always accessed in 'virtual-hosted style' and the 'use_path_style' parameter is ignored.
+>
+> For detailed use cases, refer to the Best Practice section at the bottom.
+
file format parameter:

- `format`: (required) Currently support `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
@@ -99,8 +105,43 @@ MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.c
s3, table-valued-function, tvf

### Best Practice
Since the S3 table-valued-function does not know the table schema in advance, it will read the file first to parse out the table schema.

**Usage of different uri schemas**
Example of http:// and https://:

```sql
// Note how the bucket is written in the URI and how the 'use_path_style' parameter is set; http:// works the same way.
// Because "use_path_style"="true", s3 will be accessed in 'path style'.
select * from s3(
        "URI" = "https://endpoint/bucket/file/student.csv",
        "ACCESS_KEY"= "ak",
        "SECRET_KEY" = "sk",
        "FORMAT" = "csv",
        "use_path_style"="true");

// Note how the bucket is written in the URI and how the 'use_path_style' parameter is set; http:// works the same way.
// Because "use_path_style"="false", s3 will be accessed in 'virtual-hosted style'.
select * from s3(
        "URI" = "https://bucket.endpoint/file/student.csv",
        "ACCESS_KEY"= "ak",
        "SECRET_KEY" = "sk",
        "FORMAT" = "csv",
        "use_path_style"="false");
```

Example of s3://:

```sql
// Note how the bucket is written in the URI; there is no need to set 'use_path_style'.
// s3 will be accessed in 'virtual-hosted style'.
select * from s3(
        "URI" = "s3://bucket.endpoint/file/student.csv",
        "ACCESS_KEY"= "ak",
        "SECRET_KEY" = "sk",
        "FORMAT" = "csv");
```

Since the S3 table-valued-function does not know the table schema in advance, it will read the file first to parse out the table schema. Specifically, for different file formats:

**csv format**
`csv` format: Read the file on S3 and process it as a csv file, reading the first line of the file to parse out the table schema. The number of columns `n` in the first line of the file will be used as the number of columns in the table schema, the column names will be automatically named `c1, c2, ..., cn`, and the column type is set to `String`, for example:
@@ -58,7 +58,13 @@ Each parameter in the S3 tvf is a `"key"="value"` pair.
- `uri`: (required) The URI for accessing S3. The S3 table function decides whether to use path style access based on the `use_path_style` parameter; the default is virtual-hosted style.
- `access_key`: (required)
- `secret_key`: (required)
-- `use_path_style`: (optional) Defaults to `false`. The S3 SDK uses virtual-hosted style by default. However, some object storage systems may not enable or support virtual-hosted style access; in that case, the `use_path_style` parameter can be added to force path style access.
+- `use_path_style`: (optional) Defaults to `false`. The S3 SDK uses virtual-hosted style by default. However, some object storage systems may not enable or support virtual-hosted style access; in that case, the `use_path_style` parameter can be added to force path style access. For example, `minio` only allows `path style` access by default, so `use_path_style=true` must be set when accessing minio.
+
+> Note: The URI currently supports three schemas: http://, https:// and s3://.
+> 1. If http:// or https:// is used, whether S3 is accessed in 'path style' is decided by the 'use_path_style' parameter.
+> 2. If s3:// is used, S3 is always accessed in 'virtual-hosted style' and the 'use_path_style' parameter is ignored.
+>
+> For detailed use cases, refer to the Best Practice section at the bottom.

File format parameters:
- `format`: (required) Currently supports `csv/csv_with_names/csv_with_names_and_types/json/parquet/orc`
@@ -102,9 +108,44 @@ MySQL [(none)]> Desc function s3("uri" = "http://127.0.0.1:9312/test2/student1.c

### Best Practice

Since the S3 table-valued-function does not know the table schema in advance, it reads the file once first to parse out the table schema; specifically, for different file formats:
**Usage of different URI schemas**
Example of http:// and https://:
```sql
// Note how the bucket is written in the URI and how the use_path_style parameter is set; http:// works the same way.
// Because "use_path_style"="true" is set, s3 will be accessed in path style.
select * from s3(
        "URI" = "https://endpoint/bucket/file/student.csv",
        "ACCESS_KEY"= "ak",
        "SECRET_KEY" = "sk",
        "FORMAT" = "csv",
        "use_path_style"="true");

// Note how the bucket is written in the URI and how the use_path_style parameter is set; http:// works the same way.
// Because "use_path_style"="false" is set, s3 will be accessed in virtual-hosted style.
select * from s3(
        "URI" = "https://bucket.endpoint/file/student.csv",
        "ACCESS_KEY"= "ak",
        "SECRET_KEY" = "sk",
        "FORMAT" = "csv",
        "use_path_style"="false");
```

Example of s3://:

```sql
// Note how the bucket is written in the URI; there is no need to set the use_path_style parameter.
// s3 will be accessed in virtual-hosted style.
select * from s3(
        "URI" = "s3://bucket.endpoint/file/student.csv",
        "ACCESS_KEY"= "ak",
        "SECRET_KEY" = "sk",
        "FORMAT" = "csv");
```


**csv format**
Since the S3 table-valued-function does not know the table schema in advance, it reads the file once first to parse out the table schema.

`csv` format: the S3 table-valued-function reads the file on S3 and processes it as a csv file, reading the first line of the file to parse out the table schema. The number of columns `n` in the first line of the file is used as the number of columns in the table schema, the columns are automatically named `c1, c2, ..., cn`, and every column type is set to `String`. For example:

The content of student1.csv is: