148 lines
5.4 KiB
C++
148 lines
5.4 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
|
|
|
|
#include "runtime/block_spill_manager.h"
|
|
|
|
#include <fmt/format.h>
|
|
#include <glog/logging.h>
|
|
|
|
#include <algorithm>
|
|
#include <boost/uuid/random_generator.hpp>
|
|
#include <boost/uuid/uuid_io.hpp>
|
|
#include <numeric>
|
|
#include <random>
|
|
|
|
#include "io/fs/file_system.h"
|
|
#include "io/fs/local_file_system.h"
|
|
#include "util/time.h"
|
|
#include "vec/core/block_spill_reader.h"
|
|
#include "vec/core/block_spill_writer.h"
|
|
|
|
namespace doris {
|
|
// Names of the subdirectories BlockSpillManager uses below each store path.
// They are private to this translation unit.
namespace {
// Directory in which get_writer() places the files of new spill streams.
const std::string BLOCK_SPILL_DIR = "spill";
// Directory into which init() moves a pre-existing spill directory and which gc() cleans.
const std::string BLOCK_SPILL_GC_DIR = "spill_gc";
} // namespace
|
|
// Stores a copy of the given store paths. The constructor body is empty: nothing on
// disk is touched here.
BlockSpillManager::BlockSpillManager(const std::vector<StorePath>& paths)
        : _store_paths(paths) {
}
|
|
|
|
// Prepares the spill directories below every configured store path.
//
// For each store path:
//   1. "<path>/spill_gc" is created if it does not exist yet.
//   2. If "<path>/spill" already exists it is renamed to "<path>/spill_gc/<suffix>",
//      where <suffix> is ToStringFromUnixMillis(UnixMillis()).
//   3. "<path>/spill" is (re)created.
//
// Returns the first error reported by the local filesystem, or Status::OK() once all
// store paths have been processed.
Status BlockSpillManager::init() {
    const auto& local_fs = io::global_local_filesystem();
    for (const auto& store : _store_paths) {
        // A single flag is shared by both existence checks; each exists() call writes to it.
        bool present = true;

        const auto gc_root = fmt::format("{}/{}", store.path, BLOCK_SPILL_GC_DIR);
        RETURN_IF_ERROR(local_fs->exists(gc_root, &present));
        if (!present) {
            RETURN_IF_ERROR(local_fs->create_directory(gc_root));
        }

        const auto spill_root = fmt::format("{}/{}", store.path, BLOCK_SPILL_DIR);
        RETURN_IF_ERROR(local_fs->exists(spill_root, &present));
        if (present) {
            // Move the old spill directory aside instead of deleting it here.
            const auto suffix = ToStringFromUnixMillis(UnixMillis());
            const auto retired_dir =
                    fmt::format("{}/{}/{}", store.path, BLOCK_SPILL_GC_DIR, suffix);
            RETURN_IF_ERROR(local_fs->rename_dir(spill_root, retired_dir));
        }
        // Whether it never existed or was just moved away, the spill directory is created now.
        RETURN_IF_ERROR(local_fs->create_directory(spill_root));
    }

    return Status::OK();
}
|
|
|
|
void BlockSpillManager::gc(int64_t max_file_count) {
|
|
if (max_file_count < 1) {
|
|
return;
|
|
}
|
|
bool exists = true;
|
|
int64_t count = 0;
|
|
for (const auto& path : _store_paths) {
|
|
std::string gc_root_dir = fmt::format("{}/{}", path.path, BLOCK_SPILL_GC_DIR);
|
|
|
|
std::error_code ec;
|
|
exists = std::filesystem::exists(gc_root_dir, ec);
|
|
if (ec || !exists) {
|
|
continue;
|
|
}
|
|
std::vector<io::FileInfo> dirs;
|
|
auto st = io::global_local_filesystem()->list(gc_root_dir, false, &dirs, &exists);
|
|
if (!st.ok()) {
|
|
continue;
|
|
}
|
|
for (const auto& dir : dirs) {
|
|
if (dir.is_file) {
|
|
continue;
|
|
}
|
|
std::string abs_dir = fmt::format("{}/{}", gc_root_dir, dir.file_name);
|
|
std::vector<io::FileInfo> files;
|
|
st = io::global_local_filesystem()->list(abs_dir, true, &files, &exists);
|
|
if (!st.ok()) {
|
|
continue;
|
|
}
|
|
if (files.empty()) {
|
|
static_cast<void>(io::global_local_filesystem()->delete_directory(abs_dir));
|
|
if (count++ == max_file_count) {
|
|
return;
|
|
}
|
|
continue;
|
|
}
|
|
for (const auto& file : files) {
|
|
auto abs_file_path = fmt::format("{}/{}", abs_dir, file.file_name);
|
|
static_cast<void>(io::global_local_filesystem()->delete_file(abs_file_path));
|
|
if (count++ == max_file_count) {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Creates and opens a writer for a new spill stream.
//
// A store path is picked uniformly at random and the stream is given the file
// "<store path>/spill/<random uuid>". The stream receives the next id from id_ and the
// id -> path mapping is recorded so that get_reader() can find the file later.
//
// batch_size and profile are forwarded unchanged to the BlockSpillWriter constructor.
// writer receives the new writer object (also when opening it fails).
//
// Returns an error if no store path is configured, otherwise the status of
// BlockSpillWriter::open(). When open() fails the id -> path entry is removed again so
// that failed attempts do not accumulate in the map.
Status BlockSpillManager::get_writer(int32_t batch_size, vectorized::BlockSpillWriterUPtr& writer,
                                     RuntimeProfile* profile) {
    // Indexing an empty vector would be undefined behaviour, so fail explicitly.
    if (_store_paths.empty()) {
        return Status::InternalError("no store path available for block spill");
    }

    // One uniform draw selects the store path; shuffling all indices is not needed.
    std::mt19937 rng {std::random_device {}()};
    std::uniform_int_distribution<size_t> pick_store(0, _store_paths.size() - 1);

    std::string path = _store_paths[pick_store(rng)].path + "/" + BLOCK_SPILL_DIR;
    std::string unique_name = boost::uuids::to_string(boost::uuids::random_generator()());
    path += "/" + unique_name;

    int64_t id;
    {
        std::lock_guard<std::mutex> l(lock_);
        id = id_++;
        id_to_file_paths_[id] = path;
    }

    writer.reset(new vectorized::BlockSpillWriter(id, batch_size, path, profile));
    Status st = writer->open();
    if (!st.ok()) {
        // Nothing can be read back from a stream that never opened; drop its entry.
        std::lock_guard<std::mutex> l(lock_);
        id_to_file_paths_.erase(id);
    }
    return st;
}
|
|
|
|
// Creates and opens a reader for the spill stream registered under stream_id.
//
// The file path is looked up in id_to_file_paths_; an id that is not registered trips
// the CHECK below. profile and delete_after_read are forwarded unchanged to the
// BlockSpillReader constructor. reader receives the new reader object.
//
// Returns the status of BlockSpillReader::open().
Status BlockSpillManager::get_reader(int64_t stream_id, vectorized::BlockSpillReaderUPtr& reader,
                                     RuntimeProfile* profile, bool delete_after_read) {
    // Copy the path out while holding the lock; the reader itself is built without it.
    std::string spill_file = [&]() -> std::string {
        std::lock_guard<std::mutex> guard(lock_);
        CHECK(id_to_file_paths_.end() != id_to_file_paths_.find(stream_id));
        return id_to_file_paths_[stream_id];
    }();

    reader.reset(
            new vectorized::BlockSpillReader(stream_id, spill_file, profile, delete_after_read));
    return reader->open();
}
|
|
|
|
void BlockSpillManager::remove(int64_t stream_id) {
|
|
std::lock_guard<std::mutex> l(lock_);
|
|
id_to_file_paths_.erase(stream_id);
|
|
}
|
|
} // namespace doris
|