114 lines
3.8 KiB
C++
114 lines
3.8 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "vec/spill/spill_stream.h"
|
|
|
|
#include <glog/logging.h>
|
|
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <utility>
|
|
|
|
#include "io/fs/local_file_system.h"
|
|
#include "runtime/exec_env.h"
|
|
#include "runtime/runtime_state.h"
|
|
#include "runtime/thread_context.h"
|
|
#include "vec/core/block.h"
|
|
#include "vec/spill/spill_reader.h"
|
|
#include "vec/spill/spill_stream_manager.h"
|
|
#include "vec/spill/spill_writer.h"
|
|
|
|
namespace doris::vectorized {
|
|
// Builds a spill stream description; no file system work is done here.
//
// state:       runtime state of the owning query. It is dereferenced in the
//              initializer list to copy the query id, so it must not be null.
// stream_id:   identifier stored for this stream.
// data_dir:    spill data directory this stream is associated with.
// spill_dir:   directory of this stream; taken by value and moved into place.
// batch_rows:  row batching limit stored for later use.
// batch_bytes: byte batching limit stored for later use.
// profile:     runtime profile pointer stored for later use.
SpillStream::SpillStream(RuntimeState* state, int64_t stream_id, SpillDataDir* data_dir,
                         std::string spill_dir, size_t batch_rows, size_t batch_bytes,
                         RuntimeProfile* profile)
        : state_(state),
          stream_id_(stream_id),
          data_dir_(data_dir),
          // sink parameter: steal the caller's string instead of copying it
          spill_dir_(std::move(spill_dir)),
          batch_rows_(batch_rows),
          batch_bytes_(batch_bytes),
          // keep an own copy of the id; the destructor uses it to name the GC directory
          query_id_(state->query_id()),
          profile_(profile) {}
|
|
|
|
// Hands the stream's spill directory over for garbage collection.
//
// If `spill_dir_` still exists it is renamed to
//   <data dir path>/<SPILL_GC_DIR_PREFIX>/<query id>/<last component of spill_dir_>
// Nothing is deleted here (presumably a background GC task removes the moved
// directory later -- confirm against the spill stream manager).
//
// The whole operation is best effort: a destructor cannot report failure, so
// every file system error is logged as a warning instead of being silently
// dropped, and the remaining steps are still attempted where that makes sense.
SpillStream::~SpillStream() {
    const auto& local_fs = io::global_local_filesystem();

    bool exists = false;
    auto status = local_fs->exists(spill_dir_, &exists);
    if (!status.ok()) {
        LOG(WARNING) << "failed to check existence of spill dir " << spill_dir_ << ", query "
                     << print_id(query_id_) << ": " << status.to_string();
        return;
    }
    if (!exists) {
        // Nothing was left on disk, so there is nothing to hand over.
        return;
    }

    const auto query_dir = fmt::format("{}/{}/{}", get_data_dir()->path(), SPILL_GC_DIR_PREFIX,
                                       print_id(query_id_));
    status = local_fs->create_directory(query_dir);
    if (!status.ok()) {
        // Keep going: the rename below is still attempted, exactly as before.
        LOG(WARNING) << "failed to create spill gc dir " << query_dir << ": "
                     << status.to_string();
    }

    const auto gc_dir = fmt::format("{}/{}", query_dir,
                                    std::filesystem::path(spill_dir_).filename().string());
    status = local_fs->rename(spill_dir_, gc_dir);
    if (!status.ok()) {
        LOG(WARNING) << "failed to move spill dir " << spill_dir_ << " to gc dir " << gc_dir
                     << ": " << status.to_string();
    }
}
|
|
|
|
// Creates the writer and the reader of this stream.
//
// The writer is built first because the reader is constructed from the file
// path that the writer reports. Always returns OK.
Status SpillStream::prepare() {
    writer_ = std::make_unique<SpillWriter>(stream_id_, batch_rows_, data_dir_, spill_dir_);

    // The reader works on the very file the writer produces.
    const auto& spill_file_path = writer_->get_file_path();
    reader_ = std::make_unique<SpillReader>(stream_id_, spill_file_path);

    return Status::OK();
}
|
|
|
|
// Returns the id of the query this stream belongs to (the copy taken from the
// RuntimeState in the constructor).
const TUniqueId& SpillStream::query_id() const {
    return this->query_id_;
}
|
|
|
|
const std::string& SpillStream::get_spill_root_dir() const {
|
|
return data_dir_->path();
|
|
}
|
|
// Opens the writer so that blocks can be spilled afterwards.
// Returns whatever status SpillWriter::open() reports.
Status SpillStream::prepare_spill() {
    Status open_status = writer_->open();
    return open_status;
}
|
|
|
|
// Writes `block` through the writer and, if `eof` is set, finishes the write
// side of the stream via spill_eof().
//
// state: forwarded to SpillWriter::write().
// block: data to spill.
// eof:   true when this is the last block of the stream; the writer is closed
//        and released afterwards.
//
// Returns the first error reported by the writer, OK otherwise.
Status SpillStream::spill_block(RuntimeState* state, const Block& block, bool eof) {
    // The writer is released once the stream has reached eof; spilling after
    // that point is a caller bug, so make it loud in debug builds.
    DCHECK(writer_ != nullptr);

    // Out-parameter required by write(); the running total is read from the
    // writer itself below.
    size_t written_bytes = 0;
    RETURN_IF_ERROR(writer_->write(state, block, written_bytes));

    // Refresh the total right after a successful write. Previously the eof
    // path only updated it after a successful close(), so a failing close()
    // left the counter stale and decrease_spill_data_usage() would subtract
    // fewer bytes than the writer reports.
    total_written_bytes_ = writer_->get_written_bytes();

    if (!eof) {
        return Status::OK();
    }
    // Closing, final accounting and releasing the writer live in one place.
    return spill_eof();
}
|
|
|
|
// Finishes the write side of the stream: closes the writer, records the number
// of bytes it reports as written and, on success, releases the writer.
//
// Returns the status of SpillWriter::close(). When close() fails the writer is
// kept (as before), but the byte counter is still refreshed.
Status SpillStream::spill_eof() {
    // A second call (or a call after spill_block(..., eof = true)) would
    // dereference a released writer; catch that in debug builds.
    DCHECK(writer_ != nullptr);

    Status close_status = writer_->close();

    // Record the byte count before looking at the close result, so that
    // decrease_spill_data_usage() uses the writer's latest number even when
    // close() failed.
    total_written_bytes_ = writer_->get_written_bytes();

    if (!close_status.ok()) {
        return close_status;
    }

    writer_.reset();
    return Status::OK();
}
|
|
|
|
// Reads the next block of the stream on the calling thread.
//
// block: receives the data read by SpillReader::read().
// eos:   passed through to SpillReader::read().
//
// The reader's open() is invoked on every call before reading (presumably a
// no-op once the file is open -- confirm in SpillReader). Debug builds assert
// that a reader exists and that no other read is in progress.
//
// Returns the error from open() if it fails, otherwise the status of read().
Status SpillStream::read_next_block_sync(Block* block, bool* eos) {
    DCHECK(reader_ != nullptr);
    DCHECK(!_is_reading);

    // Mark the stream as busy for the duration of this call; the guard clears
    // the flag on every exit path.
    _is_reading = true;
    Defer clear_reading_flag([this] { _is_reading = false; });

    RETURN_IF_ERROR(reader_->open());

    Status read_status = reader_->read(block, eos);
    return read_status;
}
|
|
|
|
// Subtracts the bytes recorded for this stream (total_written_bytes_) from the
// spill usage tracked by its data directory.
void SpillStream::decrease_spill_data_usage() {
    const auto usage_delta = -total_written_bytes_;
    data_dir_->update_spill_data_usage(usage_delta);
}
|
|
|
|
} // namespace doris::vectorized
|