Files
doris/be/src/runtime/tmp_file_mgr.h
2019-06-14 23:38:31 +08:00

206 lines
7.6 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef DORIS_BE_SRC_QUERY_RUNTIME_TMP_FILE_MGR_H
#define DORIS_BE_SRC_QUERY_RUNTIME_TMP_FILE_MGR_H
#include "common/status.h"
#include "gen_cpp/Types_types.h" // for TUniqueId
// #include "util/collection_metrics.h"
#include "util/spinlock.h"
#include "util/metrics.h"
namespace doris {
class MetricRegistry;
class ExecEnv;
// TmpFileMgr creates and manages temporary files and directories on the local
// filesystem. It can manage multiple temporary directories across multiple devices.
// TmpFileMgr ensures that at most one directory per device is used unless overridden
// for testing. get_file() returns a File handle with a unique filename on a device. The
// client owns the File handle and can use it to expand the file.
// TODO: we could notify block managers about the failure so they can take more
// proactive action to avoid using the device.
class TmpFileMgr {
public:
    // DeviceId is a unique identifier for a temporary device managed by TmpFileMgr.
    // It is used as a handle for external classes to identify devices.
    using DeviceId = int;

    // File is a handle to a physical file in a temporary directory. Clients
    // can allocate file space and remove files using allocate_space() and remove().
    // Creation of the file is deferred until the first call to allocate_space().
    class File {
    public:
        // Nothing to release here: destroying the handle does not call remove().
        ~File() = default;

        // Allocates 'write_size' bytes in this file for a new block of data.
        // The file size is increased by a call to truncate() if necessary.
        // The physical file is created on the first call to allocate_space().
        // Returns Status::OK() and sets 'offset' on success.
        // Returns an error status if an unexpected error occurs.
        // If an error status is returned, the caller can try a different temporary file.
        Status allocate_space(int64_t write_size, int64_t* offset);

        // Called to notify TmpFileMgr that an IO error was encountered for this file.
        // The ErrorMsg-based overload below is compiled out; only the std::string
        // overload is available.
#if 0
        void report_io_error(const ErrorMsg& msg);
#endif
        void report_io_error(const std::string& error_msg);

        // Delete the physical file on disk, if one was created.
        // It is not valid to read or write to a file after calling remove().
        Status remove();

        // Path of the physical file in the filesystem.
        const std::string& path() const {
            return _path;
        }

        // The id of the disk on which the physical file lies.
        int disk_id() const {
            return _disk_id;
        }

        // True if this file has been marked as not expandable (see _blacklisted).
        bool is_blacklisted() const {
            return _blacklisted;
        }

    private:
        friend class TmpFileMgr;

        // The name of the sub-directory created within each configured scratch
        // directory.
        const static std::string _s_tmp_sub_dir_name;

        // Space (in MB) that must ideally be available for writing on a scratch
        // directory. A warning is issued if available space is less than this threshold.
        const static uint64_t _s_available_space_threshold_mb;

        // Only TmpFileMgr (a friend) can construct File handles; see get_file().
        File(TmpFileMgr* mgr, DeviceId device_id, const std::string& path);

        // TmpFileMgr this belongs to.
        TmpFileMgr* _mgr;

        // Path of the physical file in the filesystem.
        std::string _path;

        // The temporary device this file is stored on.
        DeviceId _device_id;

        // The id of the disk on which the physical file lies.
        int _disk_id;

        // Current file size. Modified by allocate_space(). Size is 0 before file creation.
        int64_t _current_size;

        // Set to true to indicate that file can't be expanded. This is useful to keep here
        // even though it is redundant with the global per-device blacklisting in TmpFileMgr
        // because it can be checked without acquiring a global lock. If a file is
        // blacklisted, the corresponding device will always be blacklisted.
        bool _blacklisted;
    };

    // Constructs a manager bound to 'exec_env'. Directories are not set up until
    // init() or init_custom() is called.
    TmpFileMgr(ExecEnv* exec_env);

    // Constructs a manager without an ExecEnv. _exec_env and _initialized take
    // their default member initializers (nullptr / false), so the object never
    // carries indeterminate state.
    TmpFileMgr() { }

    ~TmpFileMgr() = default;

    // Creates the configured tmp directories. If multiple directories are specified per
    // disk, only one is created and used. Must be called after DiskInfo::Init().
    Status init(MetricRegistry* metrics);

    // Custom initialization - initializes with the provided list of directories.
    // If one_dir_per_device is true, only use one temporary directory per device.
    // This interface is intended for testing purposes.
    Status init_custom(
            const std::vector<std::string>& tmp_dirs,
            bool one_dir_per_device,
            MetricRegistry* metrics);

    // Return a new File handle with a unique path for a query instance. The file path
    // is within the (single) tmp directory on the specified device id. The caller owns
    // the returned handle and is responsible for deleting it. The file is not created -
    // creation is deferred until the first call to File::allocate_space().
    Status get_file(const DeviceId& device_id, const TUniqueId& query_id, File** new_file);

    // Return the scratch directory path for the device.
    std::string get_tmp_dir_path(DeviceId device_id) const;

    // Total number of devices with tmp directories that are active. There is one tmp
    // directory per device.
    int num_active_tmp_devices();

    // Return vector with device ids of all tmp devices being actively used.
    // I.e. those that haven't been blacklisted.
    std::vector<DeviceId> active_tmp_devices();

private:
    // Dir stores information about a temporary directory.
    class Dir {
    public:
        const std::string& path() const {
            return _path;
        }

        // Marks this directory as blacklisted.
        // Return true if it was newly added to blacklist.
        bool blacklist() {
            const bool newly_blacklisted = !_blacklisted;
            _blacklisted = true;
            return newly_blacklisted;
        }

        bool is_blacklisted() const {
            return _blacklisted;
        }

    private:
        friend class TmpFileMgr;

        // path should be an absolute path to a writable scratch directory.
        Dir(const std::string& path, bool blacklisted) :
                _path(path), _blacklisted(blacklisted) {}

        std::string _path;
        bool _blacklisted;
    };

    // Remove a device from the rotation. Subsequent attempts to allocate a file on that
    // device will fail and the device will not be included in active tmp devices.
    void blacklist_device(DeviceId device_id);

    bool is_blacklisted(DeviceId device_id);

    // Defaulted to nullptr so the default constructor does not leave a wild pointer.
    ExecEnv* _exec_env = nullptr;

    // Defaulted to false so the flag is well-defined for every constructor.
    bool _initialized = false;

    // Protects the status of tmp dirs (i.e. whether they're blacklisted).
    SpinLock _dir_status_lock;

    // The created tmp directories.
    std::vector<Dir> _tmp_dirs;

    // MetricRegistry to track active scratch directories.
    std::unique_ptr<IntGauge> _num_active_scratch_dirs_metric;
    // SetMetric<std::string>* _active_scratch_dirs_metric;
};
} // end namespace doris
#endif // DORIS_BE_SRC_QUERY_RUNTIME_TMP_FILE_MGR_H