Files
doris/be/src/olap/data_dir.h
sduzh 6fedf5881b [CodeFormat] Clang-format cpp sources (#4965)
Clang-format all c++ source files.
2020-11-28 18:36:49 +08:00

213 lines
7.1 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <set>
#include <string>
#include "common/status.h"
#include "gen_cpp/Types_types.h"
#include "gen_cpp/olap_file.pb.h"
#include "olap/olap_common.h"
#include "olap/rowset/rowset_id_generator.h"
#include "util/metrics.h"
#include "util/mutex.h"
namespace doris {
class Tablet;
class TabletManager;
class TabletMeta;
class TxnManager;
// A DataDir used to manage data in same path.
// Now, After DataDir was created, it will never be deleted for easy implementation.
class DataDir {
public:
DataDir(const std::string& path, int64_t capacity_bytes = -1,
TStorageMedium::type storage_medium = TStorageMedium::HDD,
TabletManager* tablet_manager = nullptr, TxnManager* txn_manager = nullptr);
~DataDir();
Status init();
void stop_bg_worker();
const std::string& path() const { return _path; }
size_t path_hash() const { return _path_hash; }
bool is_used() const { return _is_used; }
void set_is_used(bool is_used) { _is_used = is_used; }
int32_t cluster_id() const { return _cluster_id; }
DataDirInfo get_dir_info() {
DataDirInfo info;
info.path = _path;
info.path_hash = _path_hash;
info.disk_capacity = _disk_capacity_bytes;
info.available = _available_bytes;
info.is_used = _is_used;
info.storage_medium = _storage_medium;
return info;
}
// save a cluster_id file under data path to prevent
// invalid be config for example two be use the same
// data path
Status set_cluster_id(int32_t cluster_id);
void health_check();
OLAPStatus get_shard(uint64_t* shard);
OlapMeta* get_meta() { return _meta; }
bool is_ssd_disk() const { return _storage_medium == TStorageMedium::SSD; }
TStorageMedium::type storage_medium() const { return _storage_medium; }
void register_tablet(Tablet* tablet);
void deregister_tablet(Tablet* tablet);
void clear_tablets(std::vector<TabletInfo>* tablet_infos);
std::string get_absolute_shard_path(int64_t shard_id);
std::string get_absolute_tablet_path(int64_t shard_id, int64_t tablet_id, int32_t schema_hash);
void find_tablet_in_trash(int64_t tablet_id, std::vector<std::string>* paths);
static std::string get_root_path_from_schema_hash_path_in_trash(
const std::string& schema_hash_dir_in_trash);
// load data from meta and data files
OLAPStatus load();
void add_pending_ids(const std::string& id);
void remove_pending_ids(const std::string& id);
// this function scans the paths in data dir to collect the paths to check
// this is a producer function. After scan, it will notify the perform_path_gc function to gc
void perform_path_scan();
void perform_path_gc_by_rowsetid();
void perform_path_gc_by_tablet();
OLAPStatus remove_old_meta_and_files();
bool convert_old_data_success();
OLAPStatus set_convert_finished();
// check if the capacity reach the limit after adding the incoming data
// return true if limit reached, otherwise, return false.
// TODO(cmy): for now we can not precisely calculate the capacity Doris used,
// so in order to avoid running out of disk capacity, we currently use the actual
// disk available capacity and total capacity to do the calculation.
// So that the capacity Doris actually used may exceeds the user specified capacity.
bool reach_capacity_limit(int64_t incoming_data_size);
Status update_capacity();
void update_user_data_size(int64_t size);
std::set<TabletInfo> tablet_set() { return _tablet_set; }
void disks_compaction_score_increment(int64_t delta);
void disks_compaction_num_increment(int64_t delta);
private:
std::string _cluster_id_path() const { return _path + CLUSTER_ID_PREFIX; }
Status _init_cluster_id();
Status _init_capacity();
Status _init_file_system();
Status _init_meta();
Status _check_disk();
OLAPStatus _read_and_write_test_file();
Status _read_cluster_id(const std::string& cluster_id_path, int32_t* cluster_id);
Status _write_cluster_id_to_path(const std::string& path, int32_t cluster_id);
OLAPStatus _clean_unfinished_converting_data();
OLAPStatus _convert_old_tablet();
// Check whether has old format (hdr_ start) in olap. When doris updating to current version,
// it may lead to data missing. When conf::storage_strict_check_incompatible_old_format is true,
// process will log fatal.
OLAPStatus _check_incompatible_old_format_tablet();
void _process_garbage_path(const std::string& path);
void _remove_check_paths(const std::set<std::string>& paths);
bool _check_pending_ids(const std::string& id);
private:
bool _stop_bg_worker = false;
std::string _path;
size_t _path_hash;
// user specified capacity
int64_t _capacity_bytes;
// the actual available capacity of the disk of this data dir
// NOTICE that _available_bytes may be larger than _capacity_bytes, if capacity is set
// by user, not the disk's actual capacity
int64_t _available_bytes;
// the actual capacity of the disk of this data dir
int64_t _disk_capacity_bytes;
TStorageMedium::type _storage_medium;
bool _is_used;
std::string _file_system;
TabletManager* _tablet_manager;
TxnManager* _txn_manager;
int32_t _cluster_id;
// This flag will be set true if this store was not in root path when reloading
bool _to_be_deleted;
// used to protect _current_shard and _tablet_set
std::mutex _mutex;
uint64_t _current_shard;
std::set<TabletInfo> _tablet_set;
static const uint32_t MAX_SHARD_NUM = 1024;
OlapMeta* _meta = nullptr;
RowsetIdGenerator* _id_generator = nullptr;
std::mutex _check_path_mutex;
std::condition_variable _cv;
std::set<std::string> _all_check_paths;
std::set<std::string> _all_tablet_schemahash_paths;
RWMutex _pending_path_mutex;
std::set<std::string> _pending_path_ids;
// used in convert process
bool _convert_old_data_success;
std::shared_ptr<MetricEntity> _data_dir_metric_entity;
IntGauge* disks_total_capacity;
IntGauge* disks_avail_capacity;
IntGauge* disks_data_used_capacity;
IntGauge* disks_state;
IntGauge* disks_compaction_score;
IntGauge* disks_compaction_num;
};
} // namespace doris