[Compaction] Avoid unnecessary compaction (#2839)

It is not necessary to perform compaction in the following cases:

1. A tablet has only 2 rowsets, with versions [0-1] and [2-x]. In this case,
there is no need to perform base compaction because the [0-1] version is an empty version.

    Some tables are partitioned by day, and each partition loads only one batch of data
 each day, so a large number of tablets with rowsets [0-1][2-2] will appear. These tablets
 do not need base compaction.

2. The initial value of the `last successful execution time of compaction` is 0.
As a result, the first time the interval since the last successful compaction
is computed, it equals (now - 0), which always exceeds the threshold and therefore
always meets the conditions to trigger cumulative compaction.
This commit is contained in:
Mingyu Chen
2020-02-06 16:40:38 +08:00
committed by GitHub
parent d549c40fcd
commit f77cfcdb61
2 changed files with 38 additions and 14 deletions

View File

@ -67,6 +67,12 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() {
RETURN_NOT_OK(check_version_continuity(_input_rowsets));
RETURN_NOT_OK(_check_rowset_overlapping(_input_rowsets));
if (_input_rowsets.size() == 2 && _input_rowsets[0]->end_version() == 1) {
// the tablet is with rowset: [0-1], [2-y]
// and [0-1] has no data. in this situation, no need to do base compaction.
return OLAP_ERR_BE_NO_SUITABLE_VERSION;
}
// 1. cumulative rowset must reach base_compaction_num_cumulative_deltas threshold
if (_input_rowsets.size() > config::base_compaction_num_cumulative_deltas) {
LOG(INFO) << "satisfy the base compaction policy. tablet="<< _tablet->full_name()
@ -87,6 +93,11 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() {
}
double base_cumulative_delta_ratio = config::base_cumulative_delta_ratio;
if (base_size == 0) {
// base_size == 0 means this may be a base version [0-1], which has no data.
// set to 1 to avoid division by zero
base_size = 1;
}
double cumulative_base_ratio = static_cast<double>(cumulative_total_size) / base_size;
if (cumulative_base_ratio > base_cumulative_delta_ratio) {

View File

@ -130,24 +130,37 @@ OLAPStatus CumulativeCompaction::pick_rowsets_to_compact() {
// the cumulative point after waiting for a long time, to ensure that the base compaction can continue.
// check both last success time of base and cumulative compaction
int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000;
int64_t now = UnixMillis();
int64_t cumu_interval = now - _tablet->last_cumu_compaction_success_time();
int64_t base_interval = now - _tablet->last_base_compaction_success_time();
if (cumu_interval > interval_threshold && base_interval > interval_threshold) {
// before increasing cumulative point, we should make sure all rowsets are non-overlapping.
// if at least one rowset is overlapping, we should compact them first.
CHECK(candidate_rowsets.size() == transient_rowsets.size())
<< "tablet: " << _tablet->full_name() << ", "<< candidate_rowsets.size() << " vs. " << transient_rowsets.size();
for (auto& rs : candidate_rowsets) {
if (rs->rowset_meta()->is_segments_overlapping()) {
_input_rowsets = candidate_rowsets;
return OLAP_SUCCESS;
int64_t last_cumu = _tablet->last_cumu_compaction_success_time();
int64_t last_base = _tablet->last_base_compaction_success_time();
if (last_cumu != 0 || last_base != 0) {
int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000;
int64_t cumu_interval = now - last_cumu;
int64_t base_interval = now - last_base;
if (cumu_interval > interval_threshold && base_interval > interval_threshold) {
// before increasing cumulative point, we should make sure all rowsets are non-overlapping.
// if at least one rowset is overlapping, we should compact them first.
CHECK(candidate_rowsets.size() == transient_rowsets.size())
<< "tablet: " << _tablet->full_name() << ", "<< candidate_rowsets.size() << " vs. " << transient_rowsets.size();
for (auto& rs : candidate_rowsets) {
if (rs->rowset_meta()->is_segments_overlapping()) {
_input_rowsets = candidate_rowsets;
return OLAP_SUCCESS;
}
}
// all candidate rowsets are non-overlapping, increase the cumulative point
_tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1);
}
} else {
// init the compaction success time for first time
if (last_cumu == 0) {
_tablet->set_last_cumu_compaction_success_time(now);
}
// all candidate rowsets are non-overlapping, increase the cumulative point
_tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1);
if (last_base == 0) {
_tablet->set_last_base_compaction_success_time(now);
}
}
return OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS;