[Compaction] Avoid unnecessary compaction (#2839)
It is not necessary to perform compaction in the following cases:
1. A tablet has only 2 rowsets, with versions [0-1] and [2-x]. In this case,
there is no need to perform base compaction because the [0-1] version is an empty version.
Some tables are partitioned by day, and each partition loads only one batch of data
each day, so a large number of tablets with rowsets [0-1][2-2] will appear. These tablets
do not need base compaction.
2. The initial value of the `last successful execution time of compaction` is 0.
As a result, the first time the interval since the
last successful compaction is computed, it always exceeds the threshold and meets the
conditions to trigger cumulative compaction.
This commit is contained in:
@ -67,6 +67,12 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() {
|
||||
RETURN_NOT_OK(check_version_continuity(_input_rowsets));
|
||||
RETURN_NOT_OK(_check_rowset_overlapping(_input_rowsets));
|
||||
|
||||
if (_input_rowsets.size() == 2 && _input_rowsets[0]->end_version() == 1) {
|
||||
// the tablet is with rowset: [0-1], [2-y]
|
||||
// and [0-1] has no data. in this situation, no need to do base compaction.
|
||||
return OLAP_ERR_BE_NO_SUITABLE_VERSION;
|
||||
}
|
||||
|
||||
// 1. cumulative rowset must reach base_compaction_num_cumulative_deltas threshold
|
||||
if (_input_rowsets.size() > config::base_compaction_num_cumulative_deltas) {
|
||||
LOG(INFO) << "satisfy the base compaction policy. tablet="<< _tablet->full_name()
|
||||
@ -87,6 +93,11 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() {
|
||||
}
|
||||
|
||||
double base_cumulative_delta_ratio = config::base_cumulative_delta_ratio;
|
||||
if (base_size == 0) {
|
||||
// base_size == 0 means this may be a base version [0-1], which has no data.
|
||||
// set to 1 to void devide by zero
|
||||
base_size = 1;
|
||||
}
|
||||
double cumulative_base_ratio = static_cast<double>(cumulative_total_size) / base_size;
|
||||
|
||||
if (cumulative_base_ratio > base_cumulative_delta_ratio) {
|
||||
|
||||
@ -130,24 +130,37 @@ OLAPStatus CumulativeCompaction::pick_rowsets_to_compact() {
|
||||
// the cumulative point after waiting for a long time, to ensure that the base compaction can continue.
|
||||
|
||||
// check both last success time of base and cumulative compaction
|
||||
int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000;
|
||||
int64_t now = UnixMillis();
|
||||
int64_t cumu_interval = now - _tablet->last_cumu_compaction_success_time();
|
||||
int64_t base_interval = now - _tablet->last_base_compaction_success_time();
|
||||
if (cumu_interval > interval_threshold && base_interval > interval_threshold) {
|
||||
// before increasing cumulative point, we should make sure all rowsets are non-overlapping.
|
||||
// if at least one rowset is overlapping, we should compact them first.
|
||||
CHECK(candidate_rowsets.size() == transient_rowsets.size())
|
||||
<< "tablet: " << _tablet->full_name() << ", "<< candidate_rowsets.size() << " vs. " << transient_rowsets.size();
|
||||
for (auto& rs : candidate_rowsets) {
|
||||
if (rs->rowset_meta()->is_segments_overlapping()) {
|
||||
_input_rowsets = candidate_rowsets;
|
||||
return OLAP_SUCCESS;
|
||||
int64_t last_cumu = _tablet->last_cumu_compaction_success_time();
|
||||
int64_t last_base = _tablet->last_base_compaction_success_time();
|
||||
if (last_cumu != 0 || last_base != 0) {
|
||||
int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000;
|
||||
int64_t cumu_interval = now - last_cumu;
|
||||
int64_t base_interval = now - last_base;
|
||||
if (cumu_interval > interval_threshold && base_interval > interval_threshold) {
|
||||
// before increasing cumulative point, we should make sure all rowsets are non-overlapping.
|
||||
// if at least one rowset is overlapping, we should compact them first.
|
||||
CHECK(candidate_rowsets.size() == transient_rowsets.size())
|
||||
<< "tablet: " << _tablet->full_name() << ", "<< candidate_rowsets.size() << " vs. " << transient_rowsets.size();
|
||||
for (auto& rs : candidate_rowsets) {
|
||||
if (rs->rowset_meta()->is_segments_overlapping()) {
|
||||
_input_rowsets = candidate_rowsets;
|
||||
return OLAP_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
// all candicate rowsets are non-overlapping, increase the cumulative point
|
||||
_tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1);
|
||||
}
|
||||
} else {
|
||||
// init the compaction success time for first time
|
||||
if (last_cumu == 0) {
|
||||
_tablet->set_last_cumu_compaction_success_time(now);
|
||||
}
|
||||
|
||||
// all candicate rowsets are non-overlapping, increase the cumulative point
|
||||
_tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1);
|
||||
if (last_base == 0) {
|
||||
_tablet->set_last_base_compaction_success_time(now);
|
||||
}
|
||||
}
|
||||
|
||||
return OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS;
|
||||
|
||||
Reference in New Issue
Block a user