[Bug] tablet meta is not updated correctly after compaction (#3098)

This CL tries to fix a potential bug described in ISSUE #3097, although I'm not sure this is the root cause.

It also removes a lot of verbose logging and fixes a memory leak.
This commit is contained in:
Mingyu Chen
2020-03-14 23:39:11 +08:00
committed by GitHub
parent 01a4ab01c4
commit 42931d22cb
9 changed files with 31 additions and 25 deletions

View File

@ -58,7 +58,7 @@ OLAPStatus AlphaRowset::create_reader(std::shared_ptr<RowsetReader>* result) {
}
OLAPStatus AlphaRowset::remove() {
LOG(INFO) << "begin to remove files in rowset " << unique_id()
VLOG(3) << "begin to remove files in rowset " << unique_id()
<< ", version:" << start_version() << "-" << end_version()
<< ", tabletid:" << _rowset_meta->tablet_id();
for (auto segment_group : _segment_groups) {
@ -166,7 +166,9 @@ OLAPStatus AlphaRowset::split_range(
RowBlockPosition step_pos;
std::shared_ptr<SegmentGroup> largest_segment_group = _segment_group_with_largest_size();
if (largest_segment_group == nullptr) {
if (largest_segment_group == nullptr || largest_segment_group->current_num_rows_per_row_block() == 0) {
LOG(WARNING) << "failed to get largest_segment_group. is null: " << (largest_segment_group == nullptr)
<< ". version: " << start_version() << "-" << end_version();
ranges->emplace_back(start_key.to_tuple());
ranges->emplace_back(end_key.to_tuple());
return OLAP_SUCCESS;

View File

@ -186,6 +186,8 @@ OLAPStatus BetaRowsetWriter::_create_segment_writer() {
fs::CreateBlockOptions opts({path});
DCHECK(block_mgr != nullptr);
Status st = block_mgr->create_block(opts, &wblock);
// FIXME(cmy): this is just a temp implementation, will be refactor later.
delete block_mgr;
if (!st.ok()) {
LOG(WARNING) << "failed to create writable block. path=" << path;
return OLAP_ERR_INIT_FAILED;

View File

@ -96,7 +96,7 @@ OLAPStatus RowsetMetaManager::remove(OlapMeta* meta, TabletUid tablet_uid, const
std::string key = ROWSET_PREFIX + tablet_uid.to_string() + "_" + rowset_id.to_string();
VLOG(3) << "start to remove rowset, key:" << key;
OLAPStatus status = meta->remove(META_COLUMN_FAMILY_INDEX, key);
LOG(INFO) << "remove rowset key:" << key << " finished";
VLOG(3) << "remove rowset key:" << key << " finished";
return status;
}

View File

@ -216,7 +216,7 @@ bool SegmentGroup::delete_all_files() {
string index_path = construct_index_file_path(seg_id);
string data_path = construct_data_file_path(seg_id);
LOG(INFO) << "delete index file. path=" << index_path;
VLOG(3) << "delete index file. path=" << index_path;
if (remove(index_path.c_str()) != 0) {
// if the errno is not ENOENT, log the error msg.
// ENOENT stands for 'No such file or directory'
@ -228,7 +228,7 @@ bool SegmentGroup::delete_all_files() {
}
}
LOG(INFO) << "delete data file. path=" << data_path;
VLOG(3) << "delete data file. path=" << data_path;
if (remove(data_path.c_str()) != 0) {
if (errno != ENOENT) {
char errmsg[64];

View File

@ -719,10 +719,10 @@ void StorageEngine::start_delete_unused_rowset() {
if (it->second.use_count() != 1) {
++it;
} else if (it->second->need_delete_file()) {
LOG(INFO) << "start to remove rowset:" << it->second->rowset_id()
VLOG(3) << "start to remove rowset:" << it->second->rowset_id()
<< ", version:" << it->second->version().first << "-" << it->second->version().second;
OLAPStatus status = it->second->remove();
LOG(INFO) << "remove rowset:" << it->second->rowset_id() << " finished. status:" << status;
VLOG(3) << "remove rowset:" << it->second->rowset_id() << " finished. status:" << status;
it = _unused_rowsets.erase(it);
}
}
@ -732,7 +732,7 @@ void StorageEngine::start_delete_unused_rowset() {
void StorageEngine::add_unused_rowset(RowsetSharedPtr rowset) {
if (rowset == nullptr) { return; }
_gc_mutex.lock();
LOG(INFO) << "add unused rowset, rowset id:" << rowset->rowset_id()
VLOG(3) << "add unused rowset, rowset id:" << rowset->rowset_id()
<< ", version:" << rowset->version().first
<< "-" << rowset->version().second
<< ", unique id:" << rowset->unique_id();

View File

@ -305,7 +305,7 @@ OLAPStatus Tablet::modify_rowsets(const vector<RowsetSharedPtr>& to_add,
const RowsetSharedPtr Tablet::get_rowset_by_version(const Version& version) const {
auto iter = _rs_version_map.find(version);
if (iter == _rs_version_map.end()) {
LOG(INFO) << "no rowset for version:" << version.first << "-" << version.second
VLOG(3) << "no rowset for version:" << version.first << "-" << version.second
<< ", tablet: " << full_name();
return nullptr;
}
@ -319,7 +319,7 @@ const RowsetSharedPtr Tablet::get_rowset_by_version(const Version& version) cons
const RowsetSharedPtr Tablet::get_inc_rowset_by_version(const Version& version) const {
auto iter = _inc_rs_version_map.find(version);
if (iter == _inc_rs_version_map.end()) {
LOG(INFO) << "no rowset for version:" << version << ", tablet: " << full_name();
VLOG(3) << "no rowset for version:" << version << ", tablet: " << full_name();
return nullptr;
}
RowsetSharedPtr rowset = iter->second;

View File

@ -519,6 +519,8 @@ OLAPStatus TabletMeta::modify_rs_metas(const vector<RowsetMetaSharedPtr>& to_add
remove_delete_predicate_by_version((*it)->version());
}
_rs_metas.erase(it);
// there should be only one rowset match the version
break;
} else {
it++;
}

View File

@ -61,11 +61,11 @@ OLAPStatus EnginePublishVersionTask::finish() {
TabletInfo tablet_info = tablet_rs.first;
RowsetSharedPtr rowset = tablet_rs.second;
LOG(INFO) << "begin to publish version on tablet. "
<< "tablet_id=" << tablet_info.tablet_id
<< ", schema_hash=" << tablet_info.schema_hash
<< ", version=" << version.first
<< ", version_hash=" << version_hash
<< ", transaction_id=" << transaction_id;
<< "tablet_id=" << tablet_info.tablet_id
<< ", schema_hash=" << tablet_info.schema_hash
<< ", version=" << version.first
<< ", version_hash=" << version_hash
<< ", transaction_id=" << transaction_id;
// if rowset is null, it means this be received write task, but failed during write
// and receive fe's publish version task
// this be must return as an error tablet
@ -112,8 +112,8 @@ OLAPStatus EnginePublishVersionTask::finish() {
}
partition_related_tablet_infos.erase(tablet_info);
LOG(INFO) << "publish version successfully on tablet. tablet=" << tablet->full_name()
<< ", transaction_id=" << transaction_id << ", version=" << version.first
<< ", res=" << publish_status;
<< ", transaction_id=" << transaction_id << ", version=" << version.first
<< ", res=" << publish_status;
}
// check if the related tablet remained all have the version

View File

@ -374,11 +374,11 @@ OLAPStatus TxnManager::delete_txn(OlapMeta* meta, TPartitionId partition_id, TTr
#ifndef BE_TEST
StorageEngine::instance()->add_unused_rowset(load_info.rowset);
#endif
LOG(INFO) << "delete transaction from engine successfully."
<< " partition_id: " << key.first
<< ", transaction_id: " << key.second
<< ", tablet: " << tablet_info.to_string()
<< ", rowset: " << (load_info.rowset != nullptr ? load_info.rowset->rowset_id().to_string(): "0");
VLOG(3) << "delete transaction from engine successfully."
<< " partition_id: " << key.first
<< ", transaction_id: " << key.second
<< ", tablet: " << tablet_info.to_string()
<< ", rowset: " << (load_info.rowset != nullptr ? load_info.rowset->rowset_id().to_string(): "0");
}
}
}
@ -455,9 +455,9 @@ void TxnManager::get_txn_related_tablets(const TTransactionId transaction_id,
ReadLock txn_rdlock(&_txn_map_lock);
auto it = _txn_tablet_map.find(key);
if (it == _txn_tablet_map.end()) {
LOG(WARNING) << "could not find tablet for"
<< " partition_id=" << partition_id
<< ", transaction_id=" << transaction_id;
VLOG(3) << "could not find tablet for"
<< " partition_id=" << partition_id
<< ", transaction_id=" << transaction_id;
return;
}
std::map<TabletInfo, TabletTxnInfo>& load_info_map = it->second;