Files
openGauss-third_party/dependency/masstree/0004-masstree_reserved_nodes_fix.patch
2023-01-04 10:42:37 +08:00

154 lines
7.4 KiB
Diff

From a702402ec8849b6787bc4da51b785d1dbb0556b8 Mon Sep 17 00:00:00 2001
From: Vinoth Veeraraghavan <vinoth.veeraraghavan@hotmail.com>
Date: Wed, 4 Jan 2023 10:14:23 +0800
Subject: [PATCH] Optimization in split flow
---
kvthread.hh | 6 ++++--
masstree_split.hh | 54 +++++++++++++++++++++--------------------------
2 files changed, 28 insertions(+), 32 deletions(-)
diff --git a/kvthread.hh b/kvthread.hh
index 01d6919..63df4c4 100644
--- a/kvthread.hh
+++ b/kvthread.hh
@@ -28,6 +28,7 @@
enum {
MT_MERR_OK = 0,
+ // Errors that will cause operation failure. Bad flows are handled
MT_MERR_MAKE_SPLIT_PRE_ALLOC = 1,
MT_MERR_MAKE_SPLIT_LEAF_ALLOC = 2,
MT_MERR_MAKE_NEW_LAYER_LEAF_ALLOC_1 = 3,
@@ -43,6 +44,8 @@ enum {
// Errors that are being handled internally (Operation should succeed even if last error contains them)
MT_MERR_NON_DISRUPTIVE_ERRORS = 15,
+ MT_MERR_MAKE_INTERNODE_USE_RESERVED = 16,
+ MT_MERR_MAKE_INTERNODE_USE_RESERVED_2 = 17,
// We should not reach the following errors as they should be covered with other errors in more upper layer
MT_MERR_NOT_RETURNED_TO_USER_ERRORS = 20,
@@ -59,7 +62,6 @@ enum {
MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED,
MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED_2,
MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED,
- MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED_2,
MT_MERR_NOT_IN_USE_LAST_ENTRY = 40
};
@@ -84,7 +86,7 @@ extern volatile mrcu_epoch_type active_epoch;
#define MAX_MEMTAG_MASSTREE_LIMBO_GROUP_ALLOCATION_SIZE sizeof(mt_limbo_group)
// Upper bound for the ksuffixes structure max size.
-#define MAX_MEMTAG_MASSTREE_KSUFFIXES_ALLOCATION_SIZE(width) iceil_log2(leaf<P>::external_ksuf_type::safe_size(width, MAX_KEY_SIZE * width));
+#define MAX_MEMTAG_MASSTREE_KSUFFIXES_ALLOCATION_SIZE(width) iceil_log2(leaf<P>::external_ksuf_type::safe_size(width, MASSTREE_MAXKEYLEN * width));
inline uint64_t ng_getGlobalEpoch() {
return globalepoch;
diff --git a/masstree_split.hh b/masstree_split.hh
index 2d53de6..a477757 100644
--- a/masstree_split.hh
+++ b/masstree_split.hh
@@ -199,7 +199,6 @@ bool tcursor<P>::make_split(threadinfo& ti)
// the ikey_bound). But in the latter case, perhaps we can rearrange the
// permutation to do an insert instead.
- //IDAN: LEARN: as we might fail in case the last available slot is 0, why not replace the condition to (n_->size() < n_->width -1) ?
if (n_->size() < n_->width) {
permuter_type perm(n_->permutation_);
perm.exchange(perm.size(), n_->width - 1);
@@ -217,21 +216,7 @@ bool tcursor<P>::make_split(threadinfo& ti)
bool rc = true;
- // 2 optimizations that can reduce the number of internodes allocations:
- // 1. In n_ does not have parent, only 1 internode is required (rare case - only on first split)
- // 2. In case n_'s parent has extra place, and it's height is 1, we dont need internodes at all (common case, but requires early lock of n_'s parent)
- node_type* l_root = n_;
-
- while (!l_root->is_root()) {
- if (n_ != l_root) {
- l_root->stable_annotated(ti.stable_fence());
- }
- l_root = l_root->maybe_parent();
- }
-
- // l_root->height_ is the layer real height or higher.
- uint32_t layer_height = l_root->isleaf() ? 1 : ((internode_type *)l_root)->height_;
- int reserved_nodes = layer_height + 5; // add 5 extra nodes (extra 5 layers in single b-tree)
+ int reserved_nodes = 2;
internode_type * preallocated_internodes[reserved_nodes + 1] = { 0 };
int cur_cache_index = 0;
@@ -274,27 +259,35 @@ bool tcursor<P>::make_split(threadinfo& ti)
while (true) {
masstree_invariant(!n->concurrent || (n->locked() && child->locked() && (n->isleaf() || n->splitting())));
internode_type *next_child = 0;
-
internode_type *p = n->locked_parent(ti);
+ if (cur_cache_index == reserved_nodes) {
+ // Should never happen with pre-allocated internodes (we should have enough reserved nodes). bad flow is not handled
+ ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED);
+ }
+
int kp = -1;
if (n->parent_exists(p)) {
kp = internode_type::bound_type::upper(xikey[sense], *p);
p->mark_insert();
}
- if (kp < 0 || p->height_ > height + 1) {
+ // If cur_cache_index == 1, a reserved internode was already consumed in a previous iteration due to a memory allocation failure.
+ // In that case only 1 reserved internode remains, so stop climbing and add the new internode at the current layer
+ if (kp < 0 || p->height_ > height + 1 || cur_cache_index == 1) {
masstree_invariant(preallocated_internodes[cur_cache_index]);
- internode_type *nn = internode_type::make(height + 1, ti, preallocated_internodes[cur_cache_index++]);
+ internode_type *nn = internode_type::make(height + 1, ti, nullptr);
if (!nn) {
- // Should never happen with pre-allocated internodes. bad flow is not handled
- ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED);
+ ti.set_last_error(MT_MERR_MAKE_INTERNODE_USE_RESERVED);
+ nn = internode_type::make(height + 1, ti, preallocated_internodes[cur_cache_index++]);
}
- if (cur_cache_index == reserved_nodes) {
- // Should never happen with pre-allocated internodes (we should have enough reserved nodes). bad flow is not handled
- ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED);
+ if (!nn) {
+ // Should never happen with pre-allocated internodes. bad flow is not handled
+ ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED);
+ masstree_invariant(false);
}
+
nn->child_[0] = n;
nn->assign(0, xikey[sense], child);
nn->nkeys_ = 1;
@@ -309,15 +302,16 @@ bool tcursor<P>::make_split(threadinfo& ti)
} else {
if (p->size() >= p->width) {
masstree_invariant(preallocated_internodes[cur_cache_index]);
- next_child = internode_type::make(height + 1, ti, preallocated_internodes[cur_cache_index++]);
+ next_child = internode_type::make(height + 1, ti, nullptr);
if (!next_child) {
- // Should never happen with pre-allocated internodes. bad flow is not handled
- ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED_2);
+ ti.set_last_error(MT_MERR_MAKE_INTERNODE_USE_RESERVED_2);
+ next_child = internode_type::make(height + 1, ti, preallocated_internodes[cur_cache_index++]);
}
- if (cur_cache_index == reserved_nodes) {
- // Should never happen with pre-allocated internodes (we should have enough reserved nodes). bad flow is not handled
- ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED_2);
+ if (!next_child) {
+ // Should never happen with pre-allocated internodes. bad flow is not handled
+ ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED_2);
+ masstree_invariant(false);
}
next_child->assign_version(*p);
--
2.17.1