From a702402ec8849b6787bc4da51b785d1dbb0556b8 Mon Sep 17 00:00:00 2001
From: Vinoth Veeraraghavan <vinoth.veeraraghavan@hotmail.com>
Date: Wed, 4 Jan 2023 10:14:23 +0800
Subject: [PATCH] Optimization in split flow
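
Reserve only 2 internodes per split instead of layer_height + 5, and drop
the walk to the layer root that was needed to compute the height.
Internodes are now taken from the normal allocator first; a pre-allocated
reserved node is consumed only when that allocation fails, recorded with
the non-disruptive errors MT_MERR_MAKE_INTERNODE_USE_RESERVED / _2. When a
reserved node was consumed in the previous iteration (cur_cache_index == 1),
the split stops climbing and adds the new internode in the current layer,
so the reserve is not exhausted mid-split.

Also size MAX_MEMTAG_MASSTREE_KSUFFIXES_ALLOCATION_SIZE with
MASSTREE_MAXKEYLEN rather than MAX_KEY_SIZE.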
---
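Note (review aid, not part of the commit): a minimal standalone sketch of
the allocate-then-fall-back pattern this patch introduces. Node, make_node()
and main() are invented stand-ins; only the control flow mirrors
internode_type::make() and the reserved pool in make_split().

    #include <cstdio>
    #include <cstdlib>

    struct Node { int height; };

    // Stand-in for internode_type::make(): if a reserved buffer is passed,
    // construct in it; otherwise use the normal allocator, which may fail
    // under memory pressure.
    static Node* make_node(int height, Node* reserved) {
        if (reserved) { reserved->height = height; return reserved; }
        Node* n = static_cast<Node*>(std::malloc(sizeof(Node)));
        if (n) n->height = height;
        return n;
    }

    int main() {
        const int reserved_nodes = 2;        // was layer_height + 5
        Node reserve[reserved_nodes] = {};   // pre-allocated fallback pool
        int cur_cache_index = 0;

        // Fast path first; touch the reserve only if allocation fails
        // (the patch records MT_MERR_MAKE_INTERNODE_USE_RESERVED here).
        Node* nn = make_node(1, nullptr);
        if (!nn)
            nn = make_node(1, &reserve[cur_cache_index++]);

        std::printf("node height %d, reserved nodes used: %d\n",
                    nn->height, cur_cache_index);
        if (cur_cache_index == 0)
            std::free(nn);
        return 0;
    }
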
 kvthread.hh       |  6 ++++--
 masstree_split.hh | 54 +++++++++++++++++++++--------------------------
 2 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/kvthread.hh b/kvthread.hh
index 01d6919..63df4c4 100644
--- a/kvthread.hh
+++ b/kvthread.hh
@@ -28,6 +28,7 @@
 
 enum {
     MT_MERR_OK = 0,
+    // Errors that will cause operation failure. Bad flows are handled.
     MT_MERR_MAKE_SPLIT_PRE_ALLOC = 1,
     MT_MERR_MAKE_SPLIT_LEAF_ALLOC = 2,
     MT_MERR_MAKE_NEW_LAYER_LEAF_ALLOC_1 = 3,
@@ -43,6 +44,8 @@ enum {
 
     // Errors that are being handled internally (Operation should succeed even if last error contains them)
     MT_MERR_NON_DISRUPTIVE_ERRORS = 15,
+    MT_MERR_MAKE_INTERNODE_USE_RESERVED = 16,
+    MT_MERR_MAKE_INTERNODE_USE_RESERVED_2 = 17,
 
     // We should not reach the following errors as they should be covered with other errors in more upper layer
     MT_MERR_NOT_RETURNED_TO_USER_ERRORS = 20,
@@ -59,7 +62,6 @@ enum {
     MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED,
     MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED_2,
     MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED,
-    MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED_2,
 
     MT_MERR_NOT_IN_USE_LAST_ENTRY = 40
 };
@@ -84,7 +86,7 @@ extern volatile mrcu_epoch_type active_epoch;
 #define MAX_MEMTAG_MASSTREE_LIMBO_GROUP_ALLOCATION_SIZE sizeof(mt_limbo_group)
 
 // Upper bound for the ksuffixes structure max size.
-#define MAX_MEMTAG_MASSTREE_KSUFFIXES_ALLOCATION_SIZE(width) iceil_log2(leaf<P>::external_ksuf_type::safe_size(width, MAX_KEY_SIZE * width));
+#define MAX_MEMTAG_MASSTREE_KSUFFIXES_ALLOCATION_SIZE(width) iceil_log2(leaf<P>::external_ksuf_type::safe_size(width, MASSTREE_MAXKEYLEN * width));
 
 inline uint64_t ng_getGlobalEpoch() {
     return globalepoch;
diff --git a/masstree_split.hh b/masstree_split.hh
index 2d53de6..a477757 100644
--- a/masstree_split.hh
+++ b/masstree_split.hh
@@ -199,7 +199,6 @@ bool tcursor<P>::make_split(threadinfo& ti)
     // the ikey_bound). But in the latter case, perhaps we can rearrange the
     // permutation to do an insert instead.
 
-    //IDAN: LEARN: as we might fail in case the last available slot is 0, why not replace the condition to (n_->size() < n_->width -1) ?
     if (n_->size() < n_->width) {
         permuter_type perm(n_->permutation_);
         perm.exchange(perm.size(), n_->width - 1);
@@ -217,21 +216,7 @@ bool tcursor<P>::make_split(threadinfo& ti)
 
     bool rc = true;
 
-    // 2 optimizations that can reduce the number of internodes allocations:
-    // 1. In n_ does not have parent, only 1 internode is required (rare case - only on first split)
-    // 2. In case n_'s parent has extra place, and it's height is 1, we dont need internodes at all (common case, but requires early lock of n_'s parent)
-    node_type* l_root = n_;
-
-    while (!l_root->is_root()) {
-        if (n_ != l_root) {
-            l_root->stable_annotated(ti.stable_fence());
-        }
-        l_root = l_root->maybe_parent();
-    }
-
-    // l_root->height_ is the layer real height or higher.
-    uint32_t layer_height = l_root->isleaf() ? 1 : ((internode_type *)l_root)->height_;
-    int reserved_nodes = layer_height + 5; // add 5 extra nodes (extra 5 layers in single b-tree)
+    int reserved_nodes = 2;
     internode_type * preallocated_internodes[reserved_nodes + 1] = { 0 };
     int cur_cache_index = 0;
 
@@ -274,27 +259,35 @@ bool tcursor<P>::make_split(threadinfo& ti)
     while (true) {
         masstree_invariant(!n->concurrent || (n->locked() && child->locked() && (n->isleaf() || n->splitting())));
         internode_type *next_child = 0;
-
         internode_type *p = n->locked_parent(ti);
 
+        if (cur_cache_index == reserved_nodes) {
+            // Should never happen with pre-allocated internodes (we should have enough reserved nodes). Bad flow is not handled.
+            ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED);
+        }
+
         int kp = -1;
         if (n->parent_exists(p)) {
             kp = internode_type::bound_type::upper(xikey[sense], *p);
             p->mark_insert();
         }
 
-        if (kp < 0 || p->height_ > height + 1) {
+        // If cur_cache_index == 1, a reserved internode was used in the previous iteration because a memory allocation failed.
+        // Only 1 reserved internode is left, so stop climbing and add the new internode in the current layer.
+        if (kp < 0 || p->height_ > height + 1 || cur_cache_index == 1) {
             masstree_invariant(preallocated_internodes[cur_cache_index]);
-            internode_type *nn = internode_type::make(height + 1, ti, preallocated_internodes[cur_cache_index++]);
+            internode_type *nn = internode_type::make(height + 1, ti, nullptr);
             if (!nn) {
-                // Should never happen with pre-allocated internodes. bad flow is not handled
-                ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED);
+                ti.set_last_error(MT_MERR_MAKE_INTERNODE_USE_RESERVED);
+                nn = internode_type::make(height + 1, ti, preallocated_internodes[cur_cache_index++]);
             }
 
-            if (cur_cache_index == reserved_nodes) {
-                // Should never happen with pre-allocated internodes (we should have enough reserved nodes). bad flow is not handled
-                ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED);
+            if (!nn) {
+                // Should never happen with pre-allocated internodes. Bad flow is not handled.
+                ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED);
+                masstree_invariant(false);
             }
+
             nn->child_[0] = n;
             nn->assign(0, xikey[sense], child);
             nn->nkeys_ = 1;
@@ -309,15 +302,16 @@
         } else {
             if (p->size() >= p->width) {
                 masstree_invariant(preallocated_internodes[cur_cache_index]);
-                next_child = internode_type::make(height + 1, ti, preallocated_internodes[cur_cache_index++]);
+                next_child = internode_type::make(height + 1, ti, nullptr);
                 if (!next_child) {
-                    // Should never happen with pre-allocated internodes. bad flow is not handled
-                    ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED_2);
+                    ti.set_last_error(MT_MERR_MAKE_INTERNODE_USE_RESERVED_2);
+                    next_child = internode_type::make(height + 1, ti, preallocated_internodes[cur_cache_index++]);
                 }
 
-                if (cur_cache_index == reserved_nodes) {
-                    // Should never happen with pre-allocated internodes (we should have enough reserved nodes). bad flow is not handled
-                    ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_EMPTY_PRE_ALLOC_NOT_EXPECTED_2);
+                if (!next_child) {
+                    // Should never happen with pre-allocated internodes. Bad flow is not handled.
+                    ti.set_last_error(MT_MERR_MAKE_SPLIT_INTERNODE_ALLOC_NOT_EXPECTED_2);
+                    masstree_invariant(false);
                 }
 
                 next_child->assign_version(*p);
--
2.17.1