adapt runtime filter eval_vector interface for single row calculation.

2024-02-09 11:03:55 +00:00
parent 277027c705
commit 1423eb527b
9 changed files with 424 additions and 117 deletions
--- a/src/share/vector/vector_op_util.h
+++ b/src/share/vector/vector_op_util.h
@ -128,8 +128,7 @@ private:
        return Op::template hash<HashMethod, hash_v2>(meta, vec.get_payload(i),
                                          vec.get_length(i), seed_vec[i], hash_values[i]);
      };
-      //TODO shengle flip_foreach bound
-      ret = sql::ObBitVector::flip_foreach(skip, bound.batch_size(), op);
+      ret = sql::ObBitVector::flip_foreach(skip, bound, op);
    } else { /*has_null && !all_active*/
      auto op = [&](const int64_t i) __attribute__((always_inline)) {
        int ret = OB_SUCCESS;
@ -141,8 +140,7 @@ private:
        }
        return ret;
      };
-      //TODO shengle flip_foreach bound
-      ret = sql::ObBitVector::flip_foreach(skip, bound.batch_size(), op);
+      ret = sql::ObBitVector::flip_foreach(skip, bound, op);
    }
    return ret;
  }
--- a/src/sql/engine/expr/ob_expr_join_filter.cpp
+++ b/src/sql/engine/expr/ob_expr_join_filter.cpp
@ -48,14 +48,15 @@ namespace sql
          }

 template <typename ResVec>
-static int proc_if_das(ResVec *res_vec, const ObBitVector &skip, int64_t batch_size);
+static int proc_if_das(ResVec *res_vec, const ObBitVector &skip, const EvalBound &bound);

 template <>
-int proc_if_das<IntegerUniVec>(IntegerUniVec *res_vec, const ObBitVector &skip, int64_t batch_size)
+int proc_if_das<IntegerUniVec>(IntegerUniVec *res_vec, const ObBitVector &skip,
+                               const EvalBound &bound)
 {
  int ret = OB_SUCCESS;
  if (OB_FAIL(ObBitVector::flip_foreach(
-          skip, batch_size, [&](int64_t idx) __attribute__((always_inline)) {
+          skip, bound, [&](int64_t idx) __attribute__((always_inline)) {
            res_vec->set_int(idx, 1);
            return OB_SUCCESS;
          }))) {
@ -66,26 +67,27 @@ int proc_if_das<IntegerUniVec>(IntegerUniVec *res_vec, const ObBitVector &skip,

 template <>
 int proc_if_das<IntegerFixedVec>(IntegerFixedVec *res_vec, const ObBitVector &skip,
-                                 int64_t batch_size)
+                                 const EvalBound &bound)
 {
  int ret = OB_SUCCESS;
  uint64_t *data = reinterpret_cast<uint64_t *>(res_vec->get_data());
-  MEMSET(data, 1, (batch_size * res_vec->get_length(0)));
+  MEMSET(data + bound.start(), 1, (bound.range_size() * res_vec->get_length(0)));
  return ret;
 }

 template <typename ResVec>
-static int proc_by_pass(ResVec *res_vec, const ObBitVector &skip, int64_t batch_size,
+static int proc_by_pass(ResVec *res_vec, const ObBitVector &skip, const EvalBound &bound,
                        ObExprJoinFilter::ObExprJoinFilterContext *join_filter_ctx);

 template <>
-int proc_by_pass<IntegerUniVec>(IntegerUniVec *res_vec, const ObBitVector &skip, int64_t batch_size,
+int proc_by_pass<IntegerUniVec>(IntegerUniVec *res_vec, const ObBitVector &skip,
+                                const EvalBound &bound,
                                ObExprJoinFilter::ObExprJoinFilterContext *join_filter_ctx)
 {
  int ret = OB_SUCCESS;
  int valid_cnt = 0;
  if (OB_FAIL(ObBitVector::flip_foreach(
-          skip, batch_size, [&](int64_t idx) __attribute__((always_inline)) {
+          skip, bound, [&](int64_t idx) __attribute__((always_inline)) {
            ++valid_cnt;
            res_vec->set_int(idx, 1);
            return OB_SUCCESS;
@ -98,14 +100,14 @@ int proc_by_pass<IntegerUniVec>(IntegerUniVec *res_vec, const ObBitVector &skip,

 template <>
 int proc_by_pass<IntegerFixedVec>(IntegerFixedVec *res_vec, const ObBitVector &skip,
-                                  int64_t batch_size,
+                                  const EvalBound &bound,
                                  ObExprJoinFilter::ObExprJoinFilterContext *join_filter_ctx)
 {
  int ret = OB_SUCCESS;
  uint64_t *data = reinterpret_cast<uint64_t *>(res_vec->get_data());
-  MEMSET(data, 1, (batch_size * res_vec->get_length(0)));
+  MEMSET(data + bound.start(), 1, (bound.range_size() * res_vec->get_length(0)));

-  int64_t valid_cnt = batch_size - skip.accumulate_bit_cnt(batch_size);
+  int64_t valid_cnt = bound.range_size() - skip.accumulate_bit_cnt(bound);
  join_filter_ctx->n_times_ += valid_cnt;
  join_filter_ctx->total_count_ += valid_cnt;
  ObExprJoinFilter::collect_sample_info_batch(*join_filter_ctx, 0, valid_cnt);
@ -532,7 +534,6 @@ int ObExprJoinFilter::eval_filter_vector_internal(
    const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound)
 {
  int ret = OB_SUCCESS;
-  int64_t batch_size = bound.batch_size();
  uint64_t op_id = expr.expr_ctx_id_;
  ObExecContext &exec_ctx = ctx.exec_ctx_;
  ObExprJoinFilterContext *join_filter_ctx = NULL;
@ -543,10 +544,10 @@ int ObExprJoinFilter::eval_filter_vector_internal(
    // join filter ctx may be null in das.
    if (VEC_UNIFORM == res_format) {
      IntegerUniVec *res_vec = static_cast<IntegerUniVec *>(expr.get_vector(ctx));
-      ret = proc_if_das(res_vec, skip, batch_size);
+      ret = proc_if_das(res_vec, skip, bound);
    } else if (VEC_FIXED == res_format) {
      IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-      ret = proc_if_das(res_vec, skip, batch_size);
+      ret = proc_if_das(res_vec, skip, bound);
    }
    eval_flags.set_all(true);
  } else {
@ -563,10 +564,10 @@ int ObExprJoinFilter::eval_filter_vector_internal(
      // rf_msg_ dynamic_disable: disable filter when filter rate < 0.5
      if (VEC_UNIFORM == res_format) {
        IntegerUniVec *res_vec = static_cast<IntegerUniVec *>(expr.get_vector(ctx));
-        ret = proc_by_pass(res_vec, skip, batch_size, join_filter_ctx);
+        ret = proc_by_pass(res_vec, skip, bound, join_filter_ctx);
      } else if (VEC_FIXED == res_format) {
        IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-        ret = proc_by_pass(res_vec, skip, batch_size, join_filter_ctx);
+        ret = proc_by_pass(res_vec, skip, bound, join_filter_ctx);
      }
      eval_flags.set_all(true);
    } else if (OB_FAIL(join_filter_ctx->rf_msg_->might_contain_vector(expr, ctx, skip, bound,
--- a/src/sql/engine/ob_bit_vector.h
+++ b/src/sql/engine/ob_bit_vector.h
@ -151,17 +151,31 @@ public:
   */
  template <typename OP>
  static OB_INLINE int flip_foreach(const ObBitVectorImpl<WordType> &skip, int64_t size, OP op);
+  template <typename OP>
+  static OB_INLINE int flip_foreach(const ObBitVectorImpl<WordType> &skip, const EvalBound &bound,
+                                    OP op);

  /**
   * access all bit that it's 1
   */
  template <typename OP>
  static OB_INLINE int foreach(const ObBitVectorImpl<WordType> &skip, int64_t size, OP op);
+  template <typename OP>
+  static OB_INLINE int foreach(const ObBitVectorImpl<WordType> &skip, const EvalBound &bound,
+                               OP op);
 public:
  OB_INLINE static int64_t popcount64(uint64_t v);
 private:
+  /**
+   * the positions in [start_idx, end_idx) will be traversed
+   */
  template <bool IS_FLIP, typename OP>
-  static OB_INLINE int inner_foreach(const ObBitVectorImpl<WordType> &skip, int64_t size, OP op);
+  static OB_INLINE int inner_foreach(const ObBitVectorImpl<WordType> &skip, int64_t start_idx,
+                                     int64_t end_idx, OP op);
+  template <typename OP>
+  static OB_INLINE int inner_foreach_one_word(const WordType &s_word, const int64_t step_size,
+                                              int64_t &step, OP op);
+
 public:
  WordType data_[0];
 };
@ -630,36 +644,19 @@ inline void ObBitVectorImpl<WordType>::bit_or(const ObBitVectorImpl<WordType> &s
 }

 template <typename WordType>
-template <bool IS_FLIP, typename OP>
-OB_INLINE int ObBitVectorImpl<WordType>::inner_foreach(const ObBitVectorImpl<WordType> &skip, int64_t size, OP op)
+template <typename OP>
+OB_INLINE int ObBitVectorImpl<WordType>::inner_foreach_one_word(const WordType &s_word,
+                                                                const int64_t step_size,
+                                                                int64_t &step, OP op)
 {
  int ret = OB_SUCCESS;
-  int64_t tmp_step = 0;
-  typedef uint16_t StepType;
-  const int64_t step_size = sizeof(StepType) * CHAR_BIT;
-  int64_t word_cnt = ObBitVectorImpl<WordType>::word_count(size);
-  int64_t step = 0;
-  const int64_t remain = size % ObBitVectorImpl<WordType>::WORD_BITS;
-  for (int64_t i = 0; i < word_cnt && OB_SUCC(ret); ++i) {
-    WordType s_word = (IS_FLIP ? ~skip.data_[i] : skip.data_[i]);
-    // bool all_bits = (IS_FLIP ? skip.data_[i] == 0 : (~skip.data_[i]) == 0);
-    if (i >= word_cnt - 1 && remain > 0) {
-      // all_bits = ((IS_FLIP ? skip.data_[i] : ~skip.data_[i]) & ((1LU << remain) - 1)) == 0;
-      s_word = s_word & ((1LU << remain) - 1);
-    }
  if (s_word > 0) {
    WordType tmp_s_word = s_word;
-      tmp_step = step;
+    int64_t tmp_step = step;
    do {
      uint16_t step_val = tmp_s_word & 0xFFFF;
      if (0xFFFF == step_val) {
-          // no skip
-          // last batch ?
-          int64_t mini_cnt = step_size;
-          if (tmp_step + step_size > size) {
-            mini_cnt = size - tmp_step;
-          }
-          for (int64_t j = 0; OB_SUCC(ret) && j < mini_cnt; j++) {
+        for (int64_t j = 0; OB_SUCC(ret) && j < step_size; j++) {
          int64_t k = j + tmp_step;
          ret = op(k);
        }
@ -675,8 +672,64 @@ OB_INLINE int ObBitVectorImpl<WordType>::inner_foreach(const ObBitVectorImpl<Wor
      tmp_s_word >>= step_size;
    } while (tmp_s_word > 0 && OB_SUCC(ret)); // one word-uint64_t
  }
-    step += ObBitVectorImpl<WordType>::WORD_BITS;
-  } // end for
+  step += WORD_BITS;
+  return ret;
+}
+
+template <typename WordType>
+template <bool IS_FLIP, typename OP>
+OB_INLINE int ObBitVectorImpl<WordType>::inner_foreach(const ObBitVectorImpl<WordType> &skip,
+                                                       int64_t start_idx, int64_t end_idx, OP op)
+{
+  int ret = OB_SUCCESS;
+  int64_t tmp_step = 0;
+  typedef uint16_t StepType;
+  const int64_t step_size = sizeof(StepType) * CHAR_BIT;
+
+  int64_t start_cnt = 0;
+  int64_t end_cnt = 0;
+  WordType start_mask = 0;
+  WordType end_mask = 0;
+  get_start_end_mask(start_idx, end_idx, start_mask, end_mask, start_cnt, end_cnt);
+  // eg. start_remain = 5, start_mask = 11111....11100000
+  //                                                |   |
+  //                                                 \ /
+  //                                         nums of '0' == start_remain
+
+  // eg. end_remain = 5, end_mask = 00000000....11111
+  //                                            |   |
+  //                                             \ /
+  //                                     nums of '1' == end_remain
+  int64_t step = WORD_BITS * start_cnt; // the bit pos offset of the first word
+  if (start_cnt == end_cnt) {
+    // if only one word, both start_mask and end_mask should be used
+    WordType one_word_mask = start_mask & end_mask;
+    WordType s_word = (IS_FLIP ? ~skip.data_[start_cnt] : skip.data_[start_cnt]);
+    s_word = s_word & one_word_mask;
+    ret = inner_foreach_one_word(s_word, step_size, step, op);
+  } else {
+    // process the first word, which may not be a complete word
+    WordType s_word = (IS_FLIP ? ~skip.data_[start_cnt] : skip.data_[start_cnt]);
+    if (start_mask > 0) {
+      s_word = s_word & start_mask;
+    }
+    // process words in the middle, all of which are whole words
+    if (OB_FAIL(inner_foreach_one_word(s_word, step_size, step, op))) {
+    } else {
+      for (int64_t i = start_cnt + 1; i < end_cnt && OB_SUCC(ret); ++i) {
+        WordType s_word = (IS_FLIP ? ~skip.data_[i] : skip.data_[i]);
+        ret = inner_foreach_one_word(s_word, step_size, step, op);
+      }
+    }
+    if (OB_SUCC(ret)) {
+      // if end_mask > 0, there is an incomplete word at the end
+      if (end_mask > 0) {
+        WordType s_word = (IS_FLIP ? ~skip.data_[end_cnt] : skip.data_[end_cnt]);
+        s_word = s_word & end_mask;
+        ret = inner_foreach_one_word(s_word, step_size, step, op);
+      }
+    }
+  }
  return ret;
 }

@ -684,14 +737,30 @@ template<typename WordType>
 template <typename OP>
 OB_INLINE int ObBitVectorImpl<WordType>::flip_foreach(const ObBitVectorImpl<WordType> &skip, int64_t size, OP op)
 {
-  return ObBitVectorImpl<WordType>::inner_foreach<true, OP>(skip, size, op);
+  return ObBitVectorImpl<WordType>::inner_foreach<true, OP>(skip, 0 /*start_idx*/, size, op);
 }

 template<typename WordType>
 template <typename OP>
 OB_INLINE int ObBitVectorImpl<WordType>::foreach(const ObBitVectorImpl<WordType> &skip, int64_t size, OP op)
 {
-  return ObBitVectorImpl<WordType>::inner_foreach<false, OP>(skip, size, op);
+  return ObBitVectorImpl<WordType>::inner_foreach<false, OP>(skip, 0 /*start_idx*/, size, op);
+}
+
+template <typename WordType>
+template <typename OP>
+OB_INLINE int ObBitVectorImpl<WordType>::flip_foreach(const ObBitVectorImpl<WordType> &skip,
+                                                      const EvalBound &bound, OP op)
+{
+  return ObBitVectorImpl<WordType>::inner_foreach<true, OP>(skip, bound.start(), bound.end(), op);
+}
+
+template <typename WordType>
+template <typename OP>
+OB_INLINE int ObBitVectorImpl<WordType>::foreach (const ObBitVectorImpl<WordType> &skip,
+                                                  const EvalBound &bound, OP op)
+{
+  return ObBitVectorImpl<WordType>::inner_foreach<false, OP>(skip, bound.start(), bound.end(), op);
 }

 } // end namespace sql
--- a/src/sql/engine/px/p2p_datahub/ob_p2p_dh_msg.cpp
+++ b/src/sql/engine/px/p2p_datahub/ob_p2p_dh_msg.cpp
@ -157,15 +157,15 @@ int ObP2PDatahubMsgBase::process_msg_internal(bool &need_free)
 template <>
 int ObP2PDatahubMsgBase::proc_filter_empty<IntegerFixedVec>(IntegerFixedVec *res_vec,
                                                            const ObBitVector &skip,
-                                                            int64_t batch_size,
+                                                            const EvalBound &bound,
                                                            int64_t &total_count,
                                                            int64_t &filter_count)
 {
  int ret = OB_SUCCESS;
  uint64_t *data = reinterpret_cast<uint64_t *>(res_vec->get_data());
-  MEMSET(data, 0, (batch_size * res_vec->get_length(0)));
+  MEMSET(data + bound.start(), 0, (bound.range_size() * res_vec->get_length(0)));

-  int64_t valid_cnt = batch_size - skip.accumulate_bit_cnt(batch_size);
+  int64_t valid_cnt = bound.range_size() - skip.accumulate_bit_cnt(bound);
  total_count += valid_cnt;
  filter_count += valid_cnt;
  return ret;
@ -174,12 +174,13 @@ int ObP2PDatahubMsgBase::proc_filter_empty<IntegerFixedVec>(IntegerFixedVec *res
 template <>
 int ObP2PDatahubMsgBase::proc_filter_empty<IntegerUniVec>(IntegerUniVec *res_vec,
                                                          const ObBitVector &skip,
-                                                          int64_t batch_size, int64_t &total_count,
+                                                          const EvalBound &bound,
+                                                          int64_t &total_count,
                                                          int64_t &filter_count)
 {
  int ret = OB_SUCCESS;
  if (OB_FAIL(ObBitVector::flip_foreach(
-          skip, batch_size, [&](int64_t idx) __attribute__((always_inline)) {
+          skip, bound, [&](int64_t idx) __attribute__((always_inline)) {
            res_vec->set_int(idx, 0);
            ++filter_count;
            ++total_count;
@ -190,11 +191,11 @@ int ObP2PDatahubMsgBase::proc_filter_empty<IntegerUniVec>(IntegerUniVec *res_vec
  return ret;
 }

-int ObP2PDatahubMsgBase::preset_not_match(IntegerFixedVec *res_vec, int64_t batch_size)
+int ObP2PDatahubMsgBase::preset_not_match(IntegerFixedVec *res_vec, const EvalBound &bound)
 {
  int ret = OB_SUCCESS;
  uint64_t *data = reinterpret_cast<uint64_t *>(res_vec->get_data());
-  MEMSET(data, 0, (batch_size * res_vec->get_length(0)));
+  MEMSET(data + bound.start(), 0, (bound.range_size() * res_vec->get_length(0)));
  return ret;
 }

--- a/src/sql/engine/px/p2p_datahub/ob_p2p_dh_msg.h
+++ b/src/sql/engine/px/p2p_datahub/ob_p2p_dh_msg.h
@ -176,9 +176,9 @@ public:
  const ObRegisterDmInfo &get_register_dm_info() { return register_dm_info_; }
  uint64_t &get_dm_cb_node_seq_id() { return dm_cb_node_seq_id_; }
  template <typename ResVec>
-  int proc_filter_empty(ResVec *res_vec, const ObBitVector &skip, int64_t batch_size,
+  int proc_filter_empty(ResVec *res_vec, const ObBitVector &skip, const EvalBound &bound,
                      int64_t &total_count, int64_t &filter_count);
-  int preset_not_match(IntegerFixedVec *res_vec, int64_t batch_size);
+  int preset_not_match(IntegerFixedVec *res_vec, const EvalBound &bound);
  TO_STRING_KV(K(p2p_datahub_id_), K_(px_sequence_id), K(tenant_id_), K(timeout_ts_), K(is_active_), K(msg_type_));
 protected:
  common::ObCurTraceId::TraceId trace_id_;
--- a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_msg.cpp
+++ b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_msg.cpp
@ -738,19 +738,19 @@ int ObRFBloomFilterMsg::insert_by_row(

 template <VectorFormat ResFormat, typename ResVec>
 int ObRFBloomFilterMsg::fill_vec_result(ResVec *res_vec, const ObBitVector &skip,
-                                        int64_t batch_size, uint64_t *hash_values,
+                                        const EvalBound &bound, uint64_t *hash_values,
                                        int64_t &total_count, int64_t &filter_count)
 {
  int ret = OB_SUCCESS;
  bool is_match = true;
  const int64_t is_match_payload = 1; // for VEC_FIXED set set_payload, always 1
  if (OB_FAIL(ObBitVector::flip_foreach(
-          skip, batch_size, [&](int64_t idx) __attribute__((always_inline)) {
+          skip, bound, [&](int64_t idx) __attribute__((always_inline)) {
            bloom_filter_.prefetch_bits_block(hash_values[idx]);
            return OB_SUCCESS;
          }))) {
  } else if (OB_FAIL(ObBitVector::flip_foreach(
-                 skip, batch_size, [&](int64_t idx) __attribute__((always_inline)) {
+                 skip, bound, [&](int64_t idx) __attribute__((always_inline)) {
                   ret = bloom_filter_.might_contain(hash_values[idx], is_match);
                   if (OB_SUCC(ret)) {
                     if (ResFormat == VEC_FIXED) {
@ -783,14 +783,13 @@ int ObRFBloomFilterMsg::do_might_contain_vector(
  int64_t total_count = 0;
  int64_t filter_count = 0;
  bool is_match = true;
-  int64_t batch_size = bound.batch_size();
  uint64_t seed = ObExprJoinFilter::JOIN_FILTER_SEED;
  ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
  uint64_t *hash_values = filter_ctx.right_hash_vals_;
  VectorFormat res_format = expr.get_format(ctx);
  if (VEC_FIXED == res_format) {
    IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-    if (OB_FAIL(preset_not_match(res_vec, batch_size))) {
+    if (OB_FAIL(preset_not_match(res_vec, bound))) {
      LOG_WARN("failed to preset_not_match", K(ret));
    }
  }
@ -811,11 +810,11 @@ int ObRFBloomFilterMsg::do_might_contain_vector(
  if (OB_FAIL(ret)) {
  } else if (VEC_UNIFORM == res_format) {
    IntegerUniVec *res_vec = static_cast<IntegerUniVec *>(expr.get_vector(ctx));
-    ret = fill_vec_result<VEC_UNIFORM, IntegerUniVec>(res_vec, skip, batch_size, hash_values,
+    ret = fill_vec_result<VEC_UNIFORM, IntegerUniVec>(res_vec, skip, bound, hash_values,
                                                      total_count, filter_count);
  } else if (VEC_FIXED == res_format) {
    IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-    ret = fill_vec_result<VEC_FIXED, IntegerFixedVec>(res_vec, skip, batch_size, hash_values,
+    ret = fill_vec_result<VEC_FIXED, IntegerFixedVec>(res_vec, skip, bound, hash_values,
                                                      total_count, filter_count);
  }
  if (OB_FAIL(ret)) {
@ -840,15 +839,14 @@ int ObRFBloomFilterMsg::might_contain_vector(
  if (OB_UNLIKELY(is_empty_)) {
    int64_t total_count = 0;
    int64_t filter_count = 0;
-    const int64_t batch_size = bound.batch_size();
    ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
    VectorFormat res_format = expr.get_format(ctx);
    if (VEC_UNIFORM == res_format) {
      IntegerUniVec *res_vec = static_cast<IntegerUniVec *>(expr.get_vector(ctx));
-      ret = proc_filter_empty(res_vec, skip, batch_size, total_count, filter_count);
+      ret = proc_filter_empty(res_vec, skip, bound, total_count, filter_count);
    } else if (VEC_FIXED == res_format) {
      IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-      ret = proc_filter_empty(res_vec, skip, batch_size, total_count, filter_count);
+      ret = proc_filter_empty(res_vec, skip, bound, total_count, filter_count);
    }
    if (OB_SUCC(ret)) {
      eval_flags.set_all(true);
--- a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_msg.h
+++ b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_msg.h
@ -115,7 +115,7 @@ private:
  int insert_partition_bloom_filter(ArgVec *arg_vec, const ObBatchRows *child_brs,
                                    uint64_t *batch_hash_values);
  template <VectorFormat ResFormat, typename ResVec>
-  int fill_vec_result(ResVec *res_vec, const ObBitVector &skip, int64_t batch_size,
+  int fill_vec_result(ResVec *res_vec, const ObBitVector &skip, const EvalBound &bound,
                      uint64_t *hash_values, int64_t &total_count, int64_t &filter_count);

 public:
--- a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp
+++ b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp
@ -28,15 +28,15 @@ using namespace oceanbase::sql;
 using namespace oceanbase::share;

 template <typename ResVec>
-static int proc_filter_not_active(ResVec *res_vec, const ObBitVector &skip, int64_t batch_size);
+static int proc_filter_not_active(ResVec *res_vec, const ObBitVector &skip, const EvalBound &bound);

 template <>
 int proc_filter_not_active<IntegerUniVec>(IntegerUniVec *res_vec, const ObBitVector &skip,
-                                          int64_t batch_size)
+                                          const EvalBound &bound)
 {
  int ret = OB_SUCCESS;
  if (OB_FAIL(ObBitVector::flip_foreach(
-          skip, batch_size, [&](int64_t idx) __attribute__((always_inline)) {
+          skip, bound, [&](int64_t idx) __attribute__((always_inline)) {
            res_vec->set_int(idx, 1);
            return OB_SUCCESS;
          }))) {
@ -47,11 +47,11 @@ int proc_filter_not_active<IntegerUniVec>(IntegerUniVec *res_vec, const ObBitVec

 template <>
 int proc_filter_not_active<IntegerFixedVec>(IntegerFixedVec *res_vec, const ObBitVector &skip,
-                                            int64_t batch_size)
+                                            const EvalBound &bound)
 {
  int ret = OB_SUCCESS;
  uint64_t *data = reinterpret_cast<uint64_t *>(res_vec->get_data());
-  MEMSET(data, 1, (batch_size * res_vec->get_length(0)));
+  MEMSET(data + bound.start(), 1, (bound.range_size() * res_vec->get_length(0)));
  return ret;
 }

@ -714,7 +714,7 @@ int ObRFRangeFilterVecMsg::do_might_contain_vector(

  if (VEC_FIXED == res_format) {
    IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-    if (OB_FAIL(preset_not_match(res_vec, batch_size))) {
+    if (OB_FAIL(preset_not_match(res_vec, bound))) {
      LOG_WARN("failed to preset_not_match", K(ret));
    }
  }
@ -734,7 +734,7 @@ int ObRFRangeFilterVecMsg::do_might_contain_vector(
    bool is_match = true;
    const int64_t is_match_payload = 1; // for VEC_FIXED set set_payload, always 1
    ObDatum datum;
-    for (int64_t batch_i  = 0; batch_i < batch_size; ++batch_i) {
+    for (int64_t batch_i  = bound.start(); batch_i < bound.end() && OB_SUCC(ret); ++batch_i) {
      if (skip.at(batch_i)) {
        continue;
      } else {
@ -799,15 +799,14 @@ int ObRFRangeFilterVecMsg::might_contain_vector(
  if (OB_UNLIKELY(is_empty_)) {
    int64_t total_count = 0;
    int64_t filter_count = 0;
-    const int64_t batch_size = bound.batch_size();
    ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
    VectorFormat res_format = expr.get_format(ctx);
    if (VEC_UNIFORM == res_format) {
      IntegerUniVec *res_vec = static_cast<IntegerUniVec *>(expr.get_vector(ctx));
-      ret = proc_filter_empty(res_vec, skip, batch_size, total_count, filter_count);
+      ret = proc_filter_empty(res_vec, skip, bound, total_count, filter_count);
    } else if (VEC_FIXED == res_format) {
      IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-      ret = proc_filter_empty(res_vec, skip, batch_size, total_count, filter_count);
+      ret = proc_filter_empty(res_vec, skip, bound, total_count, filter_count);
    }
    if (OB_SUCC(ret)) {
      eval_flags.set_all(true);
@ -1545,7 +1544,7 @@ int ObRFInFilterVecMsg::do_might_contain_vector(
  uint64_t *right_hash_vals = filter_ctx.right_hash_vals_;
  if (VEC_FIXED == res_format) {
    IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-    if (OB_FAIL(preset_not_match(res_vec, batch_size))) {
+    if (OB_FAIL(preset_not_match(res_vec, bound))) {
      LOG_WARN("failed to preset_not_match", K(ret));
    }
  }
@ -1573,7 +1572,7 @@ int ObRFInFilterVecMsg::do_might_contain_vector(
    bool is_match = true;
    const int64_t is_match_payload = 1; // for VEC_FIXED set set_payload, always 1
    ObDatum datum;
-    for (int64_t batch_i  = 0; batch_i < batch_size && OB_SUCC(ret); ++batch_i) {
+    for (int64_t batch_i  = bound.start(); batch_i < bound.end() && OB_SUCC(ret); ++batch_i) {
      if (skip.at(batch_i)) {
        continue;
      } else {
@ -1635,15 +1634,14 @@ int ObRFInFilterVecMsg::might_contain_vector(
 {
  int ret = OB_SUCCESS;
  if (!is_active_) {
-    const int64_t batch_size = bound.batch_size();
    ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
    VectorFormat res_format = expr.get_format(ctx);
    if (VEC_UNIFORM == res_format) {
      IntegerUniVec *res_vec = static_cast<IntegerUniVec *>(expr.get_vector(ctx));
-      ret = proc_filter_not_active(res_vec, skip, batch_size);
+      ret = proc_filter_not_active(res_vec, skip, bound);
    } else if (VEC_FIXED == res_format) {
      IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-      ret = proc_filter_not_active(res_vec, skip, batch_size);
+      ret = proc_filter_not_active(res_vec, skip, bound);
    }
    if (OB_SUCC(ret)) {
      eval_flags.set_all(true);
@ -1651,15 +1649,14 @@ int ObRFInFilterVecMsg::might_contain_vector(
  } else if (OB_UNLIKELY(is_empty_)) {
    int64_t total_count = 0;
    int64_t filter_count = 0;
-    const int64_t batch_size = bound.batch_size();
    ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
    VectorFormat res_format = expr.get_format(ctx);
    if (VEC_UNIFORM == res_format) {
      IntegerUniVec *res_vec = static_cast<IntegerUniVec *>(expr.get_vector(ctx));
-      ret = proc_filter_empty(res_vec, skip, batch_size, total_count, filter_count);
+      ret = proc_filter_empty(res_vec, skip, bound, total_count, filter_count);
    } else if (VEC_FIXED == res_format) {
      IntegerFixedVec *res_vec = static_cast<IntegerFixedVec *>(expr.get_vector(ctx));
-      ret = proc_filter_empty(res_vec, skip, batch_size, total_count, filter_count);
+      ret = proc_filter_empty(res_vec, skip, bound, total_count, filter_count);
    }
    if (OB_SUCC(ret)) {
      eval_flags.set_all(true);
--- a/unittest/sql/engine/test_bit_vector.cpp
+++ b/unittest/sql/engine/test_bit_vector.cpp
@ -12,13 +12,13 @@

 #include <gtest/gtest.h>

-#include <cstring>
-
-#include "lib/allocator/page_arena.h"
-#include "common/object/ob_object.h"
-#include "lib/container/ob_se_array.h"
 #include "src/sql/engine/ob_bit_vector.h"
+#include "src/sql/ob_eval_bound.h"

+#define private public
+#define WordType uint64_t
+
+using namespace std;
 namespace oceanbase
 {
 namespace sql
@ -26,15 +26,21 @@ namespace sql
 class ObTestBitVector : public ::testing::Test
 {
 public:
-    ObTestBitVector() {}
-    ~ObTestBitVector() {}
-    virtual void SetUp() {}
-    virtual void TearDown() {}
+  ObTestBitVector()
+  {}
+  ~ObTestBitVector()
+  {}
+  virtual void SetUp()
+  {}
+  virtual void TearDown()
+  {}
+
 private:
  DISALLOW_COPY_AND_ASSIGN(ObTestBitVector);
 };

-void expect_range(ObBitVector *dest_bit_vector, int64_t start, int64_t middle, int64_t end) {
+void expect_range(ObBitVector *dest_bit_vector, int64_t start, int64_t middle, int64_t end)
+{
  for (int64_t i = 0; i < start; i++) {
    EXPECT_EQ(0, dest_bit_vector->at(i));
  }
@ -115,6 +121,243 @@ TEST(ObTestBitVector, bit_or_range)
  test_range(dest_bit_vector, src_bit_vector, 64, 127);

 }
+
+// Reference implementation copied from the previous version of ObBitVectorImpl.
+// The tests below run it next to the current foreach/flip_foreach and compare the
+// results, so keep it unchanged.
+//
+// Calls op(idx) for every index in [0, size) whose bit in `skip` is set (or whose
+// bit is clear, when IS_FLIP is true), in ascending index order. Iteration stops at
+// the first op() call that does not return OB_SUCCESS; this helper returns void, so
+// that status is not reported to the caller.
+//
+// @param skip  bit vector to scan; its words are read directly through data_
+// @param size  number of leading bits to consider
+// @param op    callable taking an int64_t index and returning an int status
+template <bool IS_FLIP, typename OP>
+void copied_inner_foreach(const ObBitVectorImpl<WordType> &skip, int64_t size, OP op)
+{
+  int ret = OB_SUCCESS;
+  int64_t tmp_step = 0;
+  // Each word is consumed in 16-bit chunks ("steps").
+  typedef uint16_t StepType;
+  const int64_t step_size = sizeof(StepType) * CHAR_BIT;
+  int64_t word_cnt = ObBitVectorImpl<WordType>::word_count(size);
+  // Index of the first bit of the word currently being scanned.
+  int64_t step = 0;
+  // Number of bits used in the last word; 0 when size is a multiple of WORD_BITS.
+  const int64_t remain = size % ObBitVectorImpl<WordType>::WORD_BITS;
+  for (int64_t i = 0; i < word_cnt && OB_SUCC(ret); ++i) {
+    // After the optional flip, a 1 bit always means "visit this index".
+    WordType s_word = (IS_FLIP ? ~skip.data_[i] : skip.data_[i]);
+    // bool all_bits = (false ? skip.data_[i] == 0 : (~skip.data_[i]) == 0);
+    if (i >= word_cnt - 1 && remain > 0) {
+      // Last word is only partially used: drop the bits at or beyond `size`.
+      // all_bits = ((false ? skip.data_[i] : ~skip.data_[i]) & ((1LU << remain) - 1)) == 0;
+      s_word = s_word & ((1LU << remain) - 1);
+    }
+    if (s_word > 0) {
+      WordType tmp_s_word = s_word;
+      tmp_step = step;
+      do {
+        uint16_t step_val = tmp_s_word & 0xFFFF;
+        if (0xFFFF == step_val) {
+          // All 16 bits of this chunk are selected: visit them with a plain loop,
+          // clamped so that no index at or beyond `size` is passed to op().
+          int64_t mini_cnt = step_size;
+          if (tmp_step + step_size > size) {
+            mini_cnt = size - tmp_step;
+          }
+          for (int64_t j = 0; OB_SUCC(ret) && j < mini_cnt; j++) {
+            int64_t k = j + tmp_step;
+            ret = op(k);
+          }
+        } else if (step_val > 0) {
+          // Sparse chunk: visit the set bits from lowest to highest.
+          do {
+            int64_t start_bit_idx = __builtin_ctz(step_val);
+            int64_t k = start_bit_idx + tmp_step;
+            ret = op(k);
+            // Clear the lowest set bit, which was just visited.
+            step_val &= (step_val - 1);
+          } while (step_val > 0 && OB_SUCC(ret)); // end for, for one step size
+        }
+        // Advance to the next 16-bit chunk of the current word.
+        tmp_step += step_size;
+        tmp_s_word >>= step_size;
+      } while (tmp_s_word > 0 && OB_SUCC(ret)); // one word-uint64_t
+    }
+    step += ObBitVectorImpl<WordType>::WORD_BITS;
+  } // end for
+}
+
+// Do not delete this function: it is a scratch copy used to debug the bound-based
+// foreach interface. Building the OceanBase unit tests compiles a large number of
+// unrelated files, and ob_bit_vector.h is included from many places, so editing
+// that header directly makes every rebuild very slow. Get the interface right here
+// first, then move it into ob_bit_vector.h for the final round of debugging.
+//
+// Calls op(idx) for every index in [start_idx, end_idx) whose bit in `skip` is set
+// (or whose bit is clear, when IS_FLIP is true), in ascending index order.
+// Iteration stops at the first op() call that does not return OB_SUCCESS; this
+// helper returns void, so that status is not reported to the caller.
+//
+// @param skip       bit vector to scan; its words are read directly through data_
+// @param start_idx  first index to visit (inclusive)
+// @param end_idx    one past the last index to visit (exclusive)
+// @param op         callable taking an int64_t index and returning an int status
+template <bool IS_FLIP, typename OP>
+void my_foreach_bound(const ObBitVectorImpl<WordType> &skip, int64_t start_idx, int64_t end_idx, OP op)
+{
+  int ret = OB_SUCCESS;
+  // Each word is consumed in 16-bit chunks ("steps").
+  typedef uint16_t StepType;
+  const int64_t step_size = sizeof(StepType) * CHAR_BIT;
+  const int64_t word_bits = ObBitVectorImpl<WordType>::WORD_BITS;
+
+  // Word that holds start_idx, and the mask that keeps only bits >= start_idx in it.
+  // The masks are built and stored as WordType so the shifts never depend on the
+  // width of unsigned long and no unsigned-to-signed conversion is involved.
+  const int64_t start_cnt = start_idx / word_bits; // start_idx is included
+  const int64_t begin_remain = start_idx % word_bits;
+  const WordType begin_mask = ~static_cast<WordType>(0) << begin_remain;
+
+  // One past the last word to scan, and the mask that keeps only bits < end_idx in
+  // that last word. The mask is only meaningful (and only used) when end_remain > 0.
+  const int64_t end_cnt = ObBitVectorImpl<WordType>::word_count(end_idx); // end_idx is not included
+  const int64_t end_remain = end_idx % word_bits;
+  const WordType end_mask = (static_cast<WordType>(1) << end_remain) - 1;
+
+  // Index of the first bit of the word currently being scanned.
+  int64_t step = word_bits * start_cnt;
+  for (int64_t i = start_cnt; i < end_cnt && OB_SUCC(ret); ++i) {
+    // After the optional flip, a 1 bit always means "visit this index".
+    WordType s_word = (IS_FLIP ? ~skip.data_[i] : skip.data_[i]);
+    if (i == start_cnt && begin_remain > 0) {
+      // First word: remove the bits below start_idx.
+      s_word &= begin_mask;
+    }
+    if (i == end_cnt - 1 && end_remain > 0) {
+      // Last word: remove the bits at or beyond end_idx. When the range fits in a
+      // single word, this and the check above both apply to that word.
+      s_word &= end_mask;
+    }
+    if (s_word > 0) {
+      WordType tmp_s_word = s_word;
+      int64_t tmp_step = step;
+      do {
+        StepType step_val = tmp_s_word & 0xFFFF;
+        if (0xFFFF == step_val) {
+          // All 16 bits of this chunk are selected. Out-of-range bits were already
+          // masked off, so every index in the chunk can be visited.
+          for (int64_t j = 0; OB_SUCC(ret) && j < step_size; j++) {
+            const int64_t k = j + tmp_step;
+            ret = op(k);
+          }
+        } else if (step_val > 0) {
+          // Sparse chunk: visit the set bits from lowest to highest.
+          do {
+            const int64_t start_bit_idx = __builtin_ctz(step_val);
+            const int64_t k = start_bit_idx + tmp_step;
+            ret = op(k);
+            // Clear the lowest set bit, which was just visited.
+            step_val &= (step_val - 1);
+          } while (step_val > 0 && OB_SUCC(ret));
+        }
+        // Advance to the next 16-bit chunk of the current word.
+        tmp_step += step_size;
+        tmp_s_word >>= step_size;
+      } while (tmp_s_word > 0 && OB_SUCC(ret));
+    }
+    step += word_bits;
+  }
+}
+
+// Cross-checks ObBitVector::foreach / flip_foreach on one randomly filled bit vector.
+//
+// A zeroed vector of batch_size bits gets a random range [true_start_idx,
+// true_end_idx) filled through set_all(). Three implementations are then run, for
+// both foreach and flip_foreach:
+//   - copied_inner_foreach with size = end_idx (copy of the previous implementation)
+//   - ObBitVector::(flip_)foreach with size = end_idx (size-based interface)
+//   - ObBitVector::(flip_)foreach with an EvalBound over [start_idx, end_idx)
+// Each one records the indexes it visited (1 = visited, 0 = not visited). The
+// records are compared with each other and with the values expected from the fill
+// pattern.
+//
+// @param batch_size  number of bits in the vector
+// @param start_idx   range start (inclusive) given to EvalBound; must be <= end_idx
+// @param end_idx     range end (exclusive) for all scans; must be <= batch_size
+void test_foreach_result_random(int64_t batch_size, int64_t start_idx, int64_t end_idx)
+{
+  // Back the bit vector with whole words. batch_size bits occupy
+  // word_count(batch_size) words, which is more than batch_size bytes whenever
+  // batch_size is smaller than sizeof(WordType). Keeping at least one word gives a
+  // non-null buffer for batch_size == 0, and the std::vector releases the memory
+  // when this function returns.
+  const int64_t word_cnt = ObBitVectorImpl<WordType>::word_count(batch_size);
+  std::vector<WordType> storage(
+      static_cast<std::vector<WordType>::size_type>(word_cnt > 0 ? word_cnt : 1));
+  ObBitVector *bit_vector = to_bit_vector(static_cast<void *>(storage.data()));
+  bit_vector->init(batch_size);
+
+  int64_t true_start_idx = common::ObRandom::rand(0, batch_size);
+  int64_t true_end_idx = common::ObRandom::rand(0, batch_size);
+  if (true_start_idx > true_end_idx) {
+    swap(true_start_idx, true_end_idx);
+  }
+
+  // Attach the random inputs to every failure reported below so that a failing
+  // round can be reproduced.
+  SCOPED_TRACE(::testing::Message() << "batch_size=" << batch_size << " start_idx=" << start_idx
+                                    << " end_idx=" << end_idx << " true_start_idx=" << true_start_idx
+                                    << " true_end_idx=" << true_end_idx);
+
+  bit_vector->set_all(true_start_idx, true_end_idx);
+  EvalBound bound(batch_size, start_idx, end_idx, false);
+
+  // test foreach
+  std::vector<int> result_foreach_ori(batch_size, 0);
+  std::vector<int> result_foreach_batch(batch_size, 0);
+  std::vector<int> result_foreach_bound(batch_size, 0);
+  copied_inner_foreach<false>(*bit_vector, end_idx, [&](int64_t idx) __attribute__((always_inline)) {
+    result_foreach_ori[idx] = 1;
+    return OB_SUCCESS;
+  });
+  ObBitVector::foreach(*bit_vector, end_idx, [&](int64_t idx) __attribute__((always_inline)) {
+    result_foreach_batch[idx] = 1;
+    return OB_SUCCESS;
+  });
+  ObBitVector::foreach(*bit_vector, bound, [&](int64_t idx) __attribute__((always_inline)) {
+    result_foreach_bound[idx] = 1;
+    return OB_SUCCESS;
+  });
+
+  // test flip_foreach
+  std::vector<int> result_flip_foreach_ori(batch_size, 0);
+  std::vector<int> result_flip_foreach_batch(batch_size, 0);
+  std::vector<int> result_flip_foreach_bound(batch_size, 0);
+  copied_inner_foreach<true>(*bit_vector, end_idx, [&](int64_t idx) __attribute__((always_inline)) {
+    result_flip_foreach_ori[idx] = 1;
+    return OB_SUCCESS;
+  });
+  ObBitVector::flip_foreach(*bit_vector, end_idx, [&](int64_t idx) __attribute__((always_inline)) {
+    result_flip_foreach_batch[idx] = 1;
+    return OB_SUCCESS;
+  });
+  ObBitVector::flip_foreach(*bit_vector, bound, [&](int64_t idx) __attribute__((always_inline)) {
+    result_flip_foreach_bound[idx] = 1;
+    return OB_SUCCESS;
+  });
+
+  for (int64_t i = 0; i < batch_size; ++i) {
+    // The size-based interface must always agree with the copied old implementation.
+    EXPECT_EQ(result_foreach_ori[i], result_foreach_batch[i]);
+    EXPECT_EQ(result_flip_foreach_ori[i], result_flip_foreach_batch[i]);
+
+    // Expected coverage of each interface:
+    //   i < start_idx             -> visited by the size-based scans only
+    //   start_idx <= i < end_idx  -> visited by every scan
+    //   i >= end_idx              -> visited by none
+    // Inside its range, foreach marks the indexes whose bit is 1 and flip_foreach
+    // marks the indexes whose bit is 0.
+    const bool bit_is_set = (i >= true_start_idx && i < true_end_idx);
+    const bool in_batch_range = (i < end_idx);
+    const bool in_bound_range = (i >= start_idx && i < end_idx);
+
+    EXPECT_EQ((in_batch_range && bit_is_set) ? 1 : 0, result_foreach_batch[i]);
+    EXPECT_EQ((in_batch_range && !bit_is_set) ? 1 : 0, result_flip_foreach_batch[i]);
+    EXPECT_EQ((in_bound_range && bit_is_set) ? 1 : 0, result_foreach_bound[i]);
+    EXPECT_EQ((in_bound_range && !bit_is_set) ? 1 : 0, result_flip_foreach_bound[i]);
+  }
+}
+
+// Randomized check of foreach/flip_foreach. One batch size is drawn with
+// ObRandom::rand(0, 1024) and reused for 100 rounds; each round draws two indexes
+// with ObRandom::rand(0, batch_size), orders them, and passes them to
+// test_foreach_result_random as [start_idx, end_idx).
+TEST(ObTestBitVector, test_foreach)
+{
+  const int64_t batch_size = common::ObRandom::rand(0, 1024);
+  const int64_t total_rounds = 100;
+  int64_t rounds_left = total_rounds;
+  while (rounds_left-- > 0) {
+    const int64_t first = common::ObRandom::rand(0, batch_size);
+    const int64_t second = common::ObRandom::rand(0, batch_size);
+    // Order the pair so that the smaller value becomes the range start.
+    const int64_t start_idx = (first > second) ? second : first;
+    const int64_t end_idx = (first > second) ? first : second;
+    test_foreach_result_random(batch_size, start_idx, end_idx);
+  }
+}
+
 }
 }