Co-authored-by: wangt1xiuyi <13547954130@163.com> Co-authored-by: yangqise7en <877793735@qq.com> Co-authored-by: Zach41 <zach_41@163.com>
		
			
				
	
	
		
			290 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			290 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/**
 | 
						|
 * Copyright (c) 2021 OceanBase
 | 
						|
 * OceanBase CE is licensed under Mulan PubL v2.
 | 
						|
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 | 
						|
 * You may obtain a copy of Mulan PubL v2 at:
 | 
						|
 *          http://license.coscl.org.cn/MulanPubL-2.0
 | 
						|
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 | 
						|
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 | 
						|
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 | 
						|
 * See the Mulan PubL v2 for more details.
 | 
						|
 */
 | 
						|
 | 
						|
#include <vector>
 | 
						|
#include <memory>
 | 
						|
#include <limits>
 | 
						|
#include <random>
 | 
						|
#include <cmath>
 | 
						|
#include <string>
 | 
						|
 | 
						|
#include "gtest/gtest.h"
 | 
						|
 | 
						|
#include "lib/codec/ob_composite_codec.h"
 | 
						|
#include "lib/codec/ob_simd_fixed_pfor.h"
 | 
						|
#include "lib/codec/ob_double_delta_zigzag_rle.h"
 | 
						|
#include "lib/codec/ob_delta_zigzag_rle.h"
 | 
						|
#include "lib/codec/ob_delta_zigzag_pfor.h"
 | 
						|
#include "lib/codec/ob_double_delta_zigzag_pfor.h"
 | 
						|
#include "lib/codec/ob_universal_compression.h"
 | 
						|
#include "lib/codec/ob_xor_fixed_pfor.h"
 | 
						|
#include "lib/codec/ob_tiered_codec.h"
 | 
						|
#include "lib/codec/ob_generated_scalar_bp_func.h"
 | 
						|
#include "lib/codec/ob_generated_unalign_simd_bp_func.h"
 | 
						|
 | 
						|
namespace oceanbase
 | 
						|
{
 | 
						|
namespace common
 | 
						|
{
 | 
						|
 | 
						|
using ::testing::TestWithParam;
 | 
						|
using ::testing::Values;
 | 
						|
 | 
						|
class ObBitPackingTest : public ::testing::TestWithParam<std::string> {
 | 
						|
public:
 | 
						|
  virtual void SetUp() {
 | 
						|
    std::string name = GetParam();
 | 
						|
    if (name == "ObSimpleBitPacking") {
 | 
						|
      codec.reset(new ObSimpleBitPacking());
 | 
						|
    } else if (name == "ObSIMDFixedPFor") {
 | 
						|
      codec.reset(new ObSIMDFixedPFor());
 | 
						|
    } else if (name == "ObJustCopy") {
 | 
						|
      codec.reset(new ObJustCopy());
 | 
						|
    } else if (name == "ObDeltaZigzagRle") {
 | 
						|
      codec.reset(new ObDeltaZigzagRle());
 | 
						|
    } else if (name == "ObDeltaZigzagPFor") {
 | 
						|
      codec.reset(new ObDeltaZigzagPFor());
 | 
						|
    } else if (name == "ObDoubleDeltaZigzagRle") {
 | 
						|
      codec.reset(new ObDoubleDeltaZigzagRle());
 | 
						|
    } else if (name == "ObDoubleDeltaZigzagPFor") {
 | 
						|
      codec.reset(new ObDoubleDeltaZigzagPFor());
 | 
						|
    } else if (name == "ObTiredCodec-bp-uni") {
 | 
						|
      auto tired_codec = new ObTiredCodec<ObSimpleBitPacking, ObUniversalCompression>();
 | 
						|
      codec.reset(tired_codec);
 | 
						|
      min_block_size_ = ObTiredCodec<ObSimpleBitPacking, ObUniversalCompression>::BlockSize;
 | 
						|
      codec->set_allocator(alloc_);
 | 
						|
      tired_codec->codec2_.set_compressor_type(ZSTD_1_3_8_COMPRESSOR);
 | 
						|
    } else if (name == "ObXorFixedPfor") {
 | 
						|
      codec.reset(new ObXorFixedPfor());
 | 
						|
      min_block_size_ = ObXorFixedPfor::BlockSize;
 | 
						|
    } else if (name == "ZSTD_1_3_8") {
 | 
						|
      ObUniversalCompression *uc = new ObUniversalCompression();
 | 
						|
      uc->set_compressor_type(ZSTD_1_3_8_COMPRESSOR);
 | 
						|
      codec.reset(uc);
 | 
						|
    } else if (name == "LZ4_191") {
 | 
						|
      ObUniversalCompression *uc = new ObUniversalCompression();
 | 
						|
      uc->set_compressor_type(LZ4_191_COMPRESSOR);
 | 
						|
      codec.reset(uc);
 | 
						|
    } else if (name == "SNAPPY") {
 | 
						|
      ObUniversalCompression *uc = new ObUniversalCompression();
 | 
						|
      uc->set_compressor_type(SNAPPY_COMPRESSOR);
 | 
						|
      uc->set_allocator(alloc_);
 | 
						|
      codec.reset(uc);
 | 
						|
    } else if (name == "ZLIB") {
 | 
						|
      ObUniversalCompression *uc = new ObUniversalCompression();
 | 
						|
      uc->set_allocator(alloc_);
 | 
						|
      uc->set_compressor_type(LZ4_COMPRESSOR);
 | 
						|
      codec.reset(uc);
 | 
						|
    }
 | 
						|
    else {
 | 
						|
      throw new std::logic_error("Unknown codec " + name);
 | 
						|
    }
 | 
						|
    min_block_size_ = codec->get_block_size();
 | 
						|
  }
 | 
						|
 | 
						|
protected:
 | 
						|
  // Codec under test; created in SetUp() from the test parameter.
  std::unique_ptr<ObCodec> codec;
 | 
						|
  // Per-width input vectors; reset() clears all of them.
  std::vector<uint8_t> in8;
 | 
						|
  std::vector<uint16_t> in16;
 | 
						|
  std::vector<uint32_t> in32;
 | 
						|
  std::vector<uint64_t> in64;
 | 
						|
  // Per-width output vectors; reset() clears all of them.
  std::vector<uint8_t> out8;
 | 
						|
  std::vector<uint16_t> out16;
 | 
						|
  std::vector<uint32_t> out32;
 | 
						|
  std::vector<uint64_t> out64;
 | 
						|
  // When false, verify() returns immediately for 8-byte element types.
  bool support_int64_{true};
 | 
						|
  // When false, verify() returns immediately for 1- and 2-byte element types.
  bool support_int8_16_{true};
 | 
						|
  // Assigned from codec->get_block_size() at the end of SetUp(); verify()
  // returns early when divisibleby(in_cnt, min_block_size_) is false.
  uint32_t min_block_size_{0};
 | 
						|
  // Passed to some codecs through set_allocator() in SetUp(); reset() calls
  // reuse() on it.
  ObArenaAllocator alloc_;
 | 
						|
  // Number of encode iterations and of decode iterations run by verify().
  uint32_t repeat_cnt_{50};
 | 
						|
 | 
						|
  void reset()
 | 
						|
  {
 | 
						|
    in8.clear();
 | 
						|
    in16.clear();
 | 
						|
    in32.clear();
 | 
						|
    in64.clear();
 | 
						|
    out8.clear();
 | 
						|
    out16.clear();
 | 
						|
    out32.clear();
 | 
						|
    out64.clear();
 | 
						|
    alloc_.reuse();
 | 
						|
  }
 | 
						|
 | 
						|
  template<class T>
 | 
						|
  void verify(T *in, uint64_t in_cnt)
 | 
						|
  {
 | 
						|
    if (!divisibleby(in_cnt, min_block_size_)) {
 | 
						|
      // if can not divide, just return
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    if ((sizeof(T) == 8) && !support_int64_) {
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    if (((sizeof(T) == 1) || (sizeof(T) == 2)) && !support_int8_16_) {
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    LIB_LOG(INFO, "verify", KP(in), K(in_cnt), K(sizeof(T)));
 | 
						|
    codec->set_uint_bytes(sizeof(T));
 | 
						|
    uint64_t inital_size = codec->get_max_encoding_size((char *)in, in_cnt * sizeof(T));
 | 
						|
    uint64_t encoded_size = inital_size;
 | 
						|
    char *encoded = new char[encoded_size];
 | 
						|
    memset(encoded, 0xff, inital_size);
 | 
						|
 | 
						|
    uint64_t out_pos = 0;
 | 
						|
    int64_t encode_start_us = ObTimeUtility::current_time();
 | 
						|
 | 
						|
    int ret = OB_SUCCESS;
 | 
						|
    for (int64_t i = 0; i < repeat_cnt_; i++) {
 | 
						|
      out_pos = 0;
 | 
						|
      ret = codec->encode((char *)in,
 | 
						|
                          in_cnt * sizeof(T),
 | 
						|
                          encoded,
 | 
						|
                          encoded_size,
 | 
						|
                          out_pos);
 | 
						|
    }
 | 
						|
    int64_t encode_cost_us = (ObTimeUtility::current_time() - encode_start_us) / repeat_cnt_;
 | 
						|
    double speed = ((in_cnt * sizeof(T)) / (encode_cost_us/(1000LL * 1000.0))) / (1024LL * 1024LL * 1024.0);
 | 
						|
    double comp_ratio = (in_cnt * sizeof(T))/(out_pos * 1.0);
 | 
						|
 | 
						|
    ASSERT_TRUE(out_pos > 0 && out_pos < encoded_size);
 | 
						|
    ASSERT_EQ(OB_SUCCESS, ret);
 | 
						|
 | 
						|
    std::vector<T> t_out;
 | 
						|
    t_out.resize(in_cnt);
 | 
						|
 | 
						|
    uint64_t pos = 0;
 | 
						|
    uint64_t out_pos2 = 0;
 | 
						|
    int64_t decode_start_us = ObTimeUtility::current_time();
 | 
						|
    for (int64_t i = 0; i < repeat_cnt_; i++) {
 | 
						|
      pos = 0;
 | 
						|
      out_pos2 = 0;
 | 
						|
      ret = codec->decode(encoded,
 | 
						|
                          out_pos,
 | 
						|
                          pos,
 | 
						|
                          in_cnt,
 | 
						|
                          reinterpret_cast<char *>(t_out.data()),
 | 
						|
                          in_cnt * sizeof(T),
 | 
						|
                          out_pos2);
 | 
						|
    }
 | 
						|
    int64_t decode_cost_us = (ObTimeUtility::current_time() - decode_start_us) / repeat_cnt_;
 | 
						|
    double decode_speed = (in_cnt * sizeof(T)) / (decode_cost_us/(1000LL * 1000.0)) / (1024LL * 1024 * 1024.0);
 | 
						|
 | 
						|
    std::string name = GetParam();
 | 
						|
    printf("uint%ld_t----%-30s, encode GB/s:%.2f, ratio:%.2f, decode GB/s:%.2f \n", sizeof(T)*8, name.c_str(), speed, comp_ratio, decode_speed);
 | 
						|
 | 
						|
    ASSERT_EQ(OB_SUCCESS, ret);
 | 
						|
    if (pos != out_pos) {
 | 
						|
      ::abort();
 | 
						|
    }
 | 
						|
    ASSERT_EQ(pos, out_pos);
 | 
						|
    ASSERT_EQ(out_pos2, in_cnt * sizeof(T));
 | 
						|
 | 
						|
    bool passed = true;
 | 
						|
    for (size_t i = 0; i < in_cnt; ++i) {
 | 
						|
      if (in[i] != t_out[i]) {
 | 
						|
        passed = false;
 | 
						|
      }
 | 
						|
      EXPECT_EQ(in[i], t_out[i]);
 | 
						|
    }
 | 
						|
    if (!passed) {
 | 
						|
      std::cout << "Test failed with input: ";
 | 
						|
      for (size_t i = 0; i < in_cnt; ++i) {
 | 
						|
        std::cout << in[i] << " ";
 | 
						|
      }
 | 
						|
      std::cout << std::endl;
 | 
						|
      std::cout << "Test failed with output: ";
 | 
						|
      for (size_t i = 0; i < in_cnt; ++i) {
 | 
						|
        std::cout << t_out[i] << " ";
 | 
						|
      }
 | 
						|
      std::cout << std::endl;
 | 
						|
    }
 | 
						|
    delete []encoded;
 | 
						|
    encoded = nullptr;
 | 
						|
  }
 | 
						|
 | 
						|
  /**
   * Fills `v` with `values` deterministic pseudo-random elements (fixed seed).
   *
   * @tparam UIntT      unsigned integer element type.
   * @param v           destination; previous contents are discarded.
   * @param values      number of elements to generate.
   * @param repeat_cnt  run length for the non-incrementing mode: each drawn
   *                    value is repeated this many times, and a final drawn
   *                    value fills the `values % repeat_cnt` leftover slots.
   *                    0 is treated as 1 to avoid dividing by zero.
   * @param is_inc      when true, `repeat_cnt` is ignored and the output is
   *                    base, base+1, base+2, ... (wrapping in UIntT) from one
   *                    drawn base value.
   */
  template<class UIntT>
  void _genDataRandom(std::vector<UIntT>& v, uint32_t values, uint32_t repeat_cnt, bool is_inc) {
    v.clear();
    v.reserve(values);
    std::mt19937_64 e2(123456);
    // std::uniform_int_distribution is not specified for 8-bit types, so draw
    // as uint64_t over UIntT's range and narrow the result.
    std::uniform_int_distribution<uint64_t> dist(
                            std::numeric_limits<UIntT>::min(),
                            std::numeric_limits<UIntT>::max());

    if (is_inc) {
      const UIntT base = static_cast<UIntT>(dist(e2));
      for (uint64_t i = 0; i < values; i++) {
        v.push_back(static_cast<UIntT>(base + i));
      }
    } else {
      const uint64_t run_len = (repeat_cnt == 0) ? 1 : repeat_cnt;
      const uint64_t full_runs = values / run_len;
      const uint64_t remain = values % run_len;

      for (uint64_t i = 0; i < full_runs; ++i) {
        const UIntT run_value = static_cast<UIntT>(dist(e2));
        for (uint64_t j = 0; j < run_len; j++) {
          v.push_back(run_value);
        }
      }
      // One more draw is always made for the tail, even when it is empty.
      const UIntT tail_value = static_cast<UIntT>(dist(e2));
      for (uint64_t i = 0; i < remain; ++i) {
        v.push_back(tail_value);
      }
    }
  }
 | 
						|
 | 
						|
  /**
   * Fills `v` with `values` deterministic pseudo-random elements (fixed seed)
   * that each need exactly `bits` bits: the value is drawn from
   * [0, 2^bits - 1] and bit (bits - 1) is then forced on.
   *
   * @tparam UIntT  unsigned integer element type.
   * @param v       destination; previous contents are discarded.
   * @param bits    bit width of every generated value. Widths larger than
   *                UIntT are clamped to UIntT's width; 0 yields all zeros
   *                (shifting by `bits - 1` would otherwise be undefined).
   * @param values  number of elements to generate.
   */
  template<class UIntT>
  void _genDataWithFixBits(std::vector<UIntT>& v,
                           uint32_t bits,
                           uint32_t values) {
    v.clear();
    v.reserve(values);

    const uint32_t type_bits = static_cast<uint32_t>(sizeof(UIntT) * 8);
    const uint32_t width = (bits > type_bits) ? type_bits : bits;
    if (width == 0) {
      for (uint32_t i = 0; i < values; ++i) {
        v.push_back(static_cast<UIntT>(0));
      }
      return;
    }

    // width is in [1, 64] here, so both shifts below are well defined.
    const uint64_t max_value = (width == 64) ? ~0ULL : ((1ULL << width) - 1);
    const uint64_t top_bit = 1ULL << (width - 1);

    std::mt19937_64 e2(123456);
    // std::uniform_int_distribution is not specified for 8-bit types, so draw
    // as uint64_t and narrow the result.
    std::uniform_int_distribution<uint64_t> dist(0, max_value);
    for (uint32_t i = 0; i < values; ++i) {
      v.push_back(static_cast<UIntT>(dist(e2) | top_bit));
    }
  }
 | 
						|
};
 | 
						|
 | 
						|
// Instantiates ObBitPackingTest once per codec name listed below; each name is
// delivered to the fixture through GetParam(). The names in the trailing
// comment are recognised by SetUp() but are not instantiated.
INSTANTIATE_TEST_CASE_P(FastPForLib,
                        ObBitPackingTest,
                        Values("ObSimpleBitPacking", "ObSIMDFixedPFor", "ObJustCopy",
                               "ObDeltaZigzagRle", "ObDeltaZigzagPFor",
                               "ObDoubleDeltaZigzagRle", "ObDoubleDeltaZigzagPFor",
                               "ObTiredCodec-bp-uni", "ObXorFixedPfor", "ZSTD_1_3_8"
                               // "LZ4_191", "SNAPPY", "ZLIB"
                               ));
 | 
						|
} // namespace common
 | 
						|
} // namespace oceanbase
 | 
						|
 | 
						|
int main(int argc, char **argv)
 | 
						|
{
 | 
						|
  testing::InitGoogleTest(&argc, argv);
 | 
						|
  system("rm -f test_codec_perfomance.log*");
 | 
						|
  OB_LOGGER.set_file_name("test_codec_perfomance.log", true, false);
 | 
						|
  oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
 | 
						|
 | 
						|
  ::testing::InitGoogleTest(&argc,argv);
 | 
						|
  return RUN_ALL_TESTS();
 | 
						|
}
 |