From 0c9a1ed8aea8977438ed636c28916f0ad3aa7206 Mon Sep 17 00:00:00 2001 From: x Date: Fri, 12 Jan 2018 19:32:21 +0100 Subject: [PATCH] TurboPFor: Bit Pack/UnPack c/c++ header --- bitpack.h | 108 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 37 deletions(-) diff --git a/bitpack.h b/bitpack.h index 54cc62b..8b804f5 100644 --- a/bitpack.h +++ b/bitpack.h @@ -35,46 +35,61 @@ extern "C" { #endif //******************** Bit Packing High Level API - n unlimited *************************************************** -size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitndpack32( uint32_t *__restrict in, 
size_t n, unsigned char *__restrict out); +size_t bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitndpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitndpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnd1pack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnd1pack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnd1pack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnd1pack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnd1pack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnd1pack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnd1pack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnd1pack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnzpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnzpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnzpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); -size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnzpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnzpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnzpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnzpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out); 
+size_t bitnzpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); +size_t bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out); +size_t bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); +size_t bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); +size_t bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out); +size_t bitnunpack128v16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); +size_t bitnunpack128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); -size_t bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out); -size_t bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); -size_t bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); -size_t bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out); +size_t bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out); +size_t bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); +size_t bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); +size_t bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out); +size_t bitndunpack128v16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); +size_t bitndunpack128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); -size_t bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out); -size_t bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); -size_t bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); -size_t bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out); +size_t bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out); +size_t 
bitnd1unpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); +size_t bitnd1unpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); +size_t bitnd1unpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out); +size_t bitnd1unpack128v16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out); +size_t bitnd1unpack128v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out); -size_t bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out); -size_t bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out); -size_t bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out); -size_t bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out); - -size_t bitnzunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out); -size_t bitnzunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); -size_t bitnzunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); -size_t bitnzunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out); +size_t bitnzunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out); +size_t bitnzunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); +size_t bitnzunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); +size_t bitnzunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out); +size_t bitnzunpack128v16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out); +size_t bitnzunpack128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); //******** Bit Packing Low level API **************************************************************** // bipackNN: Pack array with n unsigned (NN bits in[n]) values to the buffer out using nbits per value. 
Return value = end of compressed buffer out @@ -113,6 +128,13 @@ unsigned char *bitzpack64( uint64_t *__restrict in, unsigned n, const unsigned //-------------------------------------- SIMD ------------------------------------------------------------------------------------------ // Pack array with 128 unsigned (32 bits in[n]) values to the buffer out using nbits per value. Return value = end of compressed buffer out +unsigned char *bitpack128v16( unsigned short *__restrict in, unsigned n, unsigned char *__restrict out , unsigned b); +unsigned char *bitdpack128v16( unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b); +unsigned char *bitd1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b); +unsigned char *bitfpack128v16( unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b); +unsigned char *bitf1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b); +unsigned char *bitzpack128v16( unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b); + unsigned char *bitpack128v32( unsigned *__restrict in, unsigned n, unsigned char *__restrict out , unsigned b); unsigned char *bitdpack128v32( unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b); unsigned char *bitd1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b); @@ -203,28 +225,40 @@ unsigned char *bitf1unpack64(const unsigned char *__restrict in, unsigned n, uin // ---------------- SIMD : unpack a SIMD bit packed integer array ------------------------------------------------------------------------------- // SIMD unpack a 128/256 bitpacked integer array. 
Return value = end of packed buffer in -unsigned char *bitunpack128v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b); +unsigned char *bitunpack128v16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned b); +unsigned char *bitzunpack128v16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start, unsigned b); +unsigned char *bitdunpack128v16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start, unsigned b); +unsigned char *bitd1unpack128v16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start, unsigned b); +unsigned char *bitfunpack128v16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start, unsigned b); +unsigned char *bitf1unpack128v16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start, unsigned b); + +unsigned char *bitunpack128v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b); unsigned char *bitzunpack128v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitdunpack128v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitd1unpack128v32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitfunpack128v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitf1unpack128v32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); -//unsigned char *bitunpack256w32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b); +unsigned char *bitunpack256w32( const 
unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b); -unsigned char *bitunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b); +unsigned char *bitunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b); unsigned char *bitzunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitdunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitd1unpack256v32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitfunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitf1unpack256v32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); -unsigned char *bitunpack128h32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b); +unsigned char *bitunpack128h32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b); unsigned char *bitzunpack128h32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitdunpack128h32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); unsigned char *bitd1unpack128h32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b); // internal TurboPFor functions: masked unpack +unsigned char *_bitunpack128v16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned b, unsigned short *__restrict pex, unsigned char *bb); +unsigned char *_bitdunpack128v16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned 
short start, unsigned b, unsigned short *__restrict pex, unsigned char *bb); +unsigned char *_bitd1unpack128v16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start, unsigned b, unsigned short *__restrict pex, unsigned char *bb); +unsigned char *_bitzunpack128v16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start, unsigned b, unsigned short *__restrict pex, unsigned char *bb); + unsigned char *_bitunpack128v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b, unsigned *__restrict pex, unsigned char *bb); unsigned char *_bitdunpack128v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b, unsigned *__restrict pex, unsigned char *bb); unsigned char *_bitd1unpack128v32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b, unsigned *__restrict pex, unsigned char *bb);