diff --git a/plugins.cc b/plugins.cc index 6c9029e..7bc0bb4 100644 --- a/plugins.cc +++ b/plugins.cc @@ -20,7 +20,8 @@ - github : https://github.com/powturbo - twitter : https://twitter.com/powturbo - email : powturbo [_AT_] gmail [_DOT_] com -**/ +**/ +// TurboPFor: Integer Compression - Benchmark App #define _LARGEFILE64_SOURCE 1 #define _FILE_OFFSET_BITS 64 #include @@ -89,33 +90,36 @@ enum { P_COPY, #define C_TURBOPFOR CODEC0 - TB_PFORDA, // actually not working w. mingw + TB_PFDA, // actually not working w. mingw TB_FOR, TB_FORDA, - TB_PACK, - TB_NPACK, + TB_BP, + TB_BPN, TB_VBYTE, TB_VSIMPLE, - TB_EFANO, - TB_VS_S1, - TB_VS_S4, - TB_VS_S8, + TB_EF, + TB_VSIMPLE_S1, + TB_VSIMPLE_S4, + TB_VSIMPLE_S8, #ifdef TURBOPFORH #define C_TURBOPFORH CODEC0V #else #define C_TURBOPFORH 0 #endif - TB_PACK128H, + TB_BP128H, #define C_TURBOPFORV CODEC0V - TB_PFOR128, - TB_PFOR256, - TB_PFORN128, - TB_PFORN256, - TB_PACK128V, - TB_PACK256H, - TB_PACK256V, - TB_EFANOV, - TB_EFANO256V, + TB_FOR128V, + TB_PF128V, + TB_PF256V, + TB_PFN128V, + TB_PFN256V, + TB_BP128V, + + TB_FOR256V, + TB_BP256H, + TB_BP256V, + TB_EF128V, + TB_EF256V, #define C_BITSHUFFLE CODEC1 P_BITSHUFFLE, P_BS_LZ, @@ -150,17 +154,17 @@ enum { #define C_POLYCOM CODEC1 PC_OPTPFD, // compression too slow and limited to 28 bits. crashs on some lists PC_VBYTE, - PC_RICE, + PC_RICE, // incl. only as demo, crash on some lists PC_SIMPLE16, // limited to 28 bits. #define C_QMX CODEC1 - P_QMX,P_QMX2,P_QMX3,P_QMX4, + P_QMX,P_QMX2,P_QMX3,P_QMX4, // crash on gov2 #define C_SIMDCOMP CODEC1 SC_PACK, SC_SIMDPACK128, SC_SIMDPACK256, SC_FOR, SC_FORDA, -#define C_SIMPLE8B CODEC1 // //crash on integers w. size 32 bits ! +#define C_SIMPLE8B CODEC1 //crash on integers w. size 32 bits ! AM_SIMPLE8B, #define C_STREAMVBYTE CODEC1 P_STREAMVBYTE, @@ -186,22 +190,9 @@ enum { }; #ifndef __SSSE3__ -#define C_VINTG8IU 0 -#define C_MASKEDVBYTE 0 +#define C_VINTG8IU 0 +#define C_MASKEDVBYTE 0 #endif - #if C_SIMPLE8B -#include "ext/simple8b.h" // optimized simple-8b by powturbo - #endif - - -//------------------------------------------------------------------------------- - #if C_C_BLOSC -#include "ext/c-blosc/blosc/shuffle.h" -#include "ext/c-blosc/blosc/blosc.h" -//#include "ext/c-blosc2/blosc/shuffle.h" -//#include "ext/c-blosc2/blosc/blosc.h" - #endif - #if C_FASTPFOR #include "ext/FastPFor/headers/variablebyte.h" #include "ext/FastPFor/headers/simple16.h" @@ -214,6 +205,15 @@ enum { //#include "ext/FastPFor/headers/compositecodec.h" #endif + #if C_SIMPLE8B +#include "ext/simple8b.h" // optimized simple-8b by powturbo + #endif + + #if C_C_BLOSC +#include "ext/c-blosc/blosc/shuffle.h" +#include "ext/c-blosc/blosc/blosc.h" + #endif + #if C_LIBFOR #include "ext/libfor/for.h" unsigned char *for_selectx( unsigned char *__restrict in, unsigned n, unsigned *__restrict out) { unsigned b = in[4], i; for(i = 0; i < n; i++) out[i] = for_select(in, i); return in + 5 + for_compressed_size_bits(n, b); } @@ -253,7 +253,6 @@ unsigned char *for_selectx( unsigned char *__restrict in, unsigned n, unsigned * #include "bitutil.h" #include "transpose.h" #include "vp4.h" - #endif #if C_ZLIB @@ -309,11 +308,10 @@ unsigned char *simdunpackd1n(uint32_t *in, uint32_t n, uint32_t *out, uint32_t s return (unsigned char *)in; } -/*unsigned char *simdfor_selectx( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { +unsigned char *simdfor_selectx( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { unsigned i; for(i=0; i < n;i++) out[i] = simdselectFOR(start, (const __m128i *)in, b, i); return in + simdpackFOR_compressedbytes(n, b); - }*/ - +} #ifdef __AVX2__ unsigned char *avxpackwithoutmaskn(uint32_t *in, uint32_t n, uint32_t *out, uint32_t b) { uint32_t *ip; @@ -363,96 +361,115 @@ int prunelen = 0x200, ans_seg = 12*1024; unsigned char sbuf[BLK_SIZE*2+64]; -//------------------------------------------------- registry ------------------------------------------------------------------------------------------------- +//------------------------------------------------- plugins: codes+function ------------------------------------------------- +struct codecs codecs[] = { + { C_C_BLOSC, "blosc", "https://github.com/Blosc/c-blosc" }, + { C_FASTPFOR, "FastPFor", "https://github.com/lemire/FastPFor" }, + { C_LZ4, "lz4", "https://github.com/Cyan4973/lz4" }, + { C_LITTLEPACK, "LittleIntPacker", "https://github.com/lemire/LittleIntPacker" }, + { C_LZTURBO, "LzTurbo", "https://sites.google.com/site/powturbo" }, + { C_MASKEDVBYTE, "MaskedVbyte", "http://maskedvbyte.org" }, + { C_POLYCOM, "Polycom", "https://github.com/encode84/bcm" }, + { C_QMX, "QMX", "https://bitbucket.org/andrewtrotman/bench" }, + { C_SIMDCOMP, "simdcomp", "https://github.com/lemire/simdcomp" }, + { C_SIMPLE8B, "Simple-8b optimized", "https://github.com/powturbo/TurboPFor" }, + { C_STREAMVBYTE, "Streamvbyte", "https://github.com/lemire/streamvbyte" }, + { C_TURBOPFOR, "TurboPFor", "https://github.com/powturbo/TurboPFor" }, + { C_VARINTG8IU, "VarintG8IU", "https://github.com/lemire/FastPFor" }, + { C_ZLIB, "zlib", "http://zlib.net\thttps://github.com/madler/zlib" }, + { C_MEMCPY, "memcpy/copy", "--------------------------------------" }, + { -1 } +}; + struct plugs plugs[] = { - { TB_PFOR128, "TurboPFor", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V128 }, - { TB_PFORN128, "TurboPForN", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", ""/*, 0,BLK_V128*/ }, - { TB_PFOR256, "TurboPFor256", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V256 }, - { TB_PFORN256, "TurboPFor256N", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", ""/*, 0,BLK_V256*/ }, - { TB_PFORDA, "TurboPForDA", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V128 }, - { TB_FOR, "TurboFor", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V128 }, - { TB_FORDA, "TurboForDA", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V128 }, - { TB_PACK128V, "TurboPackV", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V128 }, - { TB_PACK128H, "TurboPackH", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V128 }, - { TB_PACK256V, "TurboPack256V", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V256 }, - { TB_PACK256H, "TurboPack256H", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_V256 }, - { TB_PACK, "TurboPack", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", ""/*, 0,BLK_V128*/ }, - { TB_NPACK, "TurboPackN", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", ""/*, 0,BLK_V128*/ }, - { TB_VBYTE, "TurboVByte", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "" }, - { TB_VSIMPLE, "VSimple", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "" }, - { TB_EFANO, "EliasFano", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", PG_DLT }, - { TB_EFANOV, "EliasFanoV", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0, BLK_V128 }, - { TB_EFANO256V, "EliasFano256V", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0, BLK_V256 }, - - { TB_VS_S1, "vs_bitshuffle", C_LZ4, "", "TurboPFor", "", "https://github.com/Cyan4973/lz4", "", 0,BLK_SIZE }, - { TB_VS_S4, "vs_s4", C_LZ4, "", "TurboPFor", "", "https://github.com/Cyan4973/lz4", "", 0,BLK_SIZE }, - { TB_VS_S8, "vs_s8", C_LZ4, "", "TurboPFor", "", "https://github.com/Cyan4973/lz4", "", 0,BLK_SIZE }, + { TB_PF128V, "TurboPFor", C_TURBOPFOR, BLK_V128,0,"","PFor (SSE2)" }, + { TB_PFN128V, "TurboPForN", C_TURBOPFOR, 0, 0,"","PFor (SSE2) large blocks" }, + { TB_PF256V, "TurboPFor256", C_TURBOPFOR, BLK_V256,0,"","PFor (AVX2)" }, + { TB_PFN256V, "TurboPFor256N", C_TURBOPFOR, 0, 0,"","PFor (AVX2) large blocks" }, + { TB_PFDA, "TurboPForDA", C_TURBOPFOR, BLK_V128,0,"","PFor direct access" }, - { TB_TP8_32, "TP8s_32", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_SIZE }, - { TB_TP8V_32, "TP8_32", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_SIZE }, - { TB_TP4V_32, "TP4_32", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_SIZE }, - { TB_ZIGZAG_32, "ZigZag_32", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_SIZE }, - { TB_DELTA_32, "Delta_32", C_TURBOPFOR, "", "TurboPFor", "", "https://github.com/powturbo/TurboPFor", "", 0,BLK_SIZE }, - - { AM_SIMPLE8B, "Simple8b", C_SIMPLE8B, "", "Simple-8b optimized", "", "", "", 0,1024 }, + { TB_FOR, "TurboFor", C_TURBOPFOR, BLK_V128,0,"","FOR" }, + { TB_FORDA, "TurboForDA", C_TURBOPFOR, BLK_V128,0,"","FOR direct access" }, + { TB_FOR128V, "TurboForV", C_TURBOPFOR, BLK_V128,0,"","FOR (SIMD)" }, + { TB_FOR256V, "TurboFor256V", C_TURBOPFOR, BLK_V256,0,"","FOR (AVX2)" }, - { PC_SIMPLE16, "PC_Simple16", C_POLYCOM, "", "Polycom", "", "https://github.com/encode84/bcm", "", 0,BLK_V128 }, - { PC_OPTPFD, "PC_OptPFD", C_POLYCOM, "", "Polycom", "", "", "", 0,BLK_V128}, - { PC_VBYTE, "PC_Vbyte", C_POLYCOM, "", "Polycom", "", "https://github.com/jibsen/brieflz", "" }, - { PC_RICE, "PC_Rice", C_POLYCOM, "", "Polycom optimized", "", "https://github.com/jibsen/brieflz", "", 0,BLK_V128}, - - { P_VARINTG8IU, "VarintG8IU", C_VARINTG8IU, "", "VarintG8IU", "Apache license", "", ""}, - { P_MASKEDVBYTE, "MaskedVbyte", C_MASKEDVBYTE, "", "MaskedVbyte", "Apache license", "http://maskedvbyte.org", ""}, - { P_STREAMVBYTE, "StreamVbyte", C_STREAMVBYTE, "", "StreamVbyte", "Apache license", "", ""}, - - { FP_FASTPFOR, "FP_FastPFor", C_FASTPFOR, "", "FastPFor", "", "", ""/*, 0,BLK_SIZE*/}, - { FP_SIMDFASTPFOR,"FP_SimdFastPFor", C_FASTPFOR, "", "FastPFor", "", "", ""/*, 0,BLK_SIZE*/}, - { FP_OPTPFOR, "FP_OptPFor", C_FASTPFOR, "", "FastPFor", "", "", "", 0}, - { FP_SIMDOPTPFOR, "FP_SIMDOptPFor", C_FASTPFOR, "", "FastPFor", "", "", "", 0}, - { FP_VBYTE, "FP_VByte", C_FASTPFOR, "", "FastPFor", "", "", ""/*, 0,BLK_SIZE*/}, - { FP_SIMPLE8BRLE, "FP_Simple8bRLE", C_FASTPFOR, "", "FastPFor", "", "", ""/*, 0,BLK_SIZE*/}, - - { SC_SIMDPACK128, "SC_SIMDPack128", C_SIMDCOMP, "", "Simdcomp", "", "", "", 0,BLK_V128}, - { SC_SIMDPACK256, "SC_SIMDPack256", C_SIMDCOMP, "", "Simdcomp", "", "", "", 0,BLK_V256}, - { SC_FOR, "SC_For", C_SIMDCOMP, "", "Simdcomp", "", "", "", 0,BLK_V128}, - { SC_FORDA, "SC_ForDA", C_SIMDCOMP, "", "Simdcomp", "", "", "", 0,BLK_V128}, - -// { CL_FASTPFORD1, "CL.SIMDPFORD1", C_SIMDCOMP, "", "Simdcomp", "", "", "", 0,BLK_V128}, - { P_BS_LZ, "blosc_lz", C_C_BLOSC, "", "blosc", "BSD license", "https://github.com/Blosc/c-blosc", "1,2,3,4,5,6,7,8,9", 0,BLK_SIZE }, - { P_BS_LZ4, "blosc_lz4", C_C_BLOSC, "", "blosc", "BSD license", "https://github.com/Blosc/c-blosc", "1,2,3,4,5,6,7,8,9", 0,BLK_SIZE }, - { P_BS_ZLIB, "blosc_zlib", C_C_BLOSC, "", "blosc", "BSD license", "https://github.com/Blosc/c-blosc", "1,2,3,4,5,6,7,8,9", 0,BLK_SIZE }, - - { LF_FOR, "LibFor_For", C_SIMDCOMP, "", "LibFor", "", "", "", 0,BLK_V128}, - { LF_FORX, "LibFor_ForDA", C_SIMDCOMP, "", "LibFor", "", "", "", 0,BLK_V128}, - - { LI_PACK, "LI_Pack", C_LITTLEPACK, "", "LittlePack", "", "", ""/*, 0,BLK_V128*/}, - { LI_TURBOPACK, "LI_TurboPack", C_LITTLEPACK, "", "LittlePack", "", "", ""/*, 0,BLK_V128*/}, - { LI_SCPACK, "LI_SuperPack", C_LITTLEPACK, "", "LittlePack", "", "", ""/*, 0,BLK_V128*/}, - { LI_HORPACK, "LI_HorPack", C_LITTLEPACK, "", "LittlePack", "", "", ""/*, 0,BLK_V128*/}, - { LI_BMIPACK, "LI_BMIPack256", C_LITTLEPACK, "", "LittlePack", "", "", ""/*, 0,BLK_V128*/}, - - { P_QMX, "qmx", C_QMX, "", "QMX", "https://bitbucket.org/andrewtrotman/bench.git", "", ""/*, 0,BLK_V128*/}, - { P_QMX2, "qmx2", C_QMX, "", "QMX", "https://bitbucket.org/andrewtrotman/bench.git", "", ""/*, 0,BLK_V128*/}, - { P_QMX3, "qmx3", C_QMX, "", "QMX", "https://bitbucket.org/andrewtrotman/bench.git", "", ""/*, 0,BLK_V128*/}, - { P_QMX4, "qmx4", C_QMX, "", "QMX", "https://bitbucket.org/andrewtrotman/bench.git", "", ""/*, 0,BLK_V128*/}, + { TB_BP, "TurboPack", C_TURBOPFOR, 0, 0,"","Bit packing (scalar)" }, + { TB_BPN, "TurboPackN", C_TURBOPFOR, 0, 0,"","Bit packing (scalar) large blocks" }, + { TB_BP128V, "TurboPackV", C_TURBOPFOR, BLK_V128,0,"","Bit packing (SSE2 Vertical)" }, + { TB_BP128H, "TurboPackH", C_TURBOPFOR, BLK_V128,0,"","Bit packing (SSE2 Horizontal)" }, + { TB_BP256V, "TurboPack256V", C_TURBOPFOR, BLK_V256,0,"","Bit packing (AVX2 Vertical)" }, - { P_LZT, "LzTurbo", C_LZTURBO, "", "LzTurbo", "", "https://sites.google.com/site/powturbo", "20,21,22,32", 0,BLK_SIZE }, - { P_VSHUF, "VSimpleANS", C_LZTURBO, "", "LzTurbo", "", "https://sites.google.com/site/powturbo", "20,21,22,32", 0,BLK_SIZE }, - { LZ4_, "lz4", C_LZ4, "", "Lz4", "BSD license", "https://github.com/Cyan4973/lz4", "" }, - { LZ4_S1, "lz4_bitshufle", C_LZ4, "", "lz4+bitshuffle", "BSD license", "https://github.com/Cyan4973/lz4", "", 0,BLK_SIZE }, - { LZ4_S4, "lz4_tp4", C_LZ4, "", "lz4+Nibble transpose", "BSD license", "https://github.com/Cyan4973/lz4", "", 0,BLK_SIZE }, - { LZ4_S8, "lz4_tp8", C_LZ4, "", "lz+byte transpose", "BSD license", "https://github.com/Cyan4973/lz4", "", 0,BLK_SIZE }, - { P_ZLIB, "zlib", C_ZLIB, "","zlib", "zlib license", "http://zlib.net\thttps://github.com/madler/zlib", "", 0,BLK_SIZE }, -// { P_ZSTD, "zstd", C_ZSTD, "1.0.0","ZSTD", "BSD license+Patents","https://github.com/facebook/zstd", "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" }, - //----------------------------------------------------------------------------------- - { P_MCPY, "memcpy", C_MEMCPY, "", "memcpy", "------------", "--------------------------------------", "" }, - { P_COPY, "copy", C_MEMCPY, "", "copy", "", "", "" }, + { TB_VBYTE, "TurboVByte", C_TURBOPFOR, 0, 0,"","Variable byte (scalar)" }, + { TB_VSIMPLE, "VSimple", C_TURBOPFOR, 0, 0,"","Variable simple (scalar)" }, + { TB_EF, "EliasFano", C_TURBOPFOR, 0, PG_DLT,"","Elias fano (scalar)" }, + { TB_EF128V, "EliasFanoV", C_TURBOPFOR, BLK_V128,0,"","Eliasfano (SSE2)" }, + { TB_EF256V, "EliasFano256V", C_TURBOPFOR, BLK_V256,0,"","Elias fano (AVX2" }, + + { TB_VSIMPLE_S1, "vs_bitshuffle", C_LZ4, BLK_SIZE }, + { TB_VSIMPLE_S4, "vs_s4", C_LZ4, BLK_SIZE }, + { TB_VSIMPLE_S8, "vs_s8", C_LZ4, BLK_SIZE }, + + { AM_SIMPLE8B, "Simple8b", C_SIMPLE8B, 1024, 0,"","64 bits Simple family (instable)" }, + + { PC_SIMPLE16, "PC_Simple16", C_POLYCOM, BLK_V128,0,"","Simple 16. limited to 28 bits" }, + { PC_OPTPFD, "PC_OptPFD", C_POLYCOM, BLK_V128,0,"","OptPFD. limited to 28 bits" }, + { PC_VBYTE, "PC_Vbyte", C_POLYCOM, 0, 0,"","Variable byte" }, + { PC_RICE, "PC_Rice", C_POLYCOM, BLK_V128,0,"","Rice coding (instable)" }, + + { P_VARINTG8IU, "VarintG8IU", C_VARINTG8IU, 0, 0,"","Variable byte SIMD" }, + { P_MASKEDVBYTE, "MaskedVbyte", C_MASKEDVBYTE, 0, 0,"","Variable byte SIMD" }, + { P_STREAMVBYTE, "StreamVbyte", C_STREAMVBYTE, 0, 0,"","Variable byte SIMD" }, + + { FP_FASTPFOR, "FP_FastPFor", C_FASTPFOR, 0, 0,"","PFor scalar (inefficient for small blocks)" }, + { FP_SIMDFASTPFOR,"FP_SimdFastPFor", C_FASTPFOR, 0, 0,"","PFor SIMD (inefficient for small blocks)" }, + { FP_OPTPFOR, "FP_OptPFor", C_FASTPFOR, 0, 0,"","OptPFor scalar" }, + { FP_SIMDOPTPFOR, "FP_SIMDOptPFor", C_FASTPFOR, 0, 0,"","OptPFor SIMD" }, + { FP_VBYTE, "FP_VByte", C_FASTPFOR, 0, 0,"","Variable byte" }, + { FP_SIMPLE8BRLE, "FP_Simple8bRLE", C_FASTPFOR, 0, 0,"","Simple-8b + rle" }, + + { SC_SIMDPACK128, "SC_SIMDPack128", C_SIMDCOMP, BLK_V128,0,"","Bit packing (SSE4.1)"}, + { SC_SIMDPACK256, "SC_SIMDPack256", C_SIMDCOMP, BLK_V256,0,"","Bit packing (SSE4.1)"}, + { SC_FOR, "SC_For", C_SIMDCOMP, BLK_V128,0,"","For (SSE4.1)"}, + { SC_FORDA, "SC_ForDA", C_SIMDCOMP, BLK_V128,0,"","For direct access (SSE4.1)"}, + +//{ CL_FASTPFORD1, "CL.SIMDPFORD1", C_SIMDCOMP, "", 0,BLK_V128}, + { P_BS_LZ, "blosc_lz", C_C_BLOSC, BLK_SIZE,0, "","TurboPFor's [delta]+transpose+lz4" }, + { P_BS_LZ4, "blosc_lz4", C_C_BLOSC, BLK_SIZE,0, "","TurboPFor's [delta]+transpose+lz4" }, + { P_BS_ZLIB, "blosc_zlib", C_C_BLOSC, BLK_SIZE,0, "","TurboPFor's [delta]+transpose+lz4" }, + + { LF_FOR, "LibFor_For", C_SIMDCOMP, BLK_V128,0,"","For"}, + { LF_FORX, "LibFor_ForDA", C_SIMDCOMP, BLK_V128,0,"","For direct access"}, + + { LI_PACK, "LI_Pack", C_LITTLEPACK, 0, 0,"","Bit packing (scalar)" }, + { LI_TURBOPACK, "LI_TurboPack", C_LITTLEPACK, 0, 0,"","Bit packing (scalar)" }, + { LI_SCPACK, "LI_SuperPack", C_LITTLEPACK, 0, 0,"","Bit packing (scalar)" }, + { LI_HORPACK, "LI_HorPack", C_LITTLEPACK, 0, 0,"","Bit packing (sse4.1 horizontal)" }, + { LI_BMIPACK, "LI_BMIPack256", C_LITTLEPACK, 0, 0,"","Bit packing (avx2)" }, + + { P_QMX, "qmx", C_QMX, 0, 0,"","QMX SIMD (inefficient for small blocks)" }, + { P_QMX2, "qmx2", C_QMX, 0, 0,"","QMX SIMD (inefficient for small blocks)" }, + { P_QMX3, "qmx3", C_QMX, 0, 0,"","QMX SIMD (inefficient for small blocks)" }, + { P_QMX4, "qmx4", C_QMX, 0, 0,"","QMX SIMD (inefficient for small blocks)" }, + //-------- lz77 + [delta] + transpose/shuffle --------------- + { P_LZT, "LzTurbo", C_LZTURBO, BLK_SIZE, 0, "20,21,22,32" }, + { P_VSHUF, "VSimpleANS", C_LZTURBO, BLK_SIZE, 0, "20,21,22,32" }, + { LZ4_, "lz4", C_LZ4, BLK_SIZE, 0, "","lz4" }, + { LZ4_S1, "lz4_bitshufle", C_LZ4, BLK_SIZE, 0, "","Bitshuffle + lz4" }, + { LZ4_S4, "lz4_tp4", C_LZ4, BLK_SIZE, 0, "","TurboPFor's [delta]+byte transpose + lz4" }, + { LZ4_S8, "lz4_tp8", C_LZ4, BLK_SIZE, 0, "","TurboPFor's [delta]+transpose + lz4" }, + { P_ZLIB, "zlib", C_ZLIB, BLK_SIZE, 0, "","TurboPFor's [delta]+transpose + zlib" }, //----- Transform -------------- + { P_MCPY, "memcpy", C_MEMCPY }, + { P_COPY, "copy", C_MEMCPY }, - { P_BITSHUFFLE, "BitShuffle", C_TURBOPFOR, "", "BitShuffle", "", "", "", 0,BLK_SIZE }, - { P_BS_SHUFFLE, "Blosc_Shuffle", C_C_BLOSC, "", "Blosc", "", "https://github.com/Blosc/c-blosc2", "", 0,BLK_SIZE }, - -//{ P_MYCODEC, "mycodec", C_MYCODEC, "0", "My codec", " ", "", "" }, + { TB_TP8_32, "TP8s_32", C_TURBOPFOR, BLK_SIZE,0,"","Byte Transpose (scalar)" }, + { TB_TP8V_32, "TP8_32", C_TURBOPFOR, BLK_SIZE,0,"","Byte transpose (sse)" }, + { TB_TP4V_32, "TP4_32", C_TURBOPFOR, BLK_SIZE,0,"","Nibble transpose (sse)" }, + { TB_ZIGZAG_32, "ZigZag_32", C_TURBOPFOR, BLK_SIZE,0,"","ZigZag encoding (sse2)" }, + { TB_DELTA_32, "Delta_32", C_TURBOPFOR, BLK_SIZE,0,"","Delta encoding (sse2)"}, + + { P_BITSHUFFLE, "BitShuffle", C_TURBOPFOR, BLK_SIZE,0,"","Bit shuffle (SIMD)" }, + { P_BS_SHUFFLE, "Blosc_Shuffle", C_C_BLOSC, BLK_SIZE,0,"","Blosc's shuffle (SIMD)" }, #ifdef LZTURBO // #include "../beplugr.h" #endif @@ -460,28 +477,23 @@ struct plugs plugs[] = { }; struct plugg plugg[] = { - { "TURBOPFOR","TURBOPFOR", "TurboPFor/TurboPFor256/TurboPackV256/TurboPackV/TurboVByte/TurboPack/TurboForDA/EliasFano" }, - { "DEFAULT", "Default", "TurboPFor/TurboPFor256/TurboPackV256/TurboPackV/TurboVByte/TurboPack" }, - { "BENCH", "Benchmark", "TurboPFor/TurboPackV/TurboVByte/TurboPack/QMX/FP.SimdFastPfor/FP.SimdOptPFor/MaskedVbyte/StreamVbyte" }, - { "EFFICIENT","Efficient", "TurboPFor/vsimple/turbovbyte" }, + { "TURBOPFOR","TURBOPFOR", "TurboPFor/TurboPFor256/TurboPackV256/TurboPackV/TurboVByte/TurboPack/TurboForDA/EliasFano/VSimple" }, + { "DEFAULT", "Default", "TurboPFor/TurboPFor256/TurboPackV256/TurboPackV/TurboVByte/TurboPack" }, + { "BENCH", "Benchmark", "TurboPFor/TurboPackV/TurboVByte/TurboPack/QMX/FP.SimdFastPfor/FP.SimdOptPFor/MaskedVbyte/StreamVbyte" }, + { "EFFICIENT","Efficient", "TurboPFor/vsimple/turbovbyte" }, - { "BITPACK", "Bit Packing", "TurboPackV/TurboPack256V/TurboPackH/TurboPack/SC.SimdPack128/SC.SimdPack256" }, - { "VBYTE", "Variable byte", "TurboVByte/FP.VByte/PC.Vbyte/VarintG8IU/MaskedVbyte/StreamVbyte" }, - { "SIMPLE", "Simple Family", "simple8b/simple16/vsimple/qmx" }, - { "LZ4", "lz4+bitshufle/transpose 4/8", "lz4_bitshuffle/lz4_tp4/lz4_tp8" }, + { "BITPACK", "Bit Packing", "TurboPackV/TurboPack256V/TurboPackH/TurboPack/SC.SimdPack128/SC.SimdPack256" }, + { "VBYTE", "Variable byte", "TurboVByte/FP.VByte/PC.Vbyte/VarintG8IU/MaskedVbyte/StreamVbyte" }, + { "SIMPLE", "Simple Family", "simple8b/simple16/vsimple/qmx" }, + { "LZ4", "lz4+bitshufle/transpose 4/8", "lz4_bitshuffle/lz4_tp4/lz4_tp8" }, - { "LI", "Little Integer", "LI.Pack/LI.TurboPack/LI.SuperPack/LI.HorPack/LI.BMIPack256" }, + { "LI", "Little Integer", "LI.Pack/LI.TurboPack/LI.SuperPack/LI.HorPack/LI.BMIPack256" }, { "" } }; -//----------------------------------- -#define VBPUT32(a,b) vbxput32(a,b) -#define VBGET32(a,b) vbxget32(a,b) - #define _TRANSFORM // Transform functions : transpose,zigzag #define _TP_BITS 1 // transpose bits (4,8,16,32,64) -//--------------------------------------- TurboPFor ---------------------------- #if C_C_BLOSC #define BITSHUFFLE(in,n,out) bitshuffle( 1, n, (unsigned char *)in, out, NULL) #define BITUNSHUFFLE(in,n,out) bitunshuffle( 1, n, (unsigned char *)in, (unsigned char *)out,NULL) @@ -507,9 +519,6 @@ struct plugg plugg[] = { #define PAD8(__x) (((__x)+7)/8) unsigned char *u32enc(unsigned *__restrict in, int n, unsigned *__restrict out) { unsigned *ip; - #if 0 - memcpy(out,in,n*4); return (unsigned char *)(out+n); - #else for(ip = in; ip != in+(n&~3); ) { *out++ = *ip++; *out++ = *ip++; @@ -518,13 +527,9 @@ unsigned char *u32enc(unsigned *__restrict in, int n, unsigned *__restrict out) } while(ip < in+n) *out++ = *ip++; return (unsigned char *)out; - #endif } unsigned char *u32dec(unsigned *__restrict in, int n, unsigned *__restrict out) { unsigned *op; - #if 0 - memcpy(out,in,n*4); return (unsigned char *)(in+n); - #else for(op = out; op != out+(n&~3); ) { *op++ = *in++; *op++ = *in++; @@ -533,26 +538,16 @@ unsigned char *u32dec(unsigned *__restrict in, int n, unsigned *__restrict out) } while(op < out+n) *op++ = *in++; return (unsigned char *)in; - #endif } unsigned char *_bitunpackx32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out , unsigned b) { unsigned i,k=0; for(i=0; i < n; i++,k+=b ) *out++ = _bitgetx32(in, k, b); return in + PAD8(n*b); } -// direct access functions included for demonstration only. Use the bulk functions instead, if you are decompressing most of the values +// direct access functions : included for demonstration only. Use the bulk functions instead, if you are decompressing most of the values unsigned char *bitf1unpackx32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, int start, unsigned b) { int i; for(i = 0; i < n; i++) out[i] = bitgetx32(in, i, b)+start+i+1; return in + PAD8(n*b); } -unsigned char *bitfunpackx32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, int start, unsigned b) { int i; for(i = 0; i < n; i++) out[i] = bitgetx32(in, i, b)+start; return in + PAD8(n*b); } -//---------------------------------------------- plugins -------------------------------------------------------- -#include "conf.h" - #ifndef max -#define max(x,y) (((x)>(y)) ? (x) : (y)) - #endif +unsigned char *bitfunpackx32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, int start, unsigned b) { int i; for(i = 0; i < n; i++) out[i] = bitgetx32(in, i, b)+start; return in + PAD8(n*b); } -void libmemcpy(unsigned char *dst, unsigned char *src, int len) { - void *(*memcpy_ptr)(void *, const void *, size_t) = memcpy; - if (time(NULL) == 1) - memcpy_ptr = NULL; - memcpy_ptr(dst, src, len); -} +//---------------------------------------------- plugins -------------------------------------------------------- +#include "conf.h" int codini(size_t insize, int codec) { switch(codec) { @@ -572,68 +567,74 @@ int codini(size_t insize, int codec) { void codexit(int codec) {} -//-------------- Sorted integer array : Delta/Differential compression (mode=0 increasing, mode=1 strictly increasing sequence) --------------- -#define VBPUT32(a,b) { unsigned _xx= b; vbxput32(a,_xx);} -#define VBGET32(a,b) vbxget32(a,b) - +//--- Sorted integer array : Delta/Differential compression (inc=0 increasing, inc=1 strictly increasing sequence) --------------- unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int outsize, int codec, int lev, char *prm, int inc) { - unsigned *in = (unsigned *)_in, n = (_n+3) / 4,i; - - unsigned pa[BLK_SIZE+2048],x,b; + unsigned *in = (unsigned *)_in, n = (_n+3) / 4, i, pa[BLK_SIZE+2048], x, b; - switch(codec&0x3f) { - //----------- copy --------------------------------------------------------------------------------------------------------- - case P_COPY: return u32enc( in, n, (unsigned *)out); - case P_MCPY: memcpy(out, _in, _n); return out+_n; - - case TB_VSIMPLE: x = *in++; bitdelta32( in, --n, pa, x, inc);VBPUT32(out, x); return vsenc32( pa, n, out); - case TB_VBYTE: x = *in++; --n; VBPUT32(out, x); return inc?vbd1enc32( in, n, out, x ):vbdenc32( in, n, out, x); - case TB_EFANO: x = *in++; --n; VBPUT32(out, x); return inc?efano1enc32(in, n, out, x+1):efanoenc32(in, n, out, x); + switch(codec) { + case TB_VSIMPLE: x = *in++; bitdelta32( in, --n, pa, x, inc); + vbxput32(out, x); return vsenc32( pa, n, out); + case TB_VBYTE: x = *in++; --n; vbxput32(out, x); return inc?vbd1enc32( in, n, out, x ):vbdenc32( in, n, out, x); + case TB_EF: x = *in++; --n; vbxput32(out, x); return inc?efano1enc32( in, n, out, x+1 ):efanoenc32( in, n, out, x); + + case TB_PFDA: x = *in++; --n; vbxput32(out, x); DELTR(in,n,x,inc,pa); + return p4encx32(pa,n,out); + case TB_FOR : + case TB_FORDA: x = *in++; --n; vbxput32(out, x); + if(inc) { b = bitf132(in, n, x); *out++=b; return bitf1pack32(in, n, out, x, b); } + else { b = bitf32( in, n, x); *out++=b; return bitfpack32( in, n, out, x, b); } + + case TB_BP128H: + case TB_BP: x = *in++; --n; vbxput32(out, x); + if(inc) { b = bitd132(in, n, x); *out++=b; return bitd1pack32(in, n, out, x, b); } + else { b = bitd32( in, n, x); *out++=b; return bitdpack32( in, n, out, x, b); } + case TB_BPN: return out+(inc?bitnd1pack32(in, n, out):bitndpack32( in, n, out)); + #if C_TURBOPFORV - case TB_EFANOV: x = *in++; --n; VBPUT32(out, x); - if(inc) return n == 128?efano1enc128v32(in, n, out, x+1):efano1enc32(in, n, out, x+1); - else return n == 128?efanoenc128v32( in, n, out, x ):efanoenc32( in, n, out, x ); - case TB_PFOR128: x = *in++; --n; VBPUT32(out, x); - if(inc) return n == 128?p4d1enc128v32(in, n, out, x):p4d1enc32( in, n, out, x); - else return n == 128?p4denc128v32( in, n, out, x):p4denc32( in, n, out, x); - case TB_PFORN128: return out+(inc?p4nd1enc128v32( in, n, out):p4ndenc128v32(in, n, out)); - case TB_PACK128V: x = *in++; --n; - if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return n == 128?bitd1pack128v32(in, n, out, x, b):bitd1pack32(in, n, out, x, b); } - else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return n == 128?bitdpack128v32( in, n, out, x, b):bitdpack32( in, n, out, x, b); } + case TB_EF128V: x = *in++; --n; vbxput32(out, x); + if(inc) return n == 128?efano1enc128v32(in, n, out, x+1 ):efano1enc32( in, n, out, x+1); + else return n == 128?efanoenc128v32( in, n, out, x ):efanoenc32( in, n, out, x ); + case TB_PF128V: x = *in++; --n; vbxput32(out, x); + if(inc) return n == 128?p4d1enc128v32( in, n, out, x ):p4d1enc32( in, n, out, x); + else return n == 128?p4denc128v32( in, n, out, x ):p4denc32( in, n, out, x); + case TB_PFN128V: return out+(inc?p4nd1enc128v32( in, n, out ):p4ndenc128v32(in, n, out)); + + case TB_FOR128V: x = *in++; --n; vbxput32(out, x); + if(inc) { b = bitf132(in, n, x); *out++=b; return n == 128?bitf1pack128v32(in, n, out, x, b):bitf1pack32( in, n, out, x, b); } + else { b = bitf32( in, n, x); *out++=b; return n == 128?bitfpack128v32( in, n, out, x, b):bitfpack32( in, n, out, x, b); } + case TB_BP128V: x = *in++; --n; vbxput32(out, x); + if(inc) { b = bitd132(in, n, x); *out++=b; return n == 128?bitd1pack128v32(in, n, out, x, b):bitd1pack32( in, n, out, x, b); } + else { b = bitd32( in, n, x); *out++=b; return n == 128?bitdpack128v32( in, n, out, x, b):bitdpack32( in, n, out, x, b); } #ifdef __AVX2__ - case TB_EFANO256V: x = *in++; --n; VBPUT32(out, x); - if(inc) return n == 256?efano1enc256v32(in, n, out, x+1):efano1enc32(in, n, out, x+1); - else return n == 256?efanoenc256v32( in, n, out, x ):efanoenc32( in, n, out, x ); - case TB_PFOR256: x = *in++; bitdelta32( in, --n, pa, x, inc);VBPUT32(out, x); return n == 256?p4enc256v32(pa, n, out ):p4enc32(pa, n, out); - case TB_PFORN256: return out+(inc?p4nd1enc256v32( in, n, out):p4ndenc256v32(in, n, out)); - /*case TB_PACK256V: x = *in++; --n; - if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return n == 256?bitd1pack256v32(in, n, out, x, b):bitd1pack32(in, n, out, x, b); } - else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return n == 256?bitdpack256v32( in, n, out, x, b):bitdpack32( in, n, out, x, b); }*/ - case TB_PACK256V: x = *in++; bitdelta32( in, --n, pa, x, inc);VBPUT32(out, x); *out++=b; return n == 256?bitpack256v32(pa, n, out,b):bitpack32(pa, n, out,b); + case TB_FOR256V: x = *in++; --n; vbxput32(out, x); + if(inc) { b = bitf132(in, n, x); *out++=b; return n == 256?bitf1pack256v32(in, n, out, x, b):bitf1pack32( in, n, out, x, b); } + else { b = bitf32( in, n, x); *out++=b; return n == 256?bitfpack256v32( in, n, out, x, b):bitfpack32( in, n, out, x, b); } + case TB_EF256V: x = *in++; --n; vbxput32(out, x); + if(inc) return n == 256?efano1enc256v32(in, n, out, x+1 ):efano1enc32( in, n, out, x+1); + else return n == 256?efanoenc256v32( in, n, out, x ):efanoenc32( in, n, out, x ); + case TB_PF256V: x = *in++; bitdelta32( in, --n, pa, x, inc); + vbxput32(out, x); return n == 256?p4enc256v32( pa, n, out ):p4enc32( pa, n, out); + case TB_PFN256V: return out+(inc?p4nd1enc256v32( in, n, out ):p4ndenc256v32(in, n, out)); + case TB_BP256V: x = *in++;bitdelta32( in, --n, pa, x, inc); + vbxput32(out, x);*out++=b; return n == 256?bitpack256v32( pa, n, out, b ):bitpack32( pa, n, out,b); + /*case TB_BP256V: x = *in++; --n; + if(inc) { b = bitd132(in, n, x); vbxput32(out, x); *out++=b; return n == 256?bitd1pack256v32(in, n, out, x, b):bitd1pack32( in, n, out, x, b); } + else { b = bitd32( in, n, x); vbxput32(out, x); *out++=b; return n == 256?bitdpack256v32( in, n, out, x, b):bitdpack32( in, n, out, x, b); }*/ #endif #endif - case TB_PFORDA: DELTR( in, n, inc, pa); VBPUT32(out, in[0]); return p4encx32( pa+1, n-1, out); - case TB_FOR : - case TB_FORDA: x = *in++; --n; - if(inc) { b = bitf132(in, n, x); VBPUT32(out, x); *out++=b; return bitf1pack32(in, n, out, x, b); } - else { b = bitf32( in, n, x); VBPUT32(out, x); *out++=b; return bitfpack32( in, n, out, x, b); } - - case TB_PACK128H: - case TB_PACK: x = *in++; --n; - if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return bitd1pack32(in, n, out, x, b); } - else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return bitdpack32( in, n, out, x, b); } - case TB_NPACK: return out+(inc?bitnd1pack32(in, n, out):bitndpack32( in, n, out)); + //-------- External libs ---------- #if C_SIMPLE8B - case AM_SIMPLE8B: b = bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); if(b>28) die("simple-8b overflow.bits size>28\n"); - return vs8benc( pa, n, out); - #endif - + case AM_SIMPLE8B: b = bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); if(b>28) die("simple-8b overflow.bits size>28\n"); + return vs8benc( pa, n, out); + #endif + case P_COPY: return u32enc( in, n, (unsigned *)out); + case P_MCPY: memcpy(out, _in, _n); return out+_n; #if C_FASTPFOR - case FP_VBYTE: bitdelta32( in, n, pa, -inc, inc); return vbyteenc( pa, n, (unsigned *)out); + case FP_VBYTE: bitdelta32( in, n, pa, -inc, inc); return vbyteenc( pa, n, (unsigned *)out); #endif #if C_MASKEDVBYTE - case P_MASKEDVBYTE: return out+vbyte_encode_delta(in, n, out, 0); + case P_MASKEDVBYTE: return out+vbyte_encode_delta(in, n, out, 0); #endif #if C_LIBFOR @@ -642,34 +643,30 @@ unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int #endif #if C_POLYCOM - case PC_VBYTE: bitdelta32( in, n, pa, -inc, inc); return vbpolyenc(pa, n, out); - case PC_SIMPLE16: b = bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); if(b>28) die("simple16 overflow.bits size>28\n"); - return vs16enc( pa, n, (unsigned *)out); - case PC_OPTPFD: b = bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); if(b>28) die("optp4 overflow.bits size>28\n"); - return optpfdenc32(pa, n, out); - /*b = bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); if(b>28) die("optp4 overflow.bits size>28\n"); + case PC_VBYTE: bitdelta32( in, n, pa, -inc, inc);return vbpolyenc(pa, n, out); + case PC_SIMPLE16: b = bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); if(b>28) die("simple16 overflow.bits size>28\n"); + return vs16enc( pa, n, (unsigned *)out); + case PC_OPTPFD: b = bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); if(b>28) die("optp4 overflow.bits size>28\n"); + return optpfdenc32(pa, n, out); + /*b = bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); if(b>28) die("optp4 overflow.bits size>28\n"); if(n < 128) return vbyteenc( pa, n, (unsigned *)out); else { return out + OPT4(pa, n, (unsigned *)out); }*/ #endif #if C_QMX - //case P_QMX: { bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); unsigned char *q = qmx_enc(pa, n, out+4); *(unsigned *)out = q - (out+4); return q; } - #endif - - #if C_QMX - case P_QMX: { bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); ANT_compress_qmx qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } - case P_QMX2: { bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); ANT_compress_qmx_v2 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } - case P_QMX3: { bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); ANT_compress_qmx_v3 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } - case P_QMX4: { bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); ANT_compress_qmx_v4 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } - #endif - + //case P_QMX: { bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); unsigned char *q = qmx_enc(pa, n, out+4); *(unsigned *)out = q - (out+4); return q; } + case P_QMX: { bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); ANT_compress_qmx qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } + case P_QMX2: { bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); ANT_compress_qmx_v2 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } + case P_QMX3: { bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); ANT_compress_qmx_v3 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } + case P_QMX4: { bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); ANT_compress_qmx_v4 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } + #endif #if C_SIMDCOMP case SC_FOR: - case SC_FORDA: b = bitf32( in+1, --n, in[0]); VBPUT32(out, in[0]); *out++=b; if(n<128) return (unsigned char *)simdpackFOR_length(in[0], (in+1), n, (__m128i *)out, b); + case SC_FORDA: x = *in++; b = bitf32( in, --n, x); vbxput32(out, x); *out++=b; return (unsigned char *)simdpackFOR_length(x, in, n, (__m128i *)out, b); case SC_SIMDPACK128: - if(n < 129) { bitdelta32( in, n, pa, -inc, inc); return vbyteenc((unsigned *)pa, n, (unsigned *)out); } - else { b = simdmaxbitsd1(in[0], in+1); VBPUT32(out, in[0]); *out++=b; return simdpackwithoutmaskd1n(in+1, n-1, (unsigned *)out, in[0], b); } + if(n < 129) { bitdelta32( in, n, pa, -inc, inc); return vbyteenc((unsigned *)pa, n, (unsigned *)out); } + else { b = simdmaxbitsd1(in[0], in+1); vbxput32(out, in[0]); *out++=b; return simdpackwithoutmaskd1n(in+1, n-1, (unsigned *)out, in[0], b); } #endif else { simdpackFOR(in[0], (in+1), (__m128i *)out, b); return out + simdpackFOR_compressedbytes(n, b); } #if C_STREAMVBYTE @@ -681,10 +678,10 @@ unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int #endif // --------- transpose + lz77 ------------------------------------------------------------------------------------------------ #if C_TRANSFORM - case TB_ZIGZAG_32: b = bitzigzag32(in, n, (unsigned *)out, 0); return out + n*4; - case TB_TP8_32: bitdelta32(in, n, (unsigned *)sbuf, -inc, inc); _transpose4( (unsigned char *)sbuf, n*4, out); return out + n*4; - case TB_TP8V_32: bitdelta32(in, n, (unsigned *)sbuf, -inc, inc); TRANSPOSE( (unsigned char *)sbuf, n*4, out); return out + n*4; - case TB_TP4V_32: bitdelta32(in, n, (unsigned *)sbuf, -inc, inc); transposen4( (unsigned char *)sbuf, n*4, out); return out + n*4; + case TB_ZIGZAG_32:b = bitzigzag32(in, n, (unsigned *)out, 0); return out + n*4; + case TB_TP8_32: bitdelta32(in, n, (unsigned *)sbuf, -inc, inc); _transpose4( (unsigned char *)sbuf, n*4, out); return out + n*4; + case TB_TP8V_32: bitdelta32(in, n, (unsigned *)sbuf, -inc, inc); TRANSPOSE( (unsigned char *)sbuf, n*4, out); return out + n*4; + case TB_TP4V_32: bitdelta32(in, n, (unsigned *)sbuf, -inc, inc); transposen4( (unsigned char *)sbuf, n*4, out); return out + n*4; case TB_DELTA_32: bitdelta32(in, n, (unsigned *)out, -inc, inc); return out + n*4; #endif @@ -694,9 +691,9 @@ unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int case P_LZT20:{ bitdelta32(in, n, (unsigned *)out, -inc, inc); TRANSPOSE((unsigned char *)out, n*4, sbuf); struct lzobj lz; lz.srclen = n*4; lz.src = sbuf; lz.dst = out; lz.dstlen = n*4; lz.level = 0; lz.hbits = 16; return out + lzbc01(&lz); } case P_LZT22:{ bitdelta32(in, n, (unsigned *)out, -inc, inc); TRANSPOSE((unsigned char *)out, n*4, sbuf); struct lzobj lz; lz.srclen = n*4; lz.src = sbuf; lz.dst = out; lz.dstlen = n*4; lz.level = 2; lz.hbits = 26; return out + lzbc2(&lz); } case P_VSHUF:{ bitdelta32(in, n, (unsigned *)out, -inc, inc); TRANSPOSE((unsigned char *)out, n*4, sbuf); unsigned char *p = mheenc(sbuf, n*4, MH_BLK, out+4, out+n*4+MH_BLK); ctou32(out) = p-(out+4); return p; } - //case P_VSHUF:{ bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); unsigned char *p=vsenc32(pa, n, sbuf); p = mheenc(sbuf, p-sbuf, MH_BLK, out+4, out+n*4+1024); ctou32(out) = p-(out+4); return p; } + //case P_VSHUF:{ bitdelta32( in+1, --n, pa, in[0], inc); vbxput32(out, in[0]); unsigned char *p=vsenc32(pa, n, sbuf); p = mheenc(sbuf, p-sbuf, MH_BLK, out+4, out+n*4+1024); ctou32(out) = p-(out+4); return p; } #endif - + #if C_C_BLOSC case P_BS_LZ: case P_BS_LZ4: @@ -717,100 +714,111 @@ unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int case P_ZLIB1: case P_ZLIB2: case P_ZLIB3: case P_ZLIB4: case P_ZLIB5: case P_ZLIB6: case P_ZLIB7: case P_ZLIB8: case P_ZLIB9: { bitdelta32(in, n, (unsigned *)out, -inc, inc); TRANSPOSE((unsigned char *)out, n*4, sbuf); uLongf outlen = n*4; int rc = compress2(out+4, &outlen, sbuf, n*4, codec-P_ZLIB1+1); if(rc != Z_OK) die("zlib compress2 rc=%d\n", rc); *(unsigned *)out = outlen; return out + 4 + outlen; } #endif - case P_MAX ... 63: break; } return out; } unsigned char *coddecomps(unsigned char *in, unsigned _n, unsigned char *_out, int outlen, int codec, int lev, char *prm, int inc) { unsigned *out = (unsigned *)_out, n = (outlen+3) / 4,x,b; - switch(codec&0x3f) { - //------------- copy ------------------------------------------------------- - case P_COPY: return u32dec( (unsigned *)in, n, out); - case P_MCPY: memcpy(_out, in, _n); return _out+_n; + + switch(codec) { + case TB_VSIMPLE:vbxget32(in, x);*out++ = x; --n; in = vsdec32( in, n, out); bitundn32(out, n, x+inc, inc); return in; + case TB_EF: vbxget32(in, x);*out++ = x; --n; return inc?efano1dec32( in, n, out, x+1 ):efanodec32( in, n, out, x); + case TB_VBYTE: vbxget32(in, x);*out++ = x; --n; return inc?vbd1dec32( in, n, out, x ):vbddec32( in, n, out, x); - case TB_PFORDA: VBGET32(in, x);*out = x; return inc?p4fdecx32( in, n-1, out+1, x ):p4f0decx32(in, n-1, out+1, x); - case TB_VBYTE: VBGET32(in, x);*out = x; return inc?vbd1dec32( in, n-1, out+1, x ):vbddec32(in, n-1, out+1, x); - case TB_VSIMPLE: VBGET32(in, x);*out = x; in = vsdec32( in, n-1, out+1); bitundn32(out, n, -inc, inc); break; - case TB_FOR: VBGET32(in, x);*out = x; b = *in++; return inc?bitf1unpack32( in, n-1, out+1, x, b):bitfunpack32( in, n-1, out+1, x, b); - case TB_FORDA: VBGET32(in, x);*out = x; b = *in++; return inc?bitf1unpackx32( in, n-1, out+1, x, b):bitfunpackx32( in, n-1, out+1, x, b); - case TB_PACK: VBGET32(in, x);*out = x; b = *in++; return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b); - case TB_NPACK: return in+(inc?bitnd1unpack32( in, n, out ):bitndunpack32( in, n, out)); - case TB_EFANO: VBGET32(in, x);*out++ = x; --n; return inc?efano1dec32( in, n, out, x+1):efanodec32( in, n, out, x); + case TB_PFDA: vbxget32(in, x);*out++ = x; --n; return inc?p4f1decx32( in, n, out, x ):p4fdecx32( in, n, out, x); + + case TB_FOR: vbxget32(in, x);*out++ = x; --n; b = *in++; return inc?bitf1unpack32( in, n, out, x, b):bitfunpack32( in, n, out, x, b); + case TB_FORDA: vbxget32(in, x);*out++ = x; --n; b = *in++; return inc?bitf1unpackx32( in, n, out, x, b):bitfunpackx32( in, n, out, x, b); + case TB_BP: vbxget32(in, x);*out++ = x; --n; b = *in++; return inc?bitd1unpack32( in, n, out, x, b):bitdunpack32( in, n, out, x, b); + case TB_BPN: return in+(inc?bitnd1unpack32(in, n, out ):bitndunpack32( in, n, out)); + #if C_TURBOPFORV - case TB_EFANOV: VBGET32(in, x); *out++ = x; --n; - if(inc) { return n==128?efano1dec128v32(in, n, out, x+1 ):efano1dec32(in, n, out, x+1); } - else { return n==128?efanodec128v32( in, n, out, x ):efanodec32( in, n, out, x); } - case TB_PFOR128: VBGET32(in, x); *out++ = x; --n; //__builtin_prefetch(in+256); - if(inc) { return n==128?p4d1dec128v32( in, n, out, x ):p4d1dec32(in, n, out, x); } - else { return n==128?p4ddec128v32( in, n, out, x ):p4ddec32( in, n, out, x); } - case TB_PFORN128: return in+(inc?p4nd1dec128v32(in, n, out):p4nddec128v32( in, n, out)); - case TB_PACK128V: VBGET32(in, x);*out = x; b = *in++; - if(n <= 128) { return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in,n-1, out+1, x, b); } - else { return inc?bitd1unpack128v32( in, n, out+1, x, b):bitdunpack128v32(in,n, out+1, x, b); } + case TB_FOR128V:vbxget32(in, x);*out++ = x; --n; b = *in++; + if(inc) { return n==128?bitf1unpack128v32( in, n, out, x, b):bitf1unpack32( in, n, out, x, b); } + else { return n==128?bitfunpack128v32( in, n, out, x, b):bitfunpack32(in, n, out, x, b); } + case TB_EF128V: vbxget32(in, x);*out++ = x; --n; + if(inc) { return n==128?efano1dec128v32(in, n, out, x+1 ):efano1dec32( in, n, out, x+1); } + else { return n==128?efanodec128v32( in, n, out, x ):efanodec32( in, n, out, x); } + case TB_PF128V: vbxget32(in, x);*out++ = x; --n; //__builtin_prefetch(in+256); + if(inc) { return n==128?p4d1dec128v32( in, n, out, x ):p4d1dec32( in, n, out, x); } + else { return n==128?p4ddec128v32( in, n, out, x ):p4ddec32( in, n, out, x); } + case TB_PFN128V: return in+(inc?p4nd1dec128v32(in, n, out ):p4nddec128v32( in, n, out)); + case TB_BP128V: vbxget32(in, x);*out++ = x; --n; b = *in++; + if(inc) { return n==128?bitd1unpack128v32(in, n, out, x, b):bitd1unpack32( in, n, out, x, b); } + else { return n==128?bitdunpack128v32( in, n, out, x, b):bitdunpack32( in, n, out, x, b); } #ifdef __AVX2__ - case TB_EFANO256V:VBGET32(in, x); *out++ = x; --n; - if(inc) { return n==256?efano1dec256v32(in, n, out, x+1 ):efano1dec32(in, n, out, x+1); } - else { return n==256?efanodec256v32( in, n, out, x ):efanodec32( in, n, out, x); } - case TB_PFOR256: VBGET32(in, x); *out++ = x; --n; //__builtin_prefetch(in+256); - if(inc) { return n==256?p4d1dec256v32( in, n, out, x ):p4d1dec32(in, n, out, x); } - else { return n==256?p4ddec256v32( in, n, out, x ):p4ddec32( in, n, out, x); } - case TB_PFORN256: return in+(inc?p4nd1dec256v32(in, n, out ):p4nddec256v32( in, n, out)); - /*case TB_PACK256V: VBGET32(in, x);*out = x; b = *in++; - if(n <= 256) return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b); - else { in = bitunpack256v32( in, out+1, b);bitundn32(out, n, -inc, inc); } break;*/ - case TB_PACK256V: VBGET32(in, x);*out = x; b = *in++; - if(n <= 256) { return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b); } - else { return inc?bitd1unpack256v32(in, n-1, out+1, x, b):bitdunpack256v32( in, n-1, out+1, x, b); } + case TB_EF256V: vbxget32(in, x);*out++ = x; --n; + if(inc) { return n==256?efano1dec256v32(in, n, out, x+1 ):efano1dec32( in, n, out, x+1); } + else { return n==256?efanodec256v32( in, n, out, x ):efanodec32( in, n, out, x); } + case TB_PF256V: vbxget32(in, x);*out++ = x; --n; //__builtin_prefetch(in+256); + if(inc) { return n==256?p4d1dec256v32( in, n, out, x ):p4d1dec32( in, n, out, x); } + else { return n==256?p4ddec256v32( in, n, out, x ):p4ddec32( in, n, out, x); } + case TB_PFN256V: return in+(inc?p4nd1dec256v32(in, n, out ):p4nddec256v32( in, n, out)); + + + case TB_FOR256V:vbxget32(in, x);*out++ = x; --n; b = *in++; + if(inc) { return n==256?bitf1unpack256v32( in, n, out, x, b):bitf1unpack32( in, n, out, x, b); } + else { return n==256?bitfunpack256v32( in, n, out, x, b):bitfunpack32(in, n, out, x, b); } + case TB_BP256V: vbxget32(in, x);*out++ = x; --n; b = *in++; + if(inc) { return n==256?bitd1unpack256v32(in, n, out, x, b):bitd1unpack32( in, n, out, x, b); } + else { return n==256?bitdunpack256v32( in, n, out, x, b):bitdunpack32( in, n, out, x, b); } + /*case TB_BP256V:vbxget32(in, x);*out = x; --n; b = *in++; + if(n <= 256) return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b); + else { in = bitunpack256v32( in, out+1, b);bitundn32(out, n, -inc, inc); } break;*/ #endif #endif #if C_TURBOPFORH - case TB_PACK128H: VBGET32(in, x);*out = x; b = *in++; - if(n != 128) { return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b); } - else { return inc?bitd1unpack128h32(in, n-1, out+1, x, b):bitdunpack128h32( in, n-1, out+1, x, b); } + case TB_BP128H: vbxget32(in, x);*out++ = x; --n; b = *in++; + if(inc) { return n==128?bitd1unpack128h32(in, n, out, x, b):bitd1unpack32( in, n, out, x, b); } + else { return n==128?bitdunpack128h32( in, n, out, x, b):bitdunpack32( in, n, out, x, b); } #endif + //-------- Externa libs + case P_COPY: return u32dec( (unsigned *)in, n, out); + case P_MCPY: memcpy(_out, in, _n); return _out+_n; + #if C_SIMPLE8B - case AM_SIMPLE8B: VBGET32(in, x); *out = x; in = vs8bdec( in, n-1, out+1); bitundn32(out, n, -inc, inc); break; + case AM_SIMPLE8B: vbxget32(in, x);*out++ = x; --n; in = vs8bdec(in, n, out); bitundn32(out, n, x+inc, inc); break; #endif #if C_FASTPFOR - case FP_VBYTE: in = vbytedec( in, n, out); bitundn32(out, n, -inc, inc); break; + case FP_VBYTE: in = vbytedec( in, n, out); bitundn32(out, n, -inc, inc); break; #endif #if C_LIBFOR - case LF_FORX: return for_selectx(in, n, out); //{ out[0] = *(unsigned *)in, b = in[4]; return for_selectx(in+5, n-1, out+1, out[0], b); } - case LF_FOR: return in + for_uncompress(in, out, n); + case LF_FORX: return for_selectx(in, n, out); //{ out[0] = *(unsigned *)in, b = in[4]; return for_selectx(in+5, n-1, out+1, out[0], b); } + case LF_FOR: return in + for_uncompress(in, out, n); #endif #if C_MASKEDVBYTE - case P_MASKEDVBYTE: in += masked_vbyte_decode_delta(in, out, n, 0); break; + case P_MASKEDVBYTE: in += masked_vbyte_decode_delta(in, out, n, 0); break; #endif #if C_POLYCOM case PC_VBYTE: in = vbpolydec( in, n, out); bitundn32(out, n, -inc, inc); break; - case PC_SIMPLE16: VBGET32(in, x); *out = x; in = vs16dec((unsigned *)in, n-1, out+1); bitundn32(out, n, -inc, inc); break; - case PC_OPTPFD: VBGET32(in, x); *out = x; in = optpfddec32(in,n-1,out+1); bitundn32(out+1, n-1, x, inc); break; + case PC_SIMPLE16: vbxget32(in, x); *out = x; in = vs16dec((unsigned *)in, n-1, out+1); bitundn32(out, n, -inc, inc); break; + case PC_OPTPFD: vbxget32(in, x); *out = x; in = optpfddec32(in,n-1,out+1); bitundn32(out+1, n-1, x, inc); break; /*if(n < 129) in = vbytedec(in, n, out); else { _VBGET32(in, x, *out = x); unsigned all_array[2048]; in = (unsigned char *)detailed_p4_decode(out+1, (unsigned *)in, all_array); } bitundn32(out, n, -inc, inc); break;*/ #endif - //case P_QMX: { VBGET32(in, x); *out = x; unsigned l = *(unsigned *)in; in = qmx_dec(in+4, l, out+1, n-1); bitundn32(out+1, n-1, x, inc); break; } + //case P_QMX: { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; in = qmx_dec(in+4, l, out+1, n-1); bitundn32(out+1, n-1, x, inc); break; } #if C_QMX //case P_QMX: return qmx_dec(in+4, ctou32(in), out, n); - case P_QMX: { VBGET32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx qmx; qmx.decompress(out+1, n-1, in+4, ctou32(in)); bitundn32(out+1, n-1, x, inc); return in+4+ctou32(in);} - case P_QMX2: { VBGET32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v2 qmx; qmx.decompress(out+1, n-1, in+4, ctou32(in)); bitundn32(out+1, n-1, x, inc); return in+4+ctou32(in);} - case P_QMX3: { VBGET32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v3 qmx; qmx.decompress(out+1, n-1, in+4, ctou32(in)); bitundn32(out+1, n-1, x, inc); return in+4+ctou32(in);} - case P_QMX4: { VBGET32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v4 qmx; qmx.decompress(out+1, n-1, in+4, ctou32(in)); bitundn32(out+1, n-1, x, inc); return in+4+ctou32(in);} + case P_QMX: { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx qmx; qmx.decompress(out+1, n-1, in+4, ctou32(in)); bitundn32(out+1, n-1, x, inc); return in+4+ctou32(in);} + case P_QMX2: { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v2 qmx; qmx.decompress(out+1, n-1, in+4, ctou32(in)); bitundn32(out+1, n-1, x, inc); return in+4+ctou32(in);} + case P_QMX3: { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v3 qmx; qmx.decompress(out+1, n-1, in+4, ctou32(in)); bitundn32(out+1, n-1, x, inc); return in+4+ctou32(in);} + case P_QMX4: { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v4 qmx; qmx.decompress(out+1, n-1, in+4, ctou32(in)); bitundn32(out+1, n-1, x, inc); return in+4+ctou32(in);} #endif #if C_SIMDCOMP case SC_SIMDPACK128: if(n < 129) { in = vbytedec(in, n, out); bitundn32(out, n, -inc, inc); } - else { VBGET32(in, x);*out = x; b = *in++; in = simdunpackd1n((uint32_t *)in, n-1, out+1, out[0], b); } break; + else { vbxget32(in, x);*out = x; b = *in++; in = simdunpackd1n((uint32_t *)in, n-1, out+1, out[0], b); } break; - case SC_FOR: VBGET32(in, x);*out = x; b = *in++; if(n < 129) return (unsigned char *)simdunpackFOR_length(x, (const __m128i *)in, n-1, (unsigned *)(out+1), b); else { simdunpackFOR(x, (const __m128i *)in, (unsigned *)(out+1), b); return in + simdpackFOR_compressedbytes(n-1, b); } - case SC_FORDA: { VBGET32(in, x);*out++ = x; b = *in++; unsigned i; for(i=0; i < n-1;i++) out[i] = simdselectFOR(x, (const __m128i *)in, b, i); return in + simdpackFOR_compressedbytes(n-1, b); } + case SC_FOR: vbxget32(in, x);*out = x; b = *in++; if(n < 129) return (unsigned char *)simdunpackFOR_length(x, (const __m128i *)in, n-1, (unsigned *)(out+1), b); else { simdunpackFOR(x, (const __m128i *)in, (unsigned *)(out+1), b); return in + simdpackFOR_compressedbytes(n-1, b); } + case SC_FORDA: { vbxget32(in, x);*out++ = x; --n; b = *in++; unsigned i; for(i=0; i < n;i++) out[i] = simdselectFOR(x, (const __m128i *)in, b, i); return in + simdpackFOR_compressedbytes(n, b); } #endif #if C_STREAMVBYTE @@ -823,9 +831,9 @@ unsigned char *coddecomps(unsigned char *in, unsigned _n, unsigned char *_out, i //---------- transpose + lz77 ---------------------- #if C_TRANSFORM case TB_ZIGZAG_32:memcpy(out, in, n*4); bitunzigzag32(out, n, 0); return in + n*4; - case TB_TP8_32: _untranspose4( (unsigned char *)in, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); return in + n*4; - case TB_TP8V_32: UNTRANSPOSE((unsigned char *)in, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); return in + n*4; - case TB_TP4V_32: untransposen4((unsigned char *)in, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); return in + n*4; + case TB_TP8_32: _untranspose4((unsigned char *)in, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); return in + n*4; + case TB_TP8V_32: UNTRANSPOSE( (unsigned char *)in, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); return in + n*4; + case TB_TP4V_32: untransposen4((unsigned char *)in, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); return in + n*4; case TB_DELTA_32: memcpy(out, in, n*4); bitundn32(out, n, -inc, inc); return in + n*4; #endif //---------- delta + transpose + lz77 ---------------------- @@ -853,48 +861,98 @@ unsigned char *coddecomps(unsigned char *in, unsigned _n, unsigned char *_out, i case P_LZT10: { struct lzobj lz; lz.dstlen = n*4; lz.src = in; lz.dst = sbuf; lz.level = 0; in += lz8d(&lz); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); } break; case P_LZT20: case P_LZT22: { struct lzobj lz; lz.dstlen = n*4; lz.src = in; lz.dst = sbuf; lz.level = 0; in += lzbd(&lz); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); } break; - //case P_VSHUF: VBGET32(in, x);*out = x; in += ransdecompress( in, (n-1)*4, sbuf); vsdec32( sbuf, n-1, out+1); bitundn32(out, n, -inc, inc); break; + //case P_VSHUF: vbxget32(in, x);*out = x; in += ansdecompress( in, (n-1)*4, sbuf); vsdec32( sbuf, n-1, out+1); bitundn32(out, n, -inc, inc); break; case P_VSHUF: { unsigned inlen = ctou32(in); in+=4; in = mhdec(in, inlen, MH_BLK, sbuf, n*4); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); bitundn32(out, n, -inc, inc); } break; - //case P_VSHUF: { VBGET32(in, x);*out = x; unsigned inlen = ctou32(in); in+=4; in = mhdec(in, inlen, MH_BLK, sbuf, n*4); vsdec32(sbuf, n-1, out+1); bitundn32(out+1, n-1, x, inc); } break; + //case P_VSHUF: { vbxget32(in, x);*out = x; unsigned inlen = ctou32(in); in+=4; in = mhdec(in, inlen, MH_BLK, sbuf, n*4); vsdec32(sbuf, n-1, out+1); bitundn32(out+1, n-1, x, inc); } break; #endif - case P_MAX ... 63: break; + } + return in; +} + +unsigned char *codcompz(unsigned char *_in, unsigned _n, unsigned char *out, int outsize, int codec, int lev, char *prm, int inc) { + unsigned *in = (unsigned *)_in, n = (_n+3) / 4, i, pa[BLK_SIZE+2048], x, b; + + switch(codec) { + case TB_VBYTE: x = *in++; --n; vbxput32(out, x); return vbzenc32( in, n, out, x); + case TB_BP: x = *in++; --n; vbxput32(out, x);b = bitz32(in, n, x); *out++=b;return bitzpack32(in, n, out, x, b); + case TB_BPN: x = *in++; --n; vbxput32(out, x); return out+bitnzpack32(in, n, out); + #if C_TURBOPFORV + case TB_BP128V: x = *in++; --n; vbxput32(out, x);b = bitz32(in, n, x); *out++=b;return n == 128?bitzpack128v32(in, n, out, x, b):bitzpack32(in, n, out, x, b); + #ifdef __AVX2__ + //case TB_BP256V: b = bitz32(in, n, x); *out++=b; return n == 256?bitzpack256v32(in, n, out, x, b ):bitzpack32(in, n, out,x, b); + #endif + #endif + } + return out; +} + +unsigned char *coddecompz(unsigned char *in, unsigned _n, unsigned char *_out, int outlen, int codec, int lev, char *prm, int inc) { + unsigned *out = (unsigned *)_out, n = (outlen+3) / 4,x,b; + + + switch(codec) { + case TB_VBYTE: vbxget32(in, x); *out++ = x; --n; return vbzdec32(in, n, out, x); + case TB_BP: vbxget32(in, x); *out++ = x; --n; b = *in++; return bitzunpack32(in, n, out, x, b); + case TB_BPN: vbxget32(in, x); *out++ = x; --n; return in+bitnzunpack32(in, n, out); + + #if C_TURBOPFORV + case TB_BP128V:vbxget32(in, x); *out++ = x; --n; b = *in++; return n == 128?bitzunpack128v32(in, n, out, x, b):bitzunpack32(in, n, out, x, b); + #ifdef __AVX2__ + //case TB_BP256V: b = *in++; return n == 256?bitzunpack256v32(in, n, out, x, b):bitzunpack32(in, n, out, x, b); + #endif + #endif } return in; } unsigned char *codcomp(unsigned char *_in, unsigned _n, unsigned char *out, int outsize, int codec, int lev, char *prm, int b) { - unsigned *in = (unsigned *)_in, n = (_n+3) / 4; - int i,xb; - switch(codec&0x3f) { - case P_COPY: return u32enc( in, n, (unsigned *)out); - case P_MCPY: return u32enc( in, n, (unsigned *)out); //memcpy(out, _in, _n); return out+_n; + unsigned *in = (unsigned *)_in, n = (_n+3) / 4, x; + + switch(codec) { + case TB_EF: return out; + case TB_VBYTE: return vbenc32( in, n, out); + case TB_VSIMPLE: return vsenc32( in, n, out); + + case TB_PFDA: return p4encx32( in, n, out); - case TB_VBYTE: return vbenc32( in, n, out); - case TB_VSIMPLE: return vsenc32( in, n, out); - case TB_EFANO: return out; - case TB_FOR : - case TB_FORDA: - case TB_PACK128H: - case TB_PACK: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return bitpack32(in, n, out, b); - case TB_NPACK: return out+bitnpack32(in, n, out); - case TB_PFORDA: return p4encx32( in, n, out); + case TB_FOR : + case TB_FORDA: b = bitfm32( in, n, &x); vbxput32(out, x); *out++=b; return bitfpack32( in, n, out, x, b); + + case TB_BP128H: + case TB_BP: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } + return bitpack32(in, n, out, b); + case TB_BPN: return out+bitnpack32(in, n, out); #if C_TURBOPFORV - case TB_EFANOV:return out; - case TB_PFOR128: return n == 128?p4enc128v32(in, n, out):p4enc32(in, n, out); - case TB_PFORN128: return out+p4nenc128v32(in, n, out); - case TB_PACK128V: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return n != 128?bitpack32(in, n, out, b):bitpack128v32(in, n, out, b); + case TB_EF128V: return out; + + case TB_PF128V: return n == 128?p4enc128v32(in, n, out):p4enc32(in, n, out); + case TB_PFN128V: return out+p4nenc128v32(in, n, out); + + case TB_FOR128V: b = bitfm32( in, n, &x); vbxput32(out, x); *out++=b; return bitfpack128v32( in, n, out, x, b); + + case TB_BP128V: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } + return n == 128?bitpack128v32(in, n, out, b):bitpack32(in, n, out, b); #ifdef __AVX2__ - case TB_PFOR256: return n == 256?p4enc256v32(in, n, out):p4enc32(in, n, out); - case TB_PFORN256: return out+p4nenc256v32(in, n, out); - case TB_PACK256V: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return n != 256?bitpack32(in, n, out, b):bitpack256v32(in, n, out, b); + case TB_PF256V: return n == 256?p4enc256v32(in, n, out):p4enc32(in, n, out); + case TB_PFN256V: return out+p4nenc256v32(in, n, out); + + case TB_FOR256V: b = bitfm32( in, n, &x); vbxput32(out, x); *out++=b; return bitfpack256v32( in, n, out, x, b); + + case TB_BP256V: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } + return n == 256?bitpack256v32(in, n, out, b):bitpack32(in, n, out, b); #endif #endif + + //----- External libs ---------- + case P_COPY: return u32enc( in, n, (unsigned *)out); + case P_MCPY: memcpy(out, _in, _n); return out+_n; - #if C_SIMPLE8B - case AM_SIMPLE8B: return vs8benc( in, n, out); + #if C_SIMPLE8B + case AM_SIMPLE8B:return vs8benc( in, n, out); #endif + #if C_FASTPFOR case FP_FASTPFOR: { size_t nvalue = outsize/4; @@ -929,23 +987,22 @@ unsigned char *codcomp(unsigned char *_in, unsigned _n, unsigned char *out, int ctou32(out) = nvalue; return out+4+nvalue*4; } - case FP_VBYTE: //return vbyteenc( in, i, (unsigned *)out); - { size_t nvalue=outsize/4; FastPForLib::VariableByte ic; ic.encodeArray((const int32_t *)in, (const size_t)n, (uint32_t *)(out+4), nvalue); ctou32(out)=nvalue; return out+4+nvalue*4; } - case FP_SIMPLE8BRLE: { size_t nvalue=outsize/4; FastPForLib::Simple8b_RLE ic; ic.encodeArray((const int32_t *)in, (const size_t)n, (uint32_t *)(out+4), nvalue); ctou32(out)=nvalue; return out+4+nvalue*4; } + case FP_VBYTE: { size_t nvalue=outsize/4; FastPForLib::VariableByte ic; ic.encodeArray((const int32_t *)in, (const size_t)n, (uint32_t *)(out+4), nvalue); ctou32(out)=nvalue; return out+4+nvalue*4; } + case FP_SIMPLE8BRLE:{ size_t nvalue=outsize/4; FastPForLib::Simple8b_RLE ic; ic.encodeArray((const int32_t *)in, (const size_t)n, (uint32_t *)(out+4), nvalue); ctou32(out)=nvalue; return out+4+nvalue*4; } #endif #if C_LIBFOR - case LF_FORX: //if(b < 0) b = maxbits(in), *out++ = b; return out + for_compress_bits(in, out, n, 0, b); - case LF_FOR: return out + for_compress_unsorted(in, out, n); + case LF_FORX: //if(b < 0) b = maxbits(in), *out++ = b; return out + for_compress_bits(in, out, n, 0, b); + case LF_FOR: return out + for_compress_unsorted(in, out, n); #endif #if C_LITTLEPACK - case LI_PACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } pack32( in, n, b, out); return out + byte_count(n,b); + case LI_PACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } pack32( in, n, b, out); return out + byte_count(n,b); case LI_HORPACK: - case LI_TURBOPACK:if(b < 0) { b = maxbits_length(in,n); *out++ = b; } turbopack32(in, n, b, out); return out + byte_count(n,b); - case LI_SCPACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } scpack32( in, n, b, out); return out + byte_count(n,b); + case LI_TURBOPACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } turbopack32(in, n, b, out); return out + byte_count(n,b); + case LI_SCPACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } scpack32( in, n, b, out); return out + byte_count(n,b); #ifdef __AVX2__ - case LI_BMIPACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } bmipack32( in, n, b, out); return out + byte_count(n,b); + case LI_BMIPACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } bmipack32( in, n, b, out); return out + byte_count(n,b); #endif #endif @@ -961,42 +1018,42 @@ unsigned char *codcomp(unsigned char *_in, unsigned _n, unsigned char *out, int #endif #if C_QMX - case P_QMX: { ANT_compress_qmx qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } // { unsigned char *q = qmx_enc(in, n, out+4); ctou32(out) = q - (out+4); return q; - case P_QMX2: { ANT_compress_qmx_v2 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } - case P_QMX3: { ANT_compress_qmx_v3 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } - case P_QMX4: { ANT_compress_qmx_v4 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } + case P_QMX: { ANT_compress_qmx qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } // { unsigned char *q = qmx_enc(in, n, out+4); ctou32(out) = q - (out+4); return q; + case P_QMX2: { ANT_compress_qmx_v2 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } + case P_QMX3: { ANT_compress_qmx_v3 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } + case P_QMX4: { ANT_compress_qmx_v4 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } #endif #if C_SIMDCOMP - //case SC_PACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } return fastpackwithoutmask32(in, n, (uint32_t *)out, b); + //case SC_PACK: if(b < 0) { b = maxbits_length(in,n); *out++ = b; } return fastpackwithoutmask32(in, n, (uint32_t *)out, b); case SC_FOR: case SC_FORDA: - case SC_SIMDPACK128: if(b < 0) b = maxbits(in), *out++ = b; return (unsigned char *)simdpack_length(in, n, (__m128i *)out, b); + case SC_SIMDPACK128:if(b < 0) b = maxbits(in), *out++ = b; return (unsigned char *)simdpack_length(in, n, (__m128i *)out, b); #ifdef __AVX2__ - case SC_SIMDPACK256: if(b < 0) b = avxmaxbits(in), *out++ = b; return (unsigned char *)avxpackwithoutmaskn(in, n, (unsigned *)out, b); + case SC_SIMDPACK256:if(b < 0) b = avxmaxbits(in), *out++ = b; return (unsigned char *)avxpackwithoutmaskn(in, n, (unsigned *)out, b); #endif #endif #if C_STREAMVBYTE - case P_STREAMVBYTE: return out + streamvbyte_encode(in, n, out); + case P_STREAMVBYTE: return out + streamvbyte_encode(in, n, out); #endif #if C_VARINTG8IU - case P_VARINTG8IU: return vintg8enc(in, n, out); + case P_VARINTG8IU: return vintg8enc(in, n, out); #endif // --------- transform ---------------------------------------- #if C_TRANSFORM - case TB_ZIGZAG_32: bitzigzag32(in, n, (unsigned *)out, 0); return out + n*4; - case TB_TP8_32: _transpose4( (unsigned char *)in, n*4, out); return out + n*4; - case TB_TP8V_32: TRANSPOSE( (unsigned char *)in, n*4, out); return out + n*4; - case TB_TP4V_32: transposen4( (unsigned char *)in, n*4, out); return out + n*4; + case TB_ZIGZAG_32: bitzigzag32(in, n, (unsigned *)out, 0); return out + n*4; + case TB_TP8_32: _transpose4( (unsigned char *)in, n*4, out); return out + n*4; + case TB_TP8V_32: TRANSPOSE( (unsigned char *)in, n*4, out); return out + n*4; + case TB_TP4V_32: transposen4( (unsigned char *)in, n*4, out); return out + n*4; #endif #if C_C_BLOSC - case P_BS_SHUFFLE: shuffle( 4, n*4, (unsigned char *)in, out); return out + n*4; + case P_BS_SHUFFLE: shuffle( 4, n*4, (unsigned char *)in, out); return out + n*4; #endif #if C_BITSHUFFLE - case P_BITSHUFFLE: BITSHUFFLE( (unsigned char *)in, n*4, out);/* bshuf_bitshuffle(in, out, n, 4, 0);*/ return out + n*4; + case P_BITSHUFFLE: BITSHUFFLE( (unsigned char *)in, n*4, out);/* bshuf_bitshuffle(in, out, n, 4, 0);*/ return out + n*4; #endif // --------- transpose + lz77 ---------------------------------------- @@ -1005,16 +1062,16 @@ unsigned char *codcomp(unsigned char *_in, unsigned _n, unsigned char *out, int case P_LZT20: { n *= 4; TRANSPOSE( (unsigned char *)in, n, sbuf); struct lzobj lz; lz.srclen = n; lz.src = sbuf; lz.dst = out; lz.dstlen = n; lz.level = 0; lz.hbits = 16; return out + lzbc01(&lz); } case P_LZT22: { n *= 4; TRANSPOSE( (unsigned char *)in, n, sbuf); struct lzobj lz; lz.srclen = n; lz.src = sbuf; lz.dst = out; lz.dstlen = n; lz.level = 2; lz.hbits = 26; return out + lzbc2(&lz); } case P_LZT32: { n *= 4; TRANSPOSE( (unsigned char *)in, n, sbuf); struct lzobj lz; lz.srclen = n; lz.src = sbuf; lz.dst = out; lz.dstlen = n; lz.level = 2; lz.hbits = 26; return out + lzhc2(&lz); } - //case P_VSHUF: { unsigned char *p = vsenc32(in, n, sbuf); p = mheenc(sbuf, p-sbuf, MH_BLK, out+2, out+n*4+256); ctou16(out) = p-(out+2); printf("L=%d ", p-(out+2)); return p; } //out + ranscompress( sbuf, xb, out, out+n*4); - //case P_VSHUF: { unsigned char *p = mheenc(in, n*4, MH_BLK, out+4, out+n*4+256); ctou32(out) = p-(out+4); return p; } //out + ranscompress( sbuf, xb, out, out+n*4); - case P_VSHUF: { unsigned char *p;/* = vsenc32(in, n, sbuf)*/ n *= 4; TRANSPOSE( (unsigned char *)in, n, sbuf); p = mheenc(sbuf, n/*p-sbuf*/, MH_BLK, out+4, out+n+256); ctou32(out) = p-(out+4); return p; } //out + ranscompress( sbuf, xb, out, out+n*4); + //case P_VSHUF: { unsigned char *p = vsenc32(in, n, sbuf); p = mheenc(sbuf, p-sbuf, MH_BLK, out+2, out+n*4+256); ctou16(out) = p-(out+2); printf("L=%d ", p-(out+2)); return p; } //out + anscompress( sbuf, xb, out, out+n*4); + //case P_VSHUF: { unsigned char *p = mheenc(in, n*4, MH_BLK, out+4, out+n*4+256); ctou32(out) = p-(out+4); return p; } //out + anscompress( sbuf, xb, out, out+n*4); + case P_VSHUF: { unsigned char *p;/* = vsenc32(in, n, sbuf)*/ n *= 4; TRANSPOSE( (unsigned char *)in, n, sbuf); p = mheenc(sbuf, n/*p-sbuf*/, MH_BLK, out+4, out+n+256); ctou32(out) = p-(out+4); return p; } //out + anscompress( sbuf, xb, out, out+n*4); #endif #if C_BITSHUFFLE - case TB_VS_S1: BITSHUFFLE( (unsigned char *)in, n*4, sbuf); return vsenc32((unsigned *)sbuf, n, out); + case TB_VSIMPLE_S1: BITSHUFFLE( (unsigned char *)in, n*4, sbuf); return vsenc32((unsigned *)sbuf, n, out); #endif - case TB_VS_S4: transposen4( (unsigned char *)in, n*4, sbuf); return vsenc32((unsigned *)sbuf, n, out); - case TB_VS_S8: TRANSPOSE( (unsigned char *)in, n*4, sbuf); return vsenc32((unsigned *)sbuf, n, out); + case TB_VSIMPLE_S4: transposen4( (unsigned char *)in, n*4, sbuf); return vsenc32((unsigned *)sbuf, n, out); + case TB_VSIMPLE_S8: TRANSPOSE( (unsigned char *)in, n*4, sbuf); return vsenc32((unsigned *)sbuf, n, out); #if C_LZ4 #if C_BITSHUFFLE @@ -1022,7 +1079,7 @@ unsigned char *codcomp(unsigned char *_in, unsigned _n, unsigned char *out, int #endif case LZ4_ : return out + LZ4_compress((char *)in, (char *)out, n*4); case LZ4_S4: transposen4( (unsigned char *)in, n*4, sbuf); return out + LZ4_compress((char *)sbuf, (char *)out, n*4); - case LZ4_S8: TRANSPOSE( (unsigned char *)in, n*4, sbuf); return out + LZ4_compress((char *)sbuf, (char *)out, n*4); + case LZ4_S8: TRANSPOSE( (unsigned char *)in, n*4, sbuf); return out + LZ4_compress((char *)sbuf, (char *)out, n*4); #endif #if C_C_BLOSC @@ -1035,49 +1092,54 @@ unsigned char *codcomp(unsigned char *_in, unsigned _n, unsigned char *out, int #if C_ZLIB case P_ZLIB1: case P_ZLIB2: case P_ZLIB3: case P_ZLIB4: case P_ZLIB5: case P_ZLIB6: case P_ZLIB7: case P_ZLIB8: case P_ZLIB9: { n *= 4; TRANSPOSE( (unsigned char *)in, n, sbuf); uLongf outlen = n; int rc = compress2(out+4, &outlen, sbuf, n, codec-P_ZLIB1+1); if(rc != Z_OK) die("zlib compress2 rc=%d\n", rc); *(unsigned *)out = outlen; return out + 4 + outlen; } - #endif - - case P_MAX ... 63: die("library '%d' not included\n", codec); + #endif + default : die("library '%d' not included\n", codec); } return out; } unsigned char *coddecomp(unsigned char *in, unsigned _n, unsigned char *_out, int outlen, int codec, int lev, char *prm, int b) { - unsigned *out = (unsigned *)_out, n = (outlen+3) / 4; - switch(codec&0x3f) { - case P_COPY: return u32dec( (unsigned *)in, n, out); - case P_MCPY: return u32dec( (unsigned *)in, n, out); //memcpy(_out, in, _n); return in+_n; + unsigned *out = (unsigned *)_out, n = (outlen+3) / 4,x; + switch(codec) { + case TB_VBYTE: return vbdec32( in, n, out); // case P_VU: return vudec32( in, n, out); + case TB_VSIMPLE: return vsdec32( in, n, out); + case TB_EF: return in; + case TB_PFDA : return p4decx32( in, n, out); - // case P_VU: return vudec32( in, n, out); - case TB_VBYTE: return vbdec32( in, n, out); + case TB_FOR: vbxget32(in, x); b = *in++; return bitfunpack32( in, n, out, x, b); + case TB_FORDA: vbxget32(in, x); b = *in++; return bitfunpackx32(in, n, out, x, b); - case TB_PFORDA : return p4decx32( in, n, out); - #if C_SIMPLE8B - case AM_SIMPLE8B: return vs8bdec( in, n, out); - #endif - case TB_VSIMPLE: return vsdec32( in, n, out); - case TB_EFANO: return in; - case TB_FOR: if(b < 0) b = *in++; return bitfunpack32( in, n, out, 0, b); - case TB_FORDA: if(b < 0) b = *in++; return _bitunpackx32( in, n, out, b); - case TB_PACK: if(b < 0) b = *in++; return bitunpack32( in, n, out, b); - case TB_NPACK: return in+bitnunpack32( in, n, out); + case TB_BP: if(b < 0) b = *in++; return bitunpack32( in, n, out, b); + case TB_BPN: return in+bitnunpack32( in, n, out); #if C_TURBOPFORV - case TB_PFOR128 : __builtin_prefetch(in+256);return n == 128?p4dec128v32(in, n, out):p4dec32(in, n, out); - case TB_PFORN128 : return in+p4ndec128v32(in, n, out); + case TB_PF128V : __builtin_prefetch(in+256);return n == 128?p4dec128v32(in, n, out):p4dec32(in, n, out); + case TB_PFN128V : return in+p4ndec128v32(in, n, out); + + case TB_EF128V: return in; + + case TB_FOR128V: vbxget32(in, x); b = *in++;return bitfunpack128v32( in, n, out, x, b); + + case TB_BP128V: if(b < 0) b = *in++; return n != 128?bitunpack32(in, n, out, b):bitunpack128v32(in, n, out, b); #ifdef __AVX2__ - case TB_PFOR256 : __builtin_prefetch(in+256);return n == 256?p4dec256v32(in, n, out):p4dec32(in, n, out); - case TB_PFORN256 : return in+p4ndec256v32(in, n, out); - case TB_PACK256V: if(b < 0) b = *in++; return n != 256?bitunpack32(in, n, out, b):bitunpack256v32(in, n, out, b); + case TB_FOR256V: vbxget32(in, x); b = *in++;return bitfunpack256v32( in, n, out, x, b); + + case TB_PF256V : __builtin_prefetch(in+256); return n == 256?p4dec256v32(in, n, out):p4dec32(in, n, out); + case TB_PFN256V : return in+p4ndec256v32(in, n, out); + case TB_BP256V: if(b < 0) b = *in++; return n != 256?bitunpack32(in, n, out, b):bitunpack256v32(in, n, out, b); #endif - case TB_EFANOV: return in; - - case TB_PACK128V: if(b < 0) b = *in++; return n != 128?bitunpack32(in, n, out, b):bitunpack128v32(in, n, out, b); #endif - #if C_TURBOPFORH - case TB_PACK128H: if(b < 0) b = *in++; return n == 128?bitunpack128h32(in, n, out, b):bitunpack32(in, n, out, b); + case TB_BP128H: if(b < 0) b = *in++; return n == 128?bitunpack128h32(in, n, out, b):bitunpack32(in, n, out, b); #endif - + + //---- external libs ------------- + case P_COPY: return u32dec( (unsigned *)in, n, out); + case P_MCPY: memcpy(_out, in, _n); return in+_n; + + #if C_SIMPLE8B + case AM_SIMPLE8B: return vs8bdec( in, n, out); + #endif + #if C_FASTPFOR case FP_FASTPFOR: { size_t nvalue = n; @@ -1115,7 +1177,7 @@ unsigned char *coddecomp(unsigned char *in, unsigned _n, unsigned char *_out, in { size_t nvalue=n; FastPForLib::VariableByte ic; return (unsigned char *)ic.decodeArray((const int32_t *)(in+4), ctou32(in), (uint32_t *)out, nvalue); } case FP_SIMPLE8BRLE: { size_t nvalue=n; FastPForLib::Simple8b_RLE ic; ic.decodeArray((const int32_t *)(in+4), ctou32(in), (uint32_t *)out, nvalue); return in+4+ctou32(in)*4; } - #endif + #endif #if C_LIBFOR case LF_FORX: return for_selectx(in, n, out); //if(b < 0) b = *in++; return for_selectx(in, n, out, 0, b);// @@ -1147,7 +1209,7 @@ unsigned char *coddecomp(unsigned char *in, unsigned _n, unsigned char *_out, in //case SC_PACK: if(b < 0) b = *in++; return fastunpack32((uint32_t *)in, n, out, b); case SC_FOR: case SC_SIMDPACK128: if(b < 0) b = *in++; return (unsigned char *)simdunpack_length( (__m128i *)in, n, out, b); - //case SC_FORDA: if(b < 0) b = *in++; return simdfor_selectx(in, n, out, 0, b); + case SC_FORDA: if(b < 0) b = *in++; return simdfor_selectx(in, n, out, 0, b); #ifdef __AVX2__ case SC_SIMDPACK256: if(b < 0) b = *in++; return (unsigned char *)avxunpackn( (unsigned *)in, n, out, b); #endif @@ -1167,58 +1229,57 @@ unsigned char *coddecomp(unsigned char *in, unsigned _n, unsigned char *_out, in #if C_VARINTG8IU case P_VARINTG8IU: return vintg8dec(in, n, out); #endif - //---------- transpose + lz77 ---------------------- + //---------- transform ---------------------- #if C_TRANSFORM - case TB_ZIGZAG_32: memcpy(out, in, n*4); bitunzigzag32(out, n, 0); return in + n*4; - case TB_TP8_32: _untranspose4( (unsigned char *)in, n*4, (unsigned char *)out); return in + n*4; - case TB_TP8V_32: UNTRANSPOSE( (unsigned char *)in, n*4, (unsigned char *)out); return in + n*4; - case TB_TP4V_32: untransposen4( (unsigned char *)in, n*4, (unsigned char *)out); return in + n*4; + case TB_ZIGZAG_32: memcpy(out, in, n*4); bitunzigzag32(out, n, 0); return in + n*4; + case TB_TP8_32: _untranspose4( (unsigned char *)in, n*4, (unsigned char *)out); return in + n*4; + case TB_TP8V_32: UNTRANSPOSE( (unsigned char *)in, n*4, (unsigned char *)out); return in + n*4; + case TB_TP4V_32: untransposen4( (unsigned char *)in, n*4, (unsigned char *)out); return in + n*4; #endif #if C_C_BLOSC - case P_BS_SHUFFLE: unshuffle( 4, n*4, (unsigned char *)in, (unsigned char *)out); return in + n*4; - #endif - + case P_BS_SHUFFLE: unshuffle( 4, n*4, (unsigned char *)in, (unsigned char *)out); return in + n*4; + #endif #if C_BITSHUFFLE case P_BITSHUFFLE: BITUNSHUFFLE(in, n*4, (unsigned char *)out); /*bshuf_bitunshuffle(in, out, n, 4, 0);*/ return in + n*4; #endif //---------- transpose + lz77 ---------------------- #if C_LZTURBO - case P_LZT10: { struct lzobj lz; lz.dstlen = n*4; lz.src = in; lz.dst = sbuf; lz.level = 0; in += lz8d(&lz); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); } break; + case P_LZT10: { struct lzobj lz; lz.dstlen = n*4; lz.src = in; lz.dst = sbuf; lz.level = 0; in += lz8d(&lz); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); } break; case P_LZT20: - case P_LZT22: { struct lzobj lz; lz.dstlen = n*4; lz.src = in; lz.dst = sbuf; lz.level = 0; in += lzbd(&lz); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); } break; - case P_LZT32: { struct lzobj lz; lz.dstlen = n*4; lz.src = in; lz.dst = sbuf; lz.level = 0; in += lzhd(&lz); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); } break; - //case P_VSHUF: { unsigned inlen = ctou16(in); printf("l=%d ", inlen); in+=2; in = mhdec(in, inlen, MH_BLK, sbuf, n*4); vsdec32(sbuf, n, out); } break;// in = ransdecompress( in, n*4, sbuf); - //case P_VSHUF: { unsigned inlen = ctou32(in); in+=4; in = mhdec(in, inlen, MH_BLK, out, n*4); } break;// in = ransdecompress( in, n*4, sbuf); - case P_VSHUF: { unsigned inlen = ctou32(in); in+=4; in = mhdec(in, inlen, MH_BLK, sbuf, n*4); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); /*vsdec32(sbuf, n, out);*/ } break;// in = ransdecompress( in, n*4, sbuf); + case P_LZT22: { struct lzobj lz; lz.dstlen = n*4; lz.src = in; lz.dst = sbuf; lz.level = 0; in += lzbd(&lz); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); } break; + case P_LZT32: { struct lzobj lz; lz.dstlen = n*4; lz.src = in; lz.dst = sbuf; lz.level = 0; in += lzhd(&lz); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); } break; + //case P_VSHUF: { unsigned inlen = ctou16(in); printf("l=%d ", inlen); in+=2; in = mhdec(in, inlen, MH_BLK, sbuf, n*4); vsdec32(sbuf, n, out); } break;// in = ansdecompress( in, n*4, sbuf); + //case P_VSHUF: { unsigned inlen = ctou32(in); in+=4; in = mhdec(in, inlen, MH_BLK, out, n*4); } break;// in = ansdecompress( in, n*4, sbuf); + case P_VSHUF: { unsigned inlen = ctou32(in); in+=4; in = mhdec(in, inlen, MH_BLK, sbuf, n*4); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); /*vsdec32(sbuf, n, out);*/ } break;// in = ansdecompress( in, n*4, sbuf); #endif #if C_BITSHUFFLE - case TB_VS_S1: in = vsdec32(in, n, (unsigned *)sbuf); BITUNSHUFFLE(sbuf, n*4, (unsigned char *)out); break;/*bshuf_bitunshuffle(sbuf, out, n*4/32, 32, 0); */ + case TB_VSIMPLE_S1: in = vsdec32(in, n, (unsigned *)sbuf); BITUNSHUFFLE(sbuf, n*4, (unsigned char *)out); break;/*bshuf_bitunshuffle(sbuf, out, n*4/32, 32, 0); */ #endif - case TB_VS_S4: in = vsdec32(in, n, (unsigned *)sbuf); untransposen4(sbuf, n*4, (unsigned char *)out); break; - case TB_VS_S8: in = vsdec32(in, n, (unsigned *)sbuf); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); break;/*bshuf_bitunshuffle(sbuf, out, n*4/32, 32, 0); */ + case TB_VSIMPLE_S4: in = vsdec32(in, n, (unsigned *)sbuf); untransposen4(sbuf, n*4, (unsigned char *)out); break; + case TB_VSIMPLE_S8: in = vsdec32(in, n, (unsigned *)sbuf); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); break;/*bshuf_bitunshuffle(sbuf, out, n*4/32, 32, 0); */ #if C_LZ4 #if C_BITSHUFFLE - case LZ4_S1: in += LZ4_decompress_fast((char *)in, (char *)sbuf, n*4); BITUNSHUFFLE(sbuf, n*4, (unsigned char *)out); break;/*bshuf_bitunshuffle(sbuf, out, n*4/32, 32, 0); */ + case LZ4_S1: in += LZ4_decompress_fast((char *)in, (char *)sbuf, n*4); BITUNSHUFFLE(sbuf, n*4, (unsigned char *)out); break;/*bshuf_bitunshuffle(sbuf, out, n*4/32, 32, 0); */ #endif - case LZ4_ : in += LZ4_decompress_fast((char *)in, (char *)out, n*4); break; - case LZ4_S4: in += LZ4_decompress_fast((char *)in, (char *)sbuf, n*4); untransposen4(sbuf, n*4, (unsigned char *)out); break; - case LZ4_S8: in += LZ4_decompress_fast((char *)in, (char *)sbuf, n*4); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); break;/*bshuf_bitunshuffle(sbuf, out, n*4/32, 32, 0); */ + case LZ4_ : in += LZ4_decompress_fast((char *)in, (char *)out, n*4); break; + case LZ4_S4: in += LZ4_decompress_fast((char *)in, (char *)sbuf, n*4); untransposen4(sbuf, n*4, (unsigned char *)out); break; + case LZ4_S8: in += LZ4_decompress_fast((char *)in, (char *)sbuf, n*4); UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); break;/*bshuf_bitunshuffle(sbuf, out, n*4/32, 32, 0); */ #endif #if C_C_BLOSC case P_BS_LZ: case P_BS_LZ4: - case P_BS_ZLIB: { blosc_decompress(in, out, n*4); size_t nbytes, cbytes,blocksize; blosc_cbuffer_sizes(in, &nbytes, &cbytes, &blocksize); return in+cbytes; } + case P_BS_ZLIB: { blosc_decompress(in, out, n*4); size_t nbytes, cbytes,blocksize; blosc_cbuffer_sizes(in, &nbytes, &cbytes, &blocksize); return in+cbytes; } #endif #if C_ZLIB case P_ZLIB1: case P_ZLIB2: case P_ZLIB3: case P_ZLIB4: case P_ZLIB5: case P_ZLIB6: case P_ZLIB7: case P_ZLIB8: case P_ZLIB9: { uLongf outsize = n*4; int l = *(unsigned *)in, rc = uncompress(sbuf, &outsize, in+4, l); in += 4 + l; UNTRANSPOSE(sbuf, n*4, (unsigned char *)out); } break; #endif - case P_MAX ... 63: die("library '%d' not included\n", codec); + default : die("library '%d' not included\n", codec); } return in; } @@ -1236,7 +1297,7 @@ char *codver(int codec, char *v, char *s) { case P_ZSTD: sprintf(s,"%d.%d.%d", ZSTD_VERSION_MAJOR, ZSTD_VERSION_MINOR, ZSTD_VERSION_RELEASE); return s; #endif - default: strcpy(s,v); + default: if(v) strcpy(s,v); } return s; }