IcBench App: External libs

2017-10-14 16:24:39 +02:00
parent f7e4ff6c25
commit 7aad760aa1
8 changed files with 6926 additions and 29 deletions
--- a/ext/bench_/bench/compress_integer_qmx_improved.cpp
+++ b/ext/bench_/bench/compress_integer_qmx_improved.cpp
--- a/ext/bench_/bench/compress_integer_qmx_improved.h
+++ b/ext/bench_/bench/compress_integer_qmx_improved.h
@ -0,0 +1,158 @@
+/*
+	COMPRESS_INTEGER_QMX_IMPROVED.H
+	-------------------------------
+	Copyright (c) 2014-2017 Andrew Trotman
+	Released under the 2-clause BSD license (See:https://en.wikipedia.org/wiki/BSD_licenses)
+*/
+#pragma once
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+namespace JASS
+	{
+	/*
+		CLASS COMPRESS_INTEGER_QMX_IMPROVED
+		-----------------------------------
+	*/
+	/*!
+		@brief QMX compression improved (smaller and faster to decode)
+		@details  Trotman & Lin  describe several improvements to the QMX codex in:
+
+		A. Trotman, J. Lin (2016), In Vacuo and In Situ Evaluation of SIMD Codecs, Proceedings of The 21st Australasian Document Computing Symposium (ADCS 2016)
+		
+		including removal of the vbyte encoded length from the end of the encoded sequence.  This version of QMX is the original QMX with that improvement added,
+		but none of the other improvements suggested by Trotman & Lin.  This makes the encoded sequence smaller, and faster to decode, than any of the other
+		alternatives suggested.  It does not include the code to prevent read and write overruns from the encoded string and into the decode buffer.  To account
+		for overwrites make sure the decode-into buffer is at least 256 integers larger than required.  To prevent over-reads from the encoded string make sure
+		that the string is at least 16 bytes longer than needed.
+		
+		At the request of Matthias Petri (University of Melbourne), the code no longer requires SIMD-word alignment to decode (the read and write 
+		instructions have been changed from aligned to unaligned since Intel made them faster).
+		
+		For details on the original QMX encoding see:
+		
+		A. Trotman (2014), Compression, SIMD, and Postings Lists, Proceedings of the 19th Australasian Document Computing Symposium (ADCS 2014)
+	*/
+	class compress_integer_qmx_improved
+		{
+		typedef uint32_t integer;
+		
+		private:
+			uint8_t *length_buffer;					///< Stores the number of bits needed to compress each integer
+			uint64_t length_buffer_length;		///< The length of length_buffer
+			uint32_t *full_length_buffer;			///< If the run_length is too short then 0-pad into this buffer
+
+		private:
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::WRITE_OUT()
+				------------------------------------------
+			*/
+			/*!
+				@brief Encode and write out the sequence into the buffer
+				@param buffer [in] where to write the encoded sequence
+				@param source [in] the integer sequence to encode
+				@param raw_count [in] the number of integers to encode
+				@param size_in_bits [in] the size, in bits, of the largest integer
+				@param length_buffer [in, out] pointer to a position in a byte buffer; note that this parameter shadows the
+				member of the same name (NOTE(review): the definition is not in this header, presumably this is the cursor into the
+				per-integer bit-width buffer - confirm against the implementation)
+			*/
+			void write_out(uint8_t **buffer, uint32_t *source, uint32_t raw_count, uint32_t size_in_bits, uint8_t **length_buffer);
+
+		public:
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::COMPRESS_INTEGER_QMX_IMPROVED()
+				--------------------------------------------------------------
+			*/
+			/*!
+				@brief Constructor
+				@details Starts with no length_buffer (nullptr, length 0) and allocates full_length_buffer as 256 * 16 integers.
+			*/
+			compress_integer_qmx_improved() :
+				length_buffer(nullptr),
+				length_buffer_length(0),
+				full_length_buffer(new uint32_t [256 * 16])
+				{
+				/* Nothing */
+				}
+
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::COMPRESS_INTEGER_QMX_IMPROVED()
+				--------------------------------------------------------------
+			*/
+			/*!
+				@brief Copy construction is disabled.
+				@details This object owns length_buffer and full_length_buffer through raw pointers that the destructor releases
+				with delete [].  A member-wise copy would leave two objects releasing the same memory, so copying is forbidden.
+			*/
+			compress_integer_qmx_improved(const compress_integer_qmx_improved &) = delete;
+
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::OPERATOR=()
+				------------------------------------------
+			*/
+			/*!
+				@brief Copy assignment is disabled (for the same reason that copy construction is disabled).
+			*/
+			compress_integer_qmx_improved &operator=(const compress_integer_qmx_improved &) = delete;
+
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::~COMPRESS_INTEGER_QMX_IMPROVED()
+				---------------------------------------------------------------
+			*/
+			/*!
+				@brief Destructor
+				@details Releases both of the buffers owned by this object.
+			*/
+			virtual ~compress_integer_qmx_improved()
+				{
+				delete [] length_buffer;
+				delete [] full_length_buffer;
+				}
+
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::ENCODE()
+				---------------------------------------
+			*/
+			/*!
+				@brief Encode a sequence of integers returning the number of bytes used for the encoding, or 0 if the encoded sequence doesn't fit in the buffer.
+				@param encoded [out] The sequence of bytes that is the encoded sequence.
+				@param encoded_buffer_length [in] The length (in bytes) of the output buffer, encoded.
+				@param source [in] The sequence of integers to encode.
+				@param source_integers [in] The length (in integers) of the source buffer.
+				@return The number of bytes used to encode the integer sequence, or 0 on error (i.e. overflow).
+			*/
+			virtual size_t encode(void *encoded, size_t encoded_buffer_length, const integer *source, size_t source_integers);
+
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::DECODE()
+				---------------------------------------
+			*/
+			/*!
+				@brief Decode a sequence of integers encoded with this codex.
+				@param decoded [out] The sequence of decoded integers.
+				@param integers_to_decode [in] The minimum number of integers to decode (it may decode more).
+				@param source [in] The encoded integers.
+				@param source_length [in] The length (in bytes) of the source buffer.
+			*/
+			virtual void decode(integer *decoded, size_t integers_to_decode, const void *source, size_t source_length);
+
+
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::ENCODEARRAY()
+				--------------------------------------------
+				provided for backwards compatibility
+			*/
+			/*!
+				@brief Wrapper around encode(), provided for backwards compatibility.
+				@param in [in] The sequence of integers to encode.
+				@param len [in] The number of integers in in.
+				@param out [out] Where to write the encoded sequence.
+				@param nvalue [in, out] On entry, the value passed to encode() as the length of out; on return, the value returned by encode().
+			*/
+			virtual void encodeArray(const uint32_t *in, uint64_t len, uint32_t *out, uint64_t *nvalue)
+				{
+				*nvalue = encode(out, *nvalue, in, len);
+				}
+			
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::DECODEARRAY()
+				--------------------------------------------
+				provided for backwards compatibility
+			*/
+			/*!
+				@brief Wrapper around decode(), provided for backwards compatibility.
+				@param in [in] The encoded sequence.
+				@param len [in] The value passed to decode() as the length of in.
+				@param out [out] Where to write the decoded integers.
+				@param nvalue [in] The value passed to decode() as the number of integers to decode.
+			*/
+			virtual void decodeArray(const uint32_t *in, uint64_t len, uint32_t *out, uint64_t nvalue)
+				{
+				decode(out, nvalue, in, len);
+				}
+
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::UNITTEST_ONE()
+				---------------------------------------------
+			*/
+			/*!
+				@brief Test one sequence to make sure it encodes and decodes to the same thing.  Assert if not.
+				@param sequence [in] the sequence to encode.
+			*/
+			static void unittest_one(const std::vector<uint32_t> &sequence);
+
+			/*
+				COMPRESS_INTEGER_QMX_IMPROVED::UNITTEST()
+				-----------------------------------------
+			*/
+			/*!
+				@brief Unit test this class
+			*/
+			static void unittest(void);
+		};
+	}
+
--- a/ext/beplug_.h
+++ b/ext/beplug_.h
@ -52,10 +52,7 @@ unsigned char *for_selectx( unsigned char *__restrict in, unsigned n, unsigned *
  #endif

  #if C_QMX
-#include "bench_/bench/compress_qmx.h" 
-#include "bench_/bench/compress_qmx_v2.h"
-#include "bench_/bench/compress_qmx_v3.h"
-#include "bench_/bench/compress_qmx_v4.h"
+#include "JASSv2/source/compress_integer_qmx_improved.h" 
  #endif
  
  #if C_ZLIB
--- a/ext/beplugc_.c
+++ b/ext/beplugc_.c
@ -77,12 +77,9 @@
    case PC_RICE:       return rcenc32(  in, n, (unsigned *)out); 
    case PC_OPTPFD:     return optpfdenc32(in, n, out); //if(n < 128) return vbyteenc(in, n, (unsigned *)out); else { unsigned tmp[2048]; for(i = 0; i < n; i++) tmp[i] = in[i]; return out + OPT4(tmp, n, (unsigned *)out); }
      #endif 
-	  
+	  //encode(void *encoded, size_t encoded_buffer_length, const integer *source, size_t source_integers)
 	  #if C_QMX
-    case P_QMX:  		{ ANT_compress_qmx    qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; } // { unsigned char *q = qmx_enc(in, n, out+4); ctou32(out) = q - (out+4); return q;
-    case P_QMX2: 		{ ANT_compress_qmx_v2 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; }
-    case P_QMX3: 		{ ANT_compress_qmx_v3 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; }
-    case P_QMX4: 		{ ANT_compress_qmx_v4 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)in, (uint64_t)n); ctou32(out)=r; return out+4+r; }	 
+    case P_QMX:  		{ JASS::compress_integer_qmx_improved    qmx; unsigned r=qmx.encode(out+4, outsize, (uint32_t *)in, (size_t)n); ctou32(out)=r; return out+4+r; } 
 	  #endif
 	  
      #if C_SIMDCOMP
--- a/ext/beplugcs_.c
+++ b/ext/beplugcs_.c
@ -30,11 +30,7 @@
      #endif

 	  #if C_QMX
-  //case P_QMX:  { bitdienc32( in+1, --n, pa, in[0], mdelta); vbxput32(out, in[0]); unsigned char *q = qmx_enc(pa, n, out+4); *(unsigned *)out = q - (out+4); return q; }
-    case P_QMX:  { bitdienc32( in+1, --n, pa, in[0], mdelta); vbxput32(out, in[0]); ANT_compress_qmx    qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; }
-    case P_QMX2: { bitdienc32( in+1, --n, pa, in[0], mdelta); vbxput32(out, in[0]); ANT_compress_qmx_v2 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; }
-    case P_QMX3: { bitdienc32( in+1, --n, pa, in[0], mdelta); vbxput32(out, in[0]); ANT_compress_qmx_v3 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; }
-    case P_QMX4: { bitdienc32( in+1, --n, pa, in[0], mdelta); vbxput32(out, in[0]); ANT_compress_qmx_v4 qmx; unsigned r=qmx.compress(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; }	 
+    case P_QMX:  { bitdienc32( in+1, --n, pa, in[0], mdelta); vbxput32(out, in[0]); JASS::compress_integer_qmx_improved qmx; unsigned r=qmx.encode(out+4, outsize, (uint32_t *)pa, (uint64_t)n); ctou32(out)=r; return out+4+r; } // ctou32(out)=r stores the 4-byte payload length in front of the payload; the matching decoder reads it back with ctou32(in)
 	  #endif  

      #if C_SIMDCOMP				  
--- a/ext/beplugd_.c
+++ b/ext/beplugd_.c
@ -94,12 +94,8 @@
 	  #if C_STREAMVBYTE 
    case P_STREAMVBYTE:  return in +  streamvbyte_decode(in, out, n); 
      #endif 
-	 
-      #if C_QMX    										//case P_QMX: return qmx_dec(in+4, ctou32(in), out, n); 
-	case P_QMX:  { ANT_compress_qmx  qmx;   qmx.decompress(out, n,  in+4, ctou32(in)); return in+4+ctou32(in);} 	// { unsigned char *q = qmx_enc(in, n, out+4); ctou32(out) = q - (out+4); return q;
-    case P_QMX2: { ANT_compress_qmx_v2 qmx; qmx.decompress(out, n,  in+4, ctou32(in)); return in+4+ctou32(in);}
-    case P_QMX3: { ANT_compress_qmx_v3 qmx; qmx.decompress(out, n,  in+4, ctou32(in)); return in+4+ctou32(in);}
-    case P_QMX4: { ANT_compress_qmx_v4 qmx; qmx.decompress(out, n,  in+4, ctou32(in)); return in+4+ctou32(in);}	 
+      #if C_QMX    										
+	case P_QMX:  { JASS::compress_integer_qmx_improved qmx; qmx.decode(out, n,  in+4, ctou32(in)); return in+4+ctou32(in); }
      #endif

      #if C_VARINTG8IU
--- a/ext/beplugds_.c
+++ b/ext/beplugds_.c
@ -27,12 +27,8 @@
      else {      _VBGET32(in, x, *out = x); unsigned all_array[2048]; in = (unsigned char *)detailed_p4_decode(out+1, (unsigned *)in, all_array); }
                                                                                            bitdidec32(out, n, -mdelta, mdelta); break;*/
 	  #endif
-    //case P_QMX:  { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; in = qmx_dec(in+4, l, out+1, n-1); bitdidec32(out+1, n-1, x, mdelta); break; }   
-      #if C_QMX    										//case P_QMX: return qmx_dec(in+4, ctou32(in), out, n); 
-	case P_QMX:  { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx  qmx;   qmx.decompress(out+1, n-1,  in+4, ctou32(in)); bitdidec32(out+1, n-1, x, mdelta); return in+4+ctou32(in);} 
-    case P_QMX2: { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v2 qmx; qmx.decompress(out+1, n-1,  in+4, ctou32(in)); bitdidec32(out+1, n-1, x, mdelta); return in+4+ctou32(in);}
-    case P_QMX3: { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v3 qmx; qmx.decompress(out+1, n-1,  in+4, ctou32(in)); bitdidec32(out+1, n-1, x, mdelta); return in+4+ctou32(in);}
-    case P_QMX4: { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; ANT_compress_qmx_v4 qmx; qmx.decompress(out+1, n-1,  in+4, ctou32(in)); bitdidec32(out+1, n-1, x, mdelta); return in+4+ctou32(in);}	 
+      #if C_QMX    								
+    case P_QMX:  { vbxget32(in, x); *out = x; unsigned l = *(unsigned *)in; JASS::compress_integer_qmx_improved qmx; qmx.decode(out+1, n-1,  in+4, ctou32(in)); bitdidec32(out+1, n-1, x, mdelta); return in+4+ctou32(in);} 
      #endif
 	  
 	  #if C_SIMDCOMP
--- a/ext/beplugr_.h
+++ b/ext/beplugr_.h
@ -41,9 +41,6 @@
  { LI_BMIPACK,		"LI_BMIPack256",	C_LITTLEPACK,	0,    	 0,"","Bit packing (avx2)"	},

  { P_QMX,			"qmx",		    	C_QMX,	    	0,    	 0,"","QMX SIMD (inefficient for small blocks)"	},
-  { P_QMX2,			"qmx2",		    	C_QMX,	    	0,    	 0,"","QMX SIMD (inefficient for small blocks)"	},
-  { P_QMX3,			"qmx3",		    	C_QMX,	    	0,    	 0,"","QMX SIMD (inefficient for small blocks)"	},
-  { P_QMX4,			"qmx4",		    	C_QMX,	    	0,    	 0,"","QMX SIMD (inefficient for small blocks)"	},
  //-------- lz77 + [delta] + transpose/shuffle ---------------
  { P_LZT,			"LzTurbo",	    	C_LZTURBO, 		BLK_SIZE, 0, "20,21,22,32" },
  { P_VSHUF,		"VSimpleANS",		C_LZTURBO, 		BLK_SIZE, 0, "20,21,22,32" },