.

2016-06-19 10:50:44 +02:00
parent 863e2fe6a9
commit 70e443873e
10 changed files with 6806 additions and 6 deletions
--- a/README.md
+++ b/README.md
@ -311,4 +311,4 @@ header files to use with documentation:<br />
   - [On Inverted Index Compression for Search Engine Efficiency](http://www.dcs.gla.ac.uk/~craigm/publications/catena14compression.pdf)
   - [Google's Group Varint Encoding](http://static.googleusercontent.com/media/research.google.com/de//people/jeff/WSDM09-keynote.pdf)

-Last update:  24 APR 2016
+Last update:  19 JUN 2016
--- a/ext/ext.c
+++ b/ext/ext.c
@ -12,6 +12,7 @@

 //- Optional external libraries. Activate also in makefile -----
 //#define _LIBFOR // libfor 
+#define _QMX

 //#define _BTSHUF   			// https://github.com/kiyo-masui/bitshuffle

@ -47,6 +48,10 @@
 #include "for/for.h"
  #endif

+  #ifdef _QMX
+#include "qmx/compress_qmx.h"
+  #endif
+
  #ifdef _ZLIB
 #include <zlib.h>
  #endif
--- a/ext/qmx/GNUmakefile
+++ b/ext/qmx/GNUmakefile
@ -0,0 +1,10 @@
+#	OS X and Linux Makefile: builds the stand-alone compress_qmx test program.
+#	Override the compiler with e.g. `make CXX=clang++`; -msse4 enables SSE4 code generation.
+.PHONY : clean	# clean is a command, not a file; keep it working even if a file named clean exists
+
+compress_qmx : compress_qmx.cc	# source ships as .cc; the prerequisite triggers rebuilds on change
+	$(CXX) -O3 -msse4 $< -o $@
+
+clean :	# $(RM) is `rm -f`, so cleaning an already-clean tree is not an error
+	$(RM) compress_qmx
+
--- a/ext/qmx/README
+++ b/ext/qmx/README
@ -0,0 +1,16 @@
+QMX README
+----------
+The source is released under the BSD license (you choose which one). 
+
+See (and please cite), in the ACM Digital Library (and on my website):
+
+A. Trotman (2014), Compression, SIMD, and Postings Lists. In Proceedings of the 19th Australasian Document Computing Symposium (ADCS 2014)
+
+One C++ class is provided.  It compiles and runs on Linux, OS X, and Windows.  Use make to build the executable that compresses and decompresses one string (and checks that the code works).
+
+IMPORTANT NOTE
+--------------
+As QMX decodes in "chunks", it can (i.e. will normally) decode more integers than requested.  In other words, it will normally overflow the output buffer.  Allowing for 256 "extras" will suffice.  These extras will be garbage.  Although it is possible to encode to prevent (much) "junk", in this implementation the decision was made to favour smaller compressed size and the consequence is more junk decoded.
+
+Andrew
+
--- a/ext/qmx/compress_qmx.cc
+++ b/ext/qmx/compress_qmx.cc
--- a/ext/qmx/compress_qmx.h
+++ b/ext/qmx/compress_qmx.h
@ -0,0 +1,22 @@
+/*
+	COMPRESS_QMX.H - C-callable (extern "C") entry points for the QMX integer codec.
+	qmx_enc: encodes n values from in into out; icbench.c treats (return value - out) as the compressed byte count.
+	qmx_dec: decodes len compressed bytes from in into out; per the QMX README it may write past n values, so leave room for 256 extra.
+*/
+#ifndef COMPRESS_QMX_H_
+#define COMPRESS_QMX_H_
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned char *qmx_enc( const uint32_t *in, unsigned n, unsigned char *out);
+unsigned char *qmx_dec(const unsigned char *in, unsigned len, uint32_t *out, unsigned n);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
--- a/ext/qmx/makefile
+++ b/ext/qmx/makefile
@ -0,0 +1,10 @@
+#	Windows (NMAKE / MSVC) Makefile: builds the stand-alone compress_qmx.exe test program.
+#	/Ox selects full optimisation; /Tp compiles the named file as C++ whatever its extension.
+#	The source ships as compress_qmx.cc; listing it as a dependent triggers rebuilds on change.
+
+compress_qmx.exe : compress_qmx.cc
+	cl /Ox /Tp compress_qmx.cc
+
+clean :
+	del compress_qmx.obj compress_qmx.exe
+
--- a/icbench.c
+++ b/icbench.c
@ -148,6 +148,7 @@ enum {  P_CPY,                                                            // cop
        P_SV, P_SVANS,             P_S16, P_S64,                          // simple family: , simpleV, simple16, simple-8b
        P_P4D, P_P4DR,             P_OPTP4,                               // PFor, PForDelta
                                   P_LIBFOR,                              // For 
+								   P_VSQMX,								  // QMX	
                                   P_LZT10, P_LZT20, P_LZT22,		      // LzTurbo
                                   P_LZ4,                          		  // lz4
                                   P_BSHUF, P_BLZ, P_BLZ4, P_BZLIB,       // https://github.com/Blosc/c-blosc
@ -177,6 +178,7 @@ unsigned char *beenc(unsigned *__restrict in, size_t n, unsigned char *__restric
    case P_SV:     return vsenc32(  in, n, out);
    case P_S16:    return vs16enc(  in, n, (unsigned *)out); 
    case P_S64:    return vs8benc(  in, n, out); 
+    case P_VSQMX:  { unsigned char *q = qmx_enc(in, n, out+4); *(unsigned *)out = q - (out+4); return q; }
      // --------- elias fano ----------------------------------------------
    case P_EFANO:  return out; 
      // --------- PFor ----------------------------------------------------
@ -254,6 +256,7 @@ unsigned char *bedec(unsigned char *__restrict in, size_t n, unsigned *__restric

    case P_S16:    return vs16dec(  (unsigned *)in, n, out);  
    case P_S64:    return vs8bdec(  in, n, out);  
+    case P_VSQMX:    { unsigned l = *(unsigned *)in;  return qmx_dec(in+4, l, out, n); }   
      // --------- elias fano -----------------------------------------------
    case P_EFANO:  return in;
      // --------- PFor -----------------------------------------------------
@ -615,6 +618,9 @@ struct libss libss[] = {
  { P_VBP,    "VBytePoly"           },
    #endif

+    #ifdef _QMX
+  { P_VSQMX,    "qmx"            	  },  
+    #endif
  // ----- Simple family -----
  { P_SV,     "VSimple"             },
 //  { P_SVANS,      "VSimpleANS", BLK_SIZE },
--- a/10
+++ b/10
@ -41,7 +41,7 @@ SIMDCOMP=$(SIMDCOMPD)bitpacka.o $(SIMDCOMPD)src/simdintegratedbitpacking.o $(SIM

 #LIBFOR=ext/for/for.o
 MVB=ext/MaskedVByte/src/varintencode.o ext/MaskedVByte/src/varintdecode.o
-
+QMX=ext/qmx/compress_qmx.o 
 # Lzturbo not included
 #LZT=../lz/lz8c0.o ../lz/lz8d.o ../lz/lzbc0.o ../lz/lzbd.o

@ -60,10 +60,10 @@ LZ4=ext/lz4.o

 #BSHUFFLE=ext/bitshuffle/src/bitshuffle.o

-OBJS=icbench.o bitutil.o vint.o bitpack.o bitunpack.o eliasfano.o vsimple.o vp4dd.o vp4dc.o varintg8iu.o bitpackv.o bitunpackv.o $(TRANSP) ext/simple8b.o transpose.o $(BLOSC) $(SIMDCOMP) $(LIBFOR) $(LZT) $(LZ4) $(MVB) $(ZLIB) $(BSHUFFLE)
+OBJS=icbench.o bitutil.o vint.o bitpack.o bitunpack.o eliasfano.o vsimple.o vp4dd.o vp4dc.o varintg8iu.o bitpackv.o bitunpackv.o $(TRANSP) ext/simple8b.o transpose.o $(BLOSC) $(SIMDCOMP) $(LIBFOR) $(QMX) $(LZT) $(LZ4) $(MVB) $(ZLIB) $(BSHUFFLE)

 icbench: $(OBJS)
-	$(CC) $(OBJS) -lm -o icbench $(LFLAGS)
+	$(CXX) $(OBJS) -lm -o icbench $(LFLAGS)

 idxseg:   idxseg.o
 	$(CC) idxseg.o -o idxseg
@ -83,10 +83,10 @@ idxqry:   idxqry.o bitunpack.o vp4dd.o bitunpackv.o bitutil.o
 	$(CC) -O3 $(CFLAGS) $< -c -o $@

 .cc.o:
-	$(CXX) -O3 -DNDEBUG -std=c++11 $< -c -o $@
+	$(CXX) -O3 -DNDEBUG $(MARCH) $< -c -o $@

 .cpp.o:
-	$(CXX) -O3 -DNDEBUG -std=c++11 $< -c -o $@
+	$(CXX) -O3 -DNDEBUG $< -c -o $@
 	
 clean:
 	@find . -type f -name "*\.o" -delete -or -name "*\~" -delete -or -name "core" -delete
--- a/vp4dd.c
+++ b/vp4dd.c
@ -377,6 +377,7 @@ static ALIGNED(char, shuffles[16][16], 16) = {
 #undef P4DDD
 #undef P4DDECD
 #undef BITUNPACKD
+#undef BITUNPACKD_
 #undef BITUNDD

 #define P4DDD      p4dddv