This commit is contained in:
powturbo
2016-06-19 10:50:44 +02:00
parent 863e2fe6a9
commit 70e443873e
10 changed files with 6806 additions and 6 deletions

View File

@ -311,4 +311,4 @@ header files to use with documentation:<br />
- [On Inverted Index Compression for Search Engine Efficiency](http://www.dcs.gla.ac.uk/~craigm/publications/catena14compression.pdf)
- [Google's Group Varint Encoding](http://static.googleusercontent.com/media/research.google.com/de//people/jeff/WSDM09-keynote.pdf)
Last update: 24 APR 2016
Last update: 19 JUN 2016

View File

@ -12,6 +12,7 @@
//- Optional external libraries. Activate also in makefile -----
//#define _LIBFOR // libfor
#define _QMX
//#define _BTSHUF // https://github.com/kiyo-masui/bitshuffle
@ -47,6 +48,10 @@
#include "for/for.h"
#endif
#ifdef _QMX
#include "qmx/compress_qmx.h"
#endif
#ifdef _ZLIB
#include <zlib.h>
#endif

10
ext/qmx/GNUmakefile Normal file
View File

@ -0,0 +1,10 @@
#
# OS X and Linux Makefile
#
# Builds the stand-alone QMX executable from compress_qmx.cc.
# CXX and CXXFLAGS can be overridden on the command line, e.g.
#   make CXX=clang++ CXXFLAGS=-O2
#
# -msse4 is kept out of CXXFLAGS on purpose so that a user override of
# CXXFLAGS cannot drop it.
#
CXXFLAGS ?= -O3

# "clean" is a command, not a file; keep it working even if a file
# named "clean" ever appears in this directory.
.PHONY : clean

# Listing the source as a prerequisite makes the binary rebuild when it changes.
compress_qmx : compress_qmx.cc
	$(CXX) $(CXXFLAGS) -msse4 $< -o $@

# $(RM) is "rm -f", so cleaning an already clean tree is not an error.
clean :
	$(RM) compress_qmx

16
ext/qmx/README Normal file
View File

@ -0,0 +1,16 @@
QMX README
----------
The source is released under the BSD license (you choose which one).
See (and please cite), in the ACM Digital Library (and on my website):
A. Trotman (2014), Compression, SIMD, and Postings Lists. In Proceedings of the 19th Australasian Document Computing Symposium (ADCS 2014)
One C++ class is provided. It compiles and runs on Linux, OS X, and Windows. Use make to build the executable that compresses and decompresses one string (and checks that the code works).
IMPORTANT NOTE
--------------
As QMX decodes in "chunks", it can (i.e. will normally) decode more integers than requested. In other words, it will normally overflow the output buffer. Allowing for 256 "extras" will suffice. These extras will be garbage. Although it is possible to encode to prevent (much) "junk", in this implementation the decision was made to favour smaller compressed size and the consequence is more junk decoded.
Andrew

6730
ext/qmx/compress_qmx.cc Normal file

File diff suppressed because it is too large Load Diff

22
ext/qmx/compress_qmx.h Normal file
View File

@ -0,0 +1,22 @@
/*
	COMPRESS_QMX.H
	--------------
	C-callable declarations for the QMX integer codec.

	Per the QMX README, decoding works in chunks and will normally write
	more integers than requested; the output buffer should allow for 256
	extra (garbage) entries.
*/
#ifndef COMPRESS_QMX_H_
#define COMPRESS_QMX_H_

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
	QMX_ENC()
	---------
	Encode the 32-bit integers at `in` into the byte buffer `out`.
	`n` is presumably the number of integers to encode (TODO confirm
	against the implementation).
	The benchmark caller treats the returned pointer as the end of the
	encoded data: it computes the encoded length as (return value - out).
*/
unsigned char *qmx_enc(const uint32_t *in, unsigned int n, unsigned char *out);

/*
	QMX_DEC()
	---------
	Decode from the byte buffer `in` into the 32-bit integers at `out`.
	The benchmark caller passes as `len` the length it recorded from
	qmx_enc() (return value - out), and as `n` the integer count.
	The meaning of the returned pointer is not visible from this header;
	presumably it is the position in `in` after the consumed data (TODO confirm).
*/
unsigned char *qmx_dec(const unsigned char *in, unsigned int len, uint32_t *out, unsigned int n);

#ifdef __cplusplus
}
#endif

#endif /* COMPRESS_QMX_H_ */

10
ext/qmx/makefile Normal file
View File

@ -0,0 +1,10 @@
#
# Windows Makefile
#
# Builds compress_qmx.exe with the Microsoft compiler (cl).
# /Tp makes cl treat the file that follows as C++ source whatever its
# extension; /Ox selects full optimisation.
#
# The source file in this directory is compress_qmx.cc; it is listed as a
# prerequisite so the executable is rebuilt when the source changes.
#
compress_qmx.exe : compress_qmx.cc
	cl /Ox /Tp compress_qmx.cc

# Remove the build products left behind by cl.
clean :
	del compress_qmx.obj compress_qmx.exe

View File

@ -148,6 +148,7 @@ enum { P_CPY, // cop
P_SV, P_SVANS, P_S16, P_S64, // simple family: , simpleV, simple16, simple-8b
P_P4D, P_P4DR, P_OPTP4, // PFor, PForDelta
P_LIBFOR, // For
P_VSQMX, // QMX
P_LZT10, P_LZT20, P_LZT22, // LzTurbo
P_LZ4, // lz4
P_BSHUF, P_BLZ, P_BLZ4, P_BZLIB, // https://github.com/Blosc/c-blosc
@ -177,6 +178,7 @@ unsigned char *beenc(unsigned *__restrict in, size_t n, unsigned char *__restric
case P_SV: return vsenc32( in, n, out);
case P_S16: return vs16enc( in, n, (unsigned *)out);
case P_S64: return vs8benc( in, n, out);
case P_VSQMX: { unsigned char *q = qmx_enc(in, n, out+4); *(unsigned *)out = q - (out+4); return q; }
// --------- elias fano ----------------------------------------------
case P_EFANO: return out;
// --------- PFor ----------------------------------------------------
@ -254,6 +256,7 @@ unsigned char *bedec(unsigned char *__restrict in, size_t n, unsigned *__restric
case P_S16: return vs16dec( (unsigned *)in, n, out);
case P_S64: return vs8bdec( in, n, out);
case P_VSQMX: { unsigned l = *(unsigned *)in; return qmx_dec(in+4, l, out, n); }
// --------- elias fano -----------------------------------------------
case P_EFANO: return in;
// --------- PFor -----------------------------------------------------
@ -615,6 +618,9 @@ struct libss libss[] = {
{ P_VBP, "VBytePoly" },
#endif
#ifdef _QMX
{ P_VSQMX, "qmx" },
#endif
// ----- Simple family -----
{ P_SV, "VSimple" },
// { P_SVANS, "VSimpleANS", BLK_SIZE },

View File

@ -41,7 +41,7 @@ SIMDCOMP=$(SIMDCOMPD)bitpacka.o $(SIMDCOMPD)src/simdintegratedbitpacking.o $(SIM
#LIBFOR=ext/for/for.o
MVB=ext/MaskedVByte/src/varintencode.o ext/MaskedVByte/src/varintdecode.o
QMX=ext/qmx/compress_qmx.o
# Lzturbo not included
#LZT=../lz/lz8c0.o ../lz/lz8d.o ../lz/lzbc0.o ../lz/lzbd.o
@ -60,10 +60,10 @@ LZ4=ext/lz4.o
#BSHUFFLE=ext/bitshuffle/src/bitshuffle.o
OBJS=icbench.o bitutil.o vint.o bitpack.o bitunpack.o eliasfano.o vsimple.o vp4dd.o vp4dc.o varintg8iu.o bitpackv.o bitunpackv.o $(TRANSP) ext/simple8b.o transpose.o $(BLOSC) $(SIMDCOMP) $(LIBFOR) $(LZT) $(LZ4) $(MVB) $(ZLIB) $(BSHUFFLE)
OBJS=icbench.o bitutil.o vint.o bitpack.o bitunpack.o eliasfano.o vsimple.o vp4dd.o vp4dc.o varintg8iu.o bitpackv.o bitunpackv.o $(TRANSP) ext/simple8b.o transpose.o $(BLOSC) $(SIMDCOMP) $(LIBFOR) $(QMX) $(LZT) $(LZ4) $(MVB) $(ZLIB) $(BSHUFFLE)
icbench: $(OBJS)
$(CC) $(OBJS) -lm -o icbench $(LFLAGS)
$(CXX) $(OBJS) -lm -o icbench $(LFLAGS)
idxseg: idxseg.o
$(CC) idxseg.o -o idxseg
@ -83,10 +83,10 @@ idxqry: idxqry.o bitunpack.o vp4dd.o bitunpackv.o bitutil.o
$(CC) -O3 $(CFLAGS) $< -c -o $@
.cc.o:
$(CXX) -O3 -DNDEBUG -std=c++11 $< -c -o $@
$(CXX) -O3 -DNDEBUG $(MARCH) $< -c -o $@
.cpp.o:
$(CXX) -O3 -DNDEBUG -std=c++11 $< -c -o $@
$(CXX) -O3 -DNDEBUG $< -c -o $@
clean:
@find . -type f -name "*\.o" -delete -or -name "*\~" -delete -or -name "core" -delete

View File

@ -377,6 +377,7 @@ static ALIGNED(char, shuffles[16][16], 16) = {
#undef P4DDD
#undef P4DDECD
#undef BITUNPACKD
#undef BITUNPACKD_
#undef BITUNDD
#define P4DDD p4dddv