.
This commit is contained in:
@ -311,4 +311,4 @@ header files to use with documentation:<br />
|
||||
- [On Inverted Index Compression for Search Engine Efficiency](http://www.dcs.gla.ac.uk/~craigm/publications/catena14compression.pdf)
|
||||
- [Google's Group Varint Encoding](http://static.googleusercontent.com/media/research.google.com/de//people/jeff/WSDM09-keynote.pdf)
|
||||
|
||||
Last update: 24 APR 2016
|
||||
Last update: 19 JUN 2016
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
|
||||
//- Optional external libraries. Activate also in makefile -----
|
||||
//#define _LIBFOR // libfor
|
||||
#define _QMX
|
||||
|
||||
//#define _BTSHUF // https://github.com/kiyo-masui/bitshuffle
|
||||
|
||||
@ -47,6 +48,10 @@
|
||||
#include "for/for.h"
|
||||
#endif
|
||||
|
||||
#ifdef _QMX
|
||||
#include "qmx/compress_qmx.h"
|
||||
#endif
|
||||
|
||||
#ifdef _ZLIB
|
||||
#include <zlib.h>
|
||||
#endif
|
||||
|
||||
10
ext/qmx/GNUmakefile
Normal file
10
ext/qmx/GNUmakefile
Normal file
@ -0,0 +1,10 @@
|
||||
#
|
||||
# OS X and Linux Makefile
|
||||
#
|
||||
|
||||
compress_qmx :
|
||||
g++ -O3 -msse4 compress_qmx.c -o compress_qmx
|
||||
|
||||
clean :
|
||||
rm compress_qmx
|
||||
|
||||
16
ext/qmx/README
Normal file
16
ext/qmx/README
Normal file
@ -0,0 +1,16 @@
|
||||
QMX README
|
||||
----------
|
||||
The source is released under the BSD license (you choose which one).
|
||||
|
||||
See (and please cite), in the ACM Digital Library (and on my website):
|
||||
|
||||
A. Trotman (2014), Compression, SIMD, and Postings Lists. In Proceedings of the 19th Australasian Document Computing Symposium (ADCS 2014)
|
||||
|
||||
One C++ class is provided. It compiles and runs on Linux, OS X, and Windows. Use make to build the executable that compresses and decompresses one string (and checks that the code works).
|
||||
|
||||
IMPORTANT NOTE
|
||||
--------------
|
||||
As QMX decodes in "chunks", it can (i.e. will normally) decode more integers than requested. In other words, it will normally overflow the output buffer. Allowing for 256 "extras" will suffice. These extras will be garbage. Although it is possible to encode to prevent (much) "junk", in this implementation the decision was made to favour smaller compressed size and the consequence is more junk decoded.
|
||||
|
||||
Andrew
|
||||
|
||||
6730
ext/qmx/compress_qmx.cc
Normal file
6730
ext/qmx/compress_qmx.cc
Normal file
File diff suppressed because it is too large
Load Diff
22
ext/qmx/compress_qmx.h
Normal file
22
ext/qmx/compress_qmx.h
Normal file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
COMPRESS_QMX.H
|
||||
--------------
|
||||
*/
|
||||
#ifndef COMPRESS_QMX_H_
|
||||
#define COMPRESS_QMX_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
	QMX_ENC()
	---------
	Compress n 32-bit integers read from in into the byte buffer out.
	NOTE(review): the icbench caller in this commit computes the compressed
	size as (return value - out), so the result is presumably a pointer just
	past the last byte written -- confirm against compress_qmx.cc.
*/
unsigned char *qmx_enc( const uint32_t *in, unsigned n, unsigned char *out);
|
||||
/*
	QMX_DEC()
	---------
	Decompress len bytes read from in into out; n is the number of integers
	requested. Per the QMX README, decoding proceeds in chunks and will
	normally write more than n integers, so out must have room for roughly
	256 extra (garbage) values.
	NOTE(review): the meaning of the returned pointer is not visible from
	this header -- confirm against compress_qmx.cc.
*/
unsigned char *qmx_dec(const unsigned char *in, unsigned len, uint32_t *out, unsigned n);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
10
ext/qmx/makefile
Normal file
10
ext/qmx/makefile
Normal file
@ -0,0 +1,10 @@
|
||||
#
|
||||
# Windows Makefile
|
||||
#
|
||||
|
||||
compress_qmx.exe :
|
||||
cl /Ox /Tp compress_qmx.c
|
||||
|
||||
clean :
|
||||
del compress_qmx.obj compress_qmx.exe
|
||||
|
||||
@ -148,6 +148,7 @@ enum { P_CPY, // cop
|
||||
P_SV, P_SVANS, P_S16, P_S64, // simple family: , simpleV, simple16, simple-8b
|
||||
P_P4D, P_P4DR, P_OPTP4, // PFor, PForDelta
|
||||
P_LIBFOR, // For
|
||||
P_VSQMX, // QMX
|
||||
P_LZT10, P_LZT20, P_LZT22, // LzTurbo
|
||||
P_LZ4, // lz4
|
||||
P_BSHUF, P_BLZ, P_BLZ4, P_BZLIB, // https://github.com/Blosc/c-blosc
|
||||
@ -177,6 +178,7 @@ unsigned char *beenc(unsigned *__restrict in, size_t n, unsigned char *__restric
|
||||
case P_SV: return vsenc32( in, n, out);
|
||||
case P_S16: return vs16enc( in, n, (unsigned *)out);
|
||||
case P_S64: return vs8benc( in, n, out);
|
||||
case P_VSQMX: { unsigned char *q = qmx_enc(in, n, out+4); *(unsigned *)out = q - (out+4); return q; }
|
||||
// --------- elias fano ----------------------------------------------
|
||||
case P_EFANO: return out;
|
||||
// --------- PFor ----------------------------------------------------
|
||||
@ -254,6 +256,7 @@ unsigned char *bedec(unsigned char *__restrict in, size_t n, unsigned *__restric
|
||||
|
||||
case P_S16: return vs16dec( (unsigned *)in, n, out);
|
||||
case P_S64: return vs8bdec( in, n, out);
|
||||
case P_VSQMX: { unsigned l = *(unsigned *)in; return qmx_dec(in+4, l, out, n); }
|
||||
// --------- elias fano -----------------------------------------------
|
||||
case P_EFANO: return in;
|
||||
// --------- PFor -----------------------------------------------------
|
||||
@ -615,6 +618,9 @@ struct libss libss[] = {
|
||||
{ P_VBP, "VBytePoly" },
|
||||
#endif
|
||||
|
||||
#ifdef _QMX
|
||||
{ P_VSQMX, "qmx" },
|
||||
#endif
|
||||
// ----- Simple family -----
|
||||
{ P_SV, "VSimple" },
|
||||
// { P_SVANS, "VSimpleANS", BLK_SIZE },
|
||||
|
||||
10
makefile
10
makefile
@ -41,7 +41,7 @@ SIMDCOMP=$(SIMDCOMPD)bitpacka.o $(SIMDCOMPD)src/simdintegratedbitpacking.o $(SIM
|
||||
|
||||
#LIBFOR=ext/for/for.o
|
||||
MVB=ext/MaskedVByte/src/varintencode.o ext/MaskedVByte/src/varintdecode.o
|
||||
|
||||
QMX=ext/qmx/compress_qmx.o
|
||||
# Lzturbo not included
|
||||
#LZT=../lz/lz8c0.o ../lz/lz8d.o ../lz/lzbc0.o ../lz/lzbd.o
|
||||
|
||||
@ -60,10 +60,10 @@ LZ4=ext/lz4.o
|
||||
|
||||
#BSHUFFLE=ext/bitshuffle/src/bitshuffle.o
|
||||
|
||||
OBJS=icbench.o bitutil.o vint.o bitpack.o bitunpack.o eliasfano.o vsimple.o vp4dd.o vp4dc.o varintg8iu.o bitpackv.o bitunpackv.o $(TRANSP) ext/simple8b.o transpose.o $(BLOSC) $(SIMDCOMP) $(LIBFOR) $(LZT) $(LZ4) $(MVB) $(ZLIB) $(BSHUFFLE)
|
||||
OBJS=icbench.o bitutil.o vint.o bitpack.o bitunpack.o eliasfano.o vsimple.o vp4dd.o vp4dc.o varintg8iu.o bitpackv.o bitunpackv.o $(TRANSP) ext/simple8b.o transpose.o $(BLOSC) $(SIMDCOMP) $(LIBFOR) $(QMX) $(LZT) $(LZ4) $(MVB) $(ZLIB) $(BSHUFFLE)
|
||||
|
||||
icbench: $(OBJS)
|
||||
$(CC) $(OBJS) -lm -o icbench $(LFLAGS)
|
||||
$(CXX) $(OBJS) -lm -o icbench $(LFLAGS)
|
||||
|
||||
idxseg: idxseg.o
|
||||
$(CC) idxseg.o -o idxseg
|
||||
@ -83,10 +83,10 @@ idxqry: idxqry.o bitunpack.o vp4dd.o bitunpackv.o bitutil.o
|
||||
$(CC) -O3 $(CFLAGS) $< -c -o $@
|
||||
|
||||
.cc.o:
|
||||
$(CXX) -O3 -DNDEBUG -std=c++11 $< -c -o $@
|
||||
$(CXX) -O3 -DNDEBUG $(MARCH) $< -c -o $@
|
||||
|
||||
.cpp.o:
|
||||
$(CXX) -O3 -DNDEBUG -std=c++11 $< -c -o $@
|
||||
$(CXX) -O3 -DNDEBUG $< -c -o $@
|
||||
|
||||
clean:
|
||||
@find . -type f -name "*\.o" -delete -or -name "*\~" -delete -or -name "core" -delete
|
||||
|
||||
Reference in New Issue
Block a user