TurboPFor: Bit Packing
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -1,158 +0,0 @@
|
|||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED.H
|
|
||||||
-------------------------------
|
|
||||||
Copyright (c) 2014-2017 Andrew Trotman
|
|
||||||
Released under the 2-clause BSD license (See:https://en.wikipedia.org/wiki/BSD_licenses)
|
|
||||||
*/
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
namespace JASS
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
CLASS COMPRESS_INTEGER_QMX_IMPROVED
|
|
||||||
-----------------------------------
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
@brief QMX compression improved (smaller and faster to decode)
|
|
||||||
@details Trotman & Lin describe several improvements to the QMX codex in:
|
|
||||||
|
|
||||||
A. Trotman, J. Lin (2016), In Vacuo and In Situ Evaluation of SIMD Codecs, Proceedings of The 21st Australasian Document Computing Symposium (ADCS 2016
|
|
||||||
|
|
||||||
including removal of the vbyte encoded length from the end of the encoded sequence. This version of QMX is the original QMX with that improvement added,
|
|
||||||
but none of the other imprivements suggested by Trotman & Lin. This makes the encoded sequence smaller, and faster to decode, than any of the other
|
|
||||||
alrernatives suggested. It does not include the code to prevent read and write overruns from the encoded string and into the decode buffer. To account
|
|
||||||
for overwrites make sure the decode-into buffer is at least 256 integers larger than required. To prevent over-reads from the encoded string make sure
|
|
||||||
that that string is at least 16 bytes longer than needed.
|
|
||||||
|
|
||||||
At the request of Matthias Petri (University of Melbourne), the code no longer requires SIMD-word alignment to decode (the read and write
|
|
||||||
instructions have been changed from aligned to unaligned since Intel made them faster).
|
|
||||||
|
|
||||||
For details on the original QMX encoding see:
|
|
||||||
|
|
||||||
A. Trotman (2014), Compression, SIMD, and Postings Lists, Proceedings of the 19th Australasian Document Computing Symposium (ADCS 2014)
|
|
||||||
*/
|
|
||||||
class compress_integer_qmx_improved
|
|
||||||
{
|
|
||||||
typedef uint32_t integer;
|
|
||||||
|
|
||||||
private:
|
|
||||||
uint8_t *length_buffer; ///< Stores the number of bits needed to compress each integer
|
|
||||||
uint64_t length_buffer_length; ///< The length of length_buffer
|
|
||||||
uint32_t *full_length_buffer; ///< If the run_length is too short then 0-pad into this buffer
|
|
||||||
|
|
||||||
private:
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::WRITE_OUT()
|
|
||||||
------------------------------------------
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
@brief Encode and write out the sequence into the buffer
|
|
||||||
@param buffer [in] where to write the encoded sequence
|
|
||||||
@param source [in] the integer sequence to encode
|
|
||||||
@param raw_count [in] the numnber of integers to encode
|
|
||||||
@param size_in_bits [in] the size, in bits, of the largest integer
|
|
||||||
@param buffer_length [in] the length of buffer, in bytes
|
|
||||||
*/
|
|
||||||
void write_out(uint8_t **buffer, uint32_t *source, uint32_t raw_count, uint32_t size_in_bits, uint8_t **length_buffer);
|
|
||||||
|
|
||||||
public:
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::COMPRESS_INTEGER_QMX_IMPROVED()
|
|
||||||
--------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
@brief Constructor
|
|
||||||
*/
|
|
||||||
compress_integer_qmx_improved() :
|
|
||||||
length_buffer(nullptr),
|
|
||||||
length_buffer_length(0),
|
|
||||||
full_length_buffer(new uint32_t [256 * 16])
|
|
||||||
{
|
|
||||||
/* Nothing */
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::~COMPRESS_INTEGER_QMX_IMPROVED()
|
|
||||||
---------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
@brief Destructor
|
|
||||||
*/
|
|
||||||
virtual ~compress_integer_qmx_improved()
|
|
||||||
{
|
|
||||||
delete [] length_buffer;
|
|
||||||
delete [] full_length_buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::ENCODE()
|
|
||||||
---------------------------------------
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
@brief Encode a sequence of integers returning the number of bytes used for the encoding, or 0 if the encoded sequence doesn't fit in the buffer.
|
|
||||||
@param encoded [out] The sequence of bytes that is the encoded sequence.
|
|
||||||
@param encoded_buffer_length [in] The length (in bytes) of the output buffer, encoded.
|
|
||||||
@param source [in] The sequence of integers to encode.
|
|
||||||
@param source_integers [in] The length (in integers) of the source buffer.
|
|
||||||
@return The number of bytes used to encode the integer sequence, or 0 on error (i.e. overflow).
|
|
||||||
*/
|
|
||||||
virtual size_t encode(void *encoded, size_t encoded_buffer_length, const integer *source, size_t source_integers);
|
|
||||||
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::DECODE()
|
|
||||||
---------------------------------------
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
@brief Decode a sequence of integers encoded with this codex.
|
|
||||||
@param decoded [out] The sequence of decoded integers.
|
|
||||||
@param integers_to_decode [in] The minimum number of integers to decode (it may decode more).
|
|
||||||
@param source [in] The encoded integers.
|
|
||||||
@param source_length [in] The length (in bytes) of the source buffer.
|
|
||||||
*/
|
|
||||||
virtual void decode(integer *decoded, size_t integers_to_decode, const void *source, size_t source_length);
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::ENCODEARRAY()
|
|
||||||
--------------------------------------------
|
|
||||||
provided for backwards compatibility
|
|
||||||
*/
|
|
||||||
virtual void encodeArray(const uint32_t *in, uint64_t len, uint32_t *out, uint64_t *nvalue)
|
|
||||||
{
|
|
||||||
*nvalue = encode(out, *nvalue, in, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::DECODEARRAY()
|
|
||||||
--------------------------------------------
|
|
||||||
provided for backwards compatibility
|
|
||||||
*/
|
|
||||||
virtual void decodeArray(const uint32_t *in, uint64_t len, uint32_t *out, uint64_t nvalue)
|
|
||||||
{
|
|
||||||
decode(out, nvalue, in, len);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::UNITTEST_ONE()
|
|
||||||
---------------------------------------------
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
@brief Test one sequence to make sure it encodes and decodes to the same thing. Assert if not.
|
|
||||||
@para sequence [in] the sequernce to encode.
|
|
||||||
*/
|
|
||||||
static void unittest_one(const std::vector<uint32_t> &sequence);
|
|
||||||
|
|
||||||
/*
|
|
||||||
COMPRESS_INTEGER_QMX_IMPROVED::UNITTEST()
|
|
||||||
-----------------------------------------
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
@brief Unit test this class
|
|
||||||
*/
|
|
||||||
static void unittest(void);
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
Reference in New Issue
Block a user