IcBench App: External libs
This commit is contained in:
Submodule ext/FastPFor updated: 285bcd192d...71d54a9793
238
ext/SPDP_10.c
Normal file
238
ext/SPDP_10.c
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
/*
|
||||||
|
SPDP code: SPDP is a unified compression/decompression algorithm that works
|
||||||
|
well on both binary 32-bit single-precision (float) and binary 64-bit double-
|
||||||
|
precision (double) floating-point data.
|
||||||
|
|
||||||
|
Copyright (c) 2016, Texas State University. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
are permitted for academic, research, experimental, or personal use provided
|
||||||
|
that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions, and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions, and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of Texas State University nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from this
|
||||||
|
software without specific prior written permission.
|
||||||
|
|
||||||
|
For all other uses, please contact the Office for Commercialization and Industry
|
||||||
|
Relations at Texas State University <http://www.txstate.edu/ocir/>.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||||
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
Authors: Martin Burtscher and Steven Claggett
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#define MAX_TABLE_SIZE (1 << 18)

typedef unsigned char byte_t;
typedef unsigned int word_t;

/*
 * Compress `length` bytes held in buf1 and leave the result in buf2.
 *
 * level   selects the predictor table size, 1 << (level + 9) entries, capped
 *         at MAX_TABLE_SIZE. Levels above 9 behave exactly like level 9.
 * length  number of input bytes in buf1.
 * buf1    input data; it is used as scratch space and is overwritten.
 * buf2    output buffer. Every input byte produces at most a count byte plus
 *         a literal byte, so up to 2 * length bytes may be written.
 *
 * Returns the number of compressed bytes stored in buf2.
 *
 * Positions are kept in an `unsigned int` table, so inputs longer than
 * UINT_MAX bytes are not supported.
 * NOTE(review): the position table is a 1 MiB automatic array; callers
 * running on small stacks should be aware of that.
 */
static size_t compress(const byte_t level, const size_t length, byte_t* const buf1, byte_t* const buf2)
{
  const size_t len = length / sizeof(word_t);
  size_t pos;

  /* Stage 1: subtract from every word the word two positions earlier.
     memcpy keeps the word accesses valid for any buffer alignment. */
  word_t prev2 = 0;
  word_t prev1 = 0;
  for (pos = 0; pos < len; pos++) {
    word_t curr;
    word_t diff;
    memcpy(&curr, buf1 + pos * sizeof(word_t), sizeof(curr));
    diff = curr - prev2;
    memcpy(buf2 + pos * sizeof(word_t), &diff, sizeof(diff));
    prev2 = prev1;
    prev1 = curr;
  }

  /* Bytes that do not fill a whole word are copied unchanged. */
  for (pos = len * sizeof(word_t); pos < length; pos++) {
    buf2[pos] = buf1[pos];
  }

  /* Stage 2: gather bytes with a stride of 8 and store the difference
     between consecutive gathered bytes back into buf1. */
  byte_t prev = 0;
  size_t wpos = 0;
  size_t d;
  for (d = 0; d < 8; d++) {
    size_t src;
    for (src = d; src < length; src += 8) {
      const byte_t curr = buf2[src];
      buf1[wpos] = (byte_t)(curr - prev);
      prev = curr;
      wpos++;
    }
  }

  /* Stage 3: hash-predicted run matching.
     Clamp the level before shifting so the shift count is always valid. */
  const unsigned int shift = (level > 9) ? 18u : level + 9u;
  const unsigned int predtabsize = 1u << shift;
  const unsigned int predtabsizem1 = predtabsize - 1;

  unsigned int lastpos[MAX_TABLE_SIZE];
  memset(lastpos, 0, predtabsize * sizeof(unsigned int));

  size_t rpos = 0;
  wpos = 0;
  unsigned int hist = 0;
  while (rpos < length) {
    byte_t val = buf1[rpos];
    unsigned int lpos = lastpos[hist];
    if (lpos >= 6) {
      /* The prediction is used only when the six bytes before the predicted
         position equal the six bytes before the current position. */
      if (memcmp(buf1 + lpos - 6, buf1 + rpos - 6, 6) == 0) {
        byte_t cnt = 0;
        /* Count matching bytes; the last input byte is always left to be
           emitted as a literal below. */
        while ((val == buf1[lpos]) && (cnt < 255) && (rpos < (length - 1))) {
          lastpos[hist] = (unsigned int)rpos;
          hist = ((hist << 2) ^ val) & predtabsizem1;
          rpos++;
          lpos++;
          cnt++;
          val = buf1[rpos];
        }
        buf2[wpos] = cnt;
        wpos++;
      }
    }
    buf2[wpos] = val;
    wpos++;
    lastpos[hist] = (unsigned int)rpos;
    hist = ((hist << 2) ^ val) & predtabsizem1;
    rpos++;
  }

  return wpos;
}
|
||||||
|
|
||||||
|
/*
 * Reverse compress(): expand `length` compressed bytes from buf2 into buf1.
 *
 * level   must be the level that was used for compression; levels above 9
 *         behave exactly like level 9.
 * length  number of compressed bytes in buf2.
 * buf2    compressed input. It is also used as scratch space for the
 *         intermediate stage, so it must be able to hold the full
 *         decompressed size and is overwritten.
 * buf1    receives the decompressed data.
 *
 * The decompressed size is determined by the stream and is not returned;
 * the caller has to know it.
 * NOTE(review): nothing bounds the writes to buf1/buf2, so a corrupt stream
 * can write past buffers sized for valid data.
 * NOTE(review): the position table is a 1 MiB automatic array.
 */
static void decompress(const byte_t level, const size_t length, byte_t* const buf2, byte_t* const buf1)
{
  /* Clamp the level before shifting so the shift count is always valid. */
  const unsigned int shift = (level > 9) ? 18u : level + 9u;
  const unsigned int predtabsize = 1u << shift;
  const unsigned int predtabsizem1 = predtabsize - 1;

  unsigned int lastpos[MAX_TABLE_SIZE];
  memset(lastpos, 0, predtabsize * sizeof(unsigned int));

  /* Stage 1: undo the hash-predicted run matching (buf2 -> buf1). */
  size_t rpos = 0;
  size_t wpos = 0;
  unsigned int hist = 0;
  while (rpos < length) {
    unsigned int lpos = lastpos[hist];
    /* Same test as the compressor: a count byte is present only when the six
       bytes before the predicted position equal the six bytes just written. */
    if ((lpos >= 6) && (memcmp(buf1 + lpos - 6, buf1 + wpos - 6, 6) == 0)) {
      const byte_t cnt = buf2[rpos];
      byte_t j;
      rpos++;
      for (j = 0; j < cnt; j++) {
        const byte_t copied = buf1[wpos] = buf1[lpos];
        lastpos[hist] = (unsigned int)wpos;
        hist = ((hist << 2) ^ copied) & predtabsizem1;
        wpos++;
        lpos++;
      }
      /* A valid stream always has a literal after a count byte; stop instead
         of reading beyond the input when it is missing. */
      if (rpos >= length) break;
    }
    const byte_t val = buf1[wpos] = buf2[rpos];
    lastpos[hist] = (unsigned int)wpos;
    hist = ((hist << 2) ^ val) & predtabsizem1;
    wpos++;
    rpos++;
  }
  const size_t usize = wpos;

  /* Stage 2: running byte sum, scattered back with a stride of 8
     (buf1 -> buf2). */
  byte_t acc = 0;
  size_t d;
  rpos = 0;
  for (d = 0; d < 8; d++) {
    size_t dst;
    for (dst = d; dst < usize; dst += 8) {
      acc = (byte_t)(acc + buf1[rpos]);
      buf2[dst] = acc;
      rpos++;
    }
  }

  /* Stage 3: add back the word two positions earlier (buf2 -> buf1).
     memcpy keeps the word accesses valid for any buffer alignment. */
  const size_t len = usize / sizeof(word_t);
  word_t prev2 = 0;
  word_t prev1 = 0;
  size_t pos;
  for (pos = 0; pos < len; pos++) {
    word_t diff;
    word_t curr;
    memcpy(&diff, buf2 + pos * sizeof(word_t), sizeof(diff));
    curr = diff + prev2;
    memcpy(buf1 + pos * sizeof(word_t), &curr, sizeof(curr));
    prev2 = prev1;
    prev1 = curr;
  }

  /* Trailing bytes that do not fill a whole word were stored unchanged. */
  for (pos = len * sizeof(word_t); pos < usize; pos++) {
    buf1[pos] = buf2[pos];
  }
}
|
||||||
|
#ifndef NMAIN
|
||||||
|
#define BUFFER_SIZE (1 << 23)
|
||||||
|
static byte_t buffer1[BUFFER_SIZE];
|
||||||
|
static byte_t buffer2[BUFFER_SIZE * 2 + 9];
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
fprintf(stderr, "SPDP Floating-Point Compressor v1.0\n");
|
||||||
|
fprintf(stderr, "Copyright (c) 2016 Texas State University\n\n");
|
||||||
|
|
||||||
|
if ((argc != 1) && (argc != 2)) {
|
||||||
|
fprintf(stderr, "compression usage: %s level < uncompressed_file > compressed_file\n", argv[0]);
|
||||||
|
fprintf(stderr, "decompression usage: %s < compressed_file > decompressed_file\n", argv[0]);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argc == 2) { // compression
|
||||||
|
byte_t level = atoi(argv[1]);
|
||||||
|
if (level < 0) level = 0;
|
||||||
|
if (level > 9) level = 9;
|
||||||
|
fwrite(&level, sizeof(byte_t), 1, stdout);
|
||||||
|
|
||||||
|
int length = fread(buffer1, sizeof(byte_t), BUFFER_SIZE, stdin);
|
||||||
|
while (length > 0) {
|
||||||
|
fwrite(&length, sizeof(int), 1, stdout);
|
||||||
|
int csize = compress(level, length, buffer1, buffer2);
|
||||||
|
fwrite(&csize, sizeof(int), 1, stdout);
|
||||||
|
fwrite(buffer2, sizeof(byte_t), csize, stdout);
|
||||||
|
length = fread(buffer1, sizeof(byte_t), BUFFER_SIZE, stdin);
|
||||||
|
}
|
||||||
|
} else { // decompression
|
||||||
|
byte_t level = 10;
|
||||||
|
fread(&level, sizeof(byte_t), 1, stdin);
|
||||||
|
if ((level < 0) || (level > 9)) {
|
||||||
|
fprintf(stderr, "incorrect input file type\n");
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int length;
|
||||||
|
while (fread(&length, sizeof(int), 1, stdin) > 0) {
|
||||||
|
int csize;
|
||||||
|
fread(&csize, sizeof(int), 1, stdin);
|
||||||
|
fread(buffer2, sizeof(byte_t), csize, stdin);
|
||||||
|
decompress(level, csize, buffer2, buffer1);
|
||||||
|
fwrite(buffer1, sizeof(byte_t), length, stdout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
Submodule ext/c-blosc updated: 32ccb88cda...39ca44b1dc
2
ext/lz4
2
ext/lz4
Submodule ext/lz4 updated: f76ee4e267...b5233d3726
Submodule ext/streamvbyte updated: 5249a2f11e...a0bc6210ca
72
ext/trle.h
Normal file
72
ext/trle.h
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
/**
|
||||||
|
Copyright (C) powturbo 2015-2018
|
||||||
|
GPL v2 License
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
- email : powturbo [AT] gmail.com
|
||||||
|
- github : https://github.com/powturbo
|
||||||
|
- homepage : https://sites.google.com/site/powturbo/
|
||||||
|
- twitter : https://twitter.com/powturbo
|
||||||
|
|
||||||
|
TurboRLE - "Most efficient and fastest Run Length Encoding"
|
||||||
|
**/
|
||||||
|
#if defined(_MSC_VER) && _MSC_VER < 1600
|
||||||
|
#include "vs/stdint.h"
|
||||||
|
#else
|
||||||
|
#include <stdint.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
// RLE with specified escape char
|
||||||
|
unsigned _srlec8( const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint8_t e);
|
||||||
|
unsigned _srled8( const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, uint8_t e);
|
||||||
|
|
||||||
|
unsigned _srlec16(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint16_t e);
|
||||||
|
unsigned _srled16(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, uint16_t e);
|
||||||
|
|
||||||
|
unsigned _srlec32(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint32_t e);
|
||||||
|
unsigned _srled32(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, uint32_t e);
|
||||||
|
|
||||||
|
unsigned _srlec64(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint64_t e);
|
||||||
|
unsigned _srled64(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, uint64_t e);
|
||||||
|
|
||||||
|
// functions w/ overflow handling
|
||||||
|
unsigned srlec8( const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint8_t e);
|
||||||
|
unsigned srled8( const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint8_t e);
|
||||||
|
|
||||||
|
unsigned srlec16(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint16_t e);
|
||||||
|
unsigned srled16(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint16_t e);
|
||||||
|
|
||||||
|
unsigned srlec32(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint32_t e);
|
||||||
|
unsigned srled32(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint32_t e);
|
||||||
|
|
||||||
|
unsigned srlec64(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint64_t e);
|
||||||
|
unsigned srled64(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint64_t e);
|
||||||
|
|
||||||
|
// RLE w. automatic escape char determination
|
||||||
|
unsigned srlec(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out);
|
||||||
|
unsigned _srled(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen);
|
||||||
|
unsigned srled(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen);
|
||||||
|
|
||||||
|
// Turbo RLE
|
||||||
|
unsigned trlec(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out);
|
||||||
|
unsigned _trled(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen);
|
||||||
|
unsigned trled(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen);
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
62
ext/trle_.h
Normal file
62
ext/trle_.h
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
/**
|
||||||
|
Copyright (C) powturbo 2015-2018
|
||||||
|
GPL v2 License
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
- email : powturbo [AT] gmail.com
|
||||||
|
- github : https://github.com/powturbo
|
||||||
|
- homepage : https://sites.google.com/site/powturbo/
|
||||||
|
- twitter : https://twitter.com/powturbo
|
||||||
|
|
||||||
|
TurboRLE - "Most efficient and fastest Run Length Encoding"
|
||||||
|
**/
|
||||||
|
//------------------------- Variable Byte from https://github.com/powturbo/TurboPFor -----------------------------------------------------
|
||||||
|
#include "../conf.h"
|
||||||
|
// Layout constants of the variable-byte code. Evaluating the expressions below:
//   VB_BA3  = 254 - (64/8 - 3)  = 249
//   VB_BA2  = 249 - (1 << 3)    = 241
//   VB_OFS1 = 241 - (1 << 6)    = 177
//   VB_OFS2 = 177 + (1 << 14)   = 16561
//   VB_OFS3 = 16561 + (1 << 19) = 540849
#define VB_SIZE 64
#define VB_MAX  254
#define VB_B2   6
#define VB_B3   3
#define VB_BA3  (VB_MAX - (VB_SIZE/8 - 3))
#define VB_BA2  (VB_BA3 - (1<<VB_B3))

#define VB_OFS1 (VB_BA2 - (1<<VB_B2))
#define VB_OFS2 (VB_OFS1 + (1 << (8+VB_B2)))
#define VB_OFS3 (VB_OFS2 + (1 << (16+VB_B3)))

// _vblen32:  number of bytes _vbput32 emits for the value _x_.
// _vbvlen32: length derived from the first encoded byte _x_ of a stored value.
// NOTE(review): the last branch of _vbvlen32 yields (_x_ - VB_BA3), which is the
// value _vbget32 calls _b, while _vbget32 advances by 3 + _b after the first
// byte - confirm which length the callers of _vbvlen32 expect.
// bsr32 is not defined in this file (presumably provided by conf.h).
#define _vblen32(_x_)  ((_x_) < VB_OFS1?1:((_x_) < VB_OFS2?2:((_x_) < VB_OFS3)?3:(bsr32(_x_)+7)/8+1))
#define _vbvlen32(_x_) ((_x_) < VB_OFS1?1:((_x_) < VB_BA2?2:((_x_) < VB_BA3)?3:(_x_-VB_BA3)))

// _vbput32: append _x_ at _op_ (which is advanced) and then run _act_.
//   _x_ < VB_OFS1 : the value itself, one byte
//   _x_ < VB_OFS2 : two bytes; _vbget32 reads the first one as the high byte
//   _x_ < VB_OFS3 : marker VB_BA2 + high bits, then a 16-bit store of the rest
//   otherwise     : marker VB_BA3 + (_b - 3), then a ctou32 store of which _b
//                   bytes are kept (_op_ advances by _b only)
// The third branch modifies _x_, so _x_ must be a modifiable lvalue.
// The expansion is a bare { } block, not do { } while(0).
// likely, ctou16, ctou32, bswap16 and bsr32 are not defined in this file
// (presumably provided by conf.h).
#define _vbput32(_op_, _x_, _act_) {\
  if(likely((_x_) < VB_OFS1)){ *_op_++ = (_x_); _act_;}\
  else if ((_x_) < VB_OFS2) { ctou16(_op_) = bswap16((VB_OFS1<<8)+((_x_)-VB_OFS1)); _op_ += 2; /*(_x_) -= VB_OFS1; *_op_++ = VB_OFS1 + ((_x_) >> 8); *_op_++ = (_x_);*/ _act_; }\
  else if ((_x_) < VB_OFS3) { *_op_++ = VB_BA2 + (((_x_) -= VB_OFS2) >> 16); ctou16(_op_) = (_x_); _op_ += 2; _act_;}\
  else { unsigned _b = (bsr32((_x_))+7)/8; *_op_++ = VB_BA3 + (_b - 3); ctou32(_op_) = (_x_); _op_ += _b; _act_;}\
}

// _vbget32: read one value written by _vbput32 from _ip_ (which is advanced)
// into _x_, then run _act_. The first byte selects the branch using the same
// thresholds as the encoder. In the last branch _b is the marker minus VB_BA3,
// 3 + _b bytes are consumed and the mask (1u << 8*_b << 24) - 1 keeps exactly
// those bytes of the 32-bit load.
#define _vbget32(_ip_, _x_, _act_) do { _x_ = *_ip_++;\
       if(likely(_x_ < VB_OFS1)) { _act_ ;}\
  else if(likely(_x_ < VB_BA2))  { _x_ = /*bswap16(ctou16(_ip_-1))*/ ((_x_<<8) + (*_ip_)) + (VB_OFS1 - (VB_OFS1 << 8)); _ip_++; _act_;} \
  else if(likely(_x_ < VB_BA3))  { _x_ = ctou16(_ip_) + ((_x_ - VB_BA2 ) << 16) + VB_OFS2; _ip_ += 2; _act_;}\
  else { unsigned _b = _x_-VB_BA3; _x_ = ctou32(_ip_) & ((1u << 8 * _b << 24) - 1); _ip_ += 3 + _b; _act_;}\
} while(0)

// Convenience forms without a trailing action. vbput32 copies the value into a
// local first, so its argument may be an arbitrary expression.
#define vbput32(_op_, _x_) { register unsigned _x = _x_; _vbput32(_op_, _x, ;); }
#define vbget32(_ip_, _x_) _vbget32(_ip_, _x_, ;)

// vbzput: values below _m_ are written as the single byte _emap_[_x_]; any
// other value is written as _emap_[_m_] followed by the variable-byte code of
// (_x_ - _m_).
// vbzget: _x_ is first set to _e_; when it equals _m_ a variable-byte value is
// read and _m_ is added to it.
#define vbzput(_op_, _x_, _m_, _emap_) do { if(unlikely((_x_) < _m_)) *_op_++ = _emap_[_x_]; else { unsigned _xi = (_x_) - _m_; *_op_++ = _emap_[_m_]; vbput32(_op_, _xi); } } while(0)
#define vbzget(_ip_, _x_, _m_, _e_) { _x_ = _e_; if(unlikely(_x_ == _m_)) { vbget32(_ip_,_x_); _x_+=_m_; } }

// TurboRLE (TRLEC in trlec.c) only run-encodes runs of at least TMIN symbols.
#define TMIN 3
|
349
ext/trlec.c
Normal file
349
ext/trlec.c
Normal file
@ -0,0 +1,349 @@
|
|||||||
|
/**
|
||||||
|
Copyright (C) powturbo 2015-2018
|
||||||
|
GPL v2 License
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
- email : powturbo [AT] gmail.com
|
||||||
|
- github : https://github.com/powturbo
|
||||||
|
- homepage : https://sites.google.com/site/powturbo/
|
||||||
|
- twitter : https://twitter.com/powturbo
|
||||||
|
|
||||||
|
TurboRLE - "Most efficient and fastest Run Length Encoding"
|
||||||
|
**/
|
||||||
|
#ifndef USIZE
|
||||||
|
#include <string.h>
|
||||||
|
#ifdef __SSE__
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "trle_.h"
|
||||||
|
#include "trle.h"
|
||||||
|
|
||||||
|
//------------------------------------- Histogram ---------------------------------------------------------
|
||||||
|
// Add the number of occurrences of every byte value in in[0..inlen) to cc[0..255].
// cc is accumulated into, not cleared, so the caller provides the starting counts.
// Returns a, where a-1 is the largest byte value with a non-zero count in cc
// (a is 1 when no byte value above 0 has been counted).
// Only in[0..inlen) is read, one byte at a time, so any length (including 0 to 3)
// and any alignment are safe. Four partial tables are kept so that consecutive
// equal bytes do not all update the same counter.
static inline unsigned hist(const unsigned char *__restrict in, unsigned inlen, unsigned *cc) {
  unsigned c0[256] = {0}, c1[256] = {0}, c2[256] = {0}, c3[256] = {0};
  const unsigned char *ip = in;
  const unsigned char *const end  = in + inlen;
  const unsigned char *const end4 = in + (inlen & ~3u);
  unsigned a;
  int i;

  for(; ip != end4; ip += 4) {
    c0[ip[0]]++;
    c1[ip[1]]++;
    c2[ip[2]]++;
    c3[ip[3]]++;
  }
  while(ip != end)
    c0[*ip++]++;

  for(i = 0; i < 256; i++)
    cc[i] += c0[i] + c1[i] + c2[i] + c3[i];

  a = 256;
  while(a > 1 && !cc[a-1])
    a--;
  return a;
}
|
||||||
|
//------------------------------------- RLE with Escape char ------------------------------------------------------------------
|
||||||
|
#define SRLE8 32
|
||||||
|
#define USIZE 8
|
||||||
|
#include "trlec.c"
|
||||||
|
|
||||||
|
#if SRLE8
|
||||||
|
#define SRLEC8(pp, ip, op, e) do {\
|
||||||
|
unsigned i = ip - pp;\
|
||||||
|
if(i > 3) { *op++ = e; i -= 3; vbput32(op, i); *op++ = c; }\
|
||||||
|
else if(c == e) {\
|
||||||
|
while(i--) { *op++ = e; vbput32(op, 0); }\
|
||||||
|
} else while(i--) *op++ = c;\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
unsigned _srlec8(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint8_t e) {
|
||||||
|
const uint8_t *ip = in, *pp = in - 1;
|
||||||
|
uint8_t *op = out,c;
|
||||||
|
|
||||||
|
if(inlen > SRLE8)
|
||||||
|
while(ip < in+(inlen-1-SRLE8)) {
|
||||||
|
#if 0 //def __SSE__ // SSE slower than scalar
|
||||||
|
__m128i cv = _mm_set1_epi8(*ip);
|
||||||
|
unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i*)(ip+1)), cv)); if(mask != 0xffffu) goto a; ip += 16;
|
||||||
|
mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i*)(ip+1)), cv)); if(mask != 0xffffu) goto a; ip += 16;
|
||||||
|
continue;
|
||||||
|
a: c = *ip;
|
||||||
|
ip += __builtin_ctz((unsigned short)(~mask));
|
||||||
|
SRLEC8(pp, ip, op, e);
|
||||||
|
pp = ip++;
|
||||||
|
#elif __WORDSIZE == 64
|
||||||
|
{unsigned long long z;
|
||||||
|
if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8;
|
||||||
|
if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8;
|
||||||
|
#if SRLE8 >= 32
|
||||||
|
if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8;
|
||||||
|
if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8;
|
||||||
|
#endif
|
||||||
|
__builtin_prefetch(ip +256, 0);
|
||||||
|
continue;
|
||||||
|
a: c = *ip;
|
||||||
|
ip += ctz64(z)>>3;
|
||||||
|
SRLEC8(pp, ip, op, e);
|
||||||
|
pp = ip++;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
{ unsigned z;
|
||||||
|
if((z = (ctou32(ip) ^ ctou32(ip+1)))) goto a; ip += 4;
|
||||||
|
if((z = (ctou32(ip) ^ ctou32(ip+1)))) goto a; ip += 4;
|
||||||
|
#if SRLE8 >= 16
|
||||||
|
if((z = (ctou32(ip) ^ ctou32(ip+1)))) goto a; ip += 4;
|
||||||
|
if((z = (ctou32(ip) ^ ctou32(ip+1)))) goto a; ip += 4;
|
||||||
|
#endif
|
||||||
|
__builtin_prefetch(ip +256, 0);
|
||||||
|
continue;
|
||||||
|
a: c = *ip;
|
||||||
|
ip += ctz32(z)>>3;
|
||||||
|
SRLEC8(pp, ip, op, e);
|
||||||
|
pp = ip++;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
for(;ip < in+inlen; ip++)
|
||||||
|
if(*ip != ip[1]) {
|
||||||
|
c = *ip;
|
||||||
|
SRLEC8(pp,ip, op, e);
|
||||||
|
pp = ip;
|
||||||
|
}
|
||||||
|
c = *ip;
|
||||||
|
SRLEC8(pp, ip, op, e);
|
||||||
|
return op - out;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Escape-based RLE that picks the escape byte itself.
// Output layout:
//   empty input            -> nothing, returns 0
//   a single distinct byte -> that byte, returns 1
//   otherwise              -> escape byte followed by the _srlec8 stream
// When the encoded form is not shorter than the input, the input is copied to
// out verbatim and inlen is returned.
// NOTE(review): _srlec8 writes into out before the size check, so out has to be
// large enough for the encoded form even when the verbatim copy is returned.
unsigned srlec(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out) {
  unsigned counts[256] = {0};
  unsigned rarest = 0xffffffffu;
  unsigned escape = 0;
  unsigned top, sym;
  size_t packed;

  if(inlen < 1)
    return 0;

  // top-1 is the largest byte value present; if it accounts for every input
  // byte, the input consists of one repeated value.
  top = hist(in, inlen, counts);
  if(counts[top-1] == inlen) {
    *out = *in;
    return 1;
  }

  // Least frequent byte value becomes the escape; `<=` makes ties go to the
  // largest byte value.
  for(sym = 0; sym < 256; sym++) {
    if(counts[sym] <= rarest) {
      rarest = counts[sym];
      escape = sym;
    }
  }

  *out = escape;
  packed = _srlec8(in, inlen, out+1, escape)+1;
  if(packed >= inlen) {
    memcpy(out, in, inlen);
    return inlen;
  }
  return packed;
}
|
||||||
|
|
||||||
|
//------------------------------------------------- TurboRLE ------------------------------------------
|
||||||
|
struct u { unsigned c,i; };
|
||||||
|
|
||||||
|
#define PUTC(op, x) *op++ = x
|
||||||
|
#define TRLEC(pp, ip, op, _goto_) do {\
|
||||||
|
unsigned _i = ip - pp;\
|
||||||
|
if(_i >= TMIN) {\
|
||||||
|
unsigned char *q = op; \
|
||||||
|
vbzput(op, _i-TMIN, m, rmap); \
|
||||||
|
if((op-q) + 1 < _i) { *op++ = c; _goto_; } op=q;\
|
||||||
|
} while(_i--) PUTC(op,c);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#define TRLEC0(pp, ip, op, _goto_) do { unsigned _i = ip - pp;\
|
||||||
|
if(_i >= TMIN) { vbzput(op, _i-TMIN, m, rmap); *op++ = c; } else while(_i--) PUTC(op,c);\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
unsigned trlec(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out) {
|
||||||
|
int m,i;
|
||||||
|
unsigned b[256] = {0}, rmap[256],a;
|
||||||
|
struct u u[256],*v; // sort
|
||||||
|
unsigned char *op;
|
||||||
|
const unsigned char *ip,*pp;
|
||||||
|
uint8_t c;
|
||||||
|
if(inlen < 1) return 0;
|
||||||
|
|
||||||
|
a = hist(in,inlen,b);
|
||||||
|
if(b[a-1] == inlen) {
|
||||||
|
*out = *in;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; i < 256; i++) u[i].c = b[i], u[i].i = i,b[i]=0;
|
||||||
|
for(v = u + 1; v < u + 256; ++v)
|
||||||
|
if(v->c < v[-1].c) {
|
||||||
|
struct u *w, tmp = *v;
|
||||||
|
for(w = v; w > u && tmp.c < w[-1].c; --w) *w = w[-1];
|
||||||
|
*w = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(m = -1,i = 0; i < 256 && !u[i].c; i++)
|
||||||
|
b[u[i].i]++, ++m;
|
||||||
|
|
||||||
|
op = out;
|
||||||
|
|
||||||
|
if(m < 0) { // no unused bytes found
|
||||||
|
size_t l;
|
||||||
|
*op++ = 0;
|
||||||
|
*op++ = u[0].i;
|
||||||
|
l = _srlec8(in, inlen, op, u[0].i)+2;
|
||||||
|
if(l < inlen) return l;
|
||||||
|
memcpy(out, in, inlen);
|
||||||
|
return inlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
*op++ = 1;
|
||||||
|
memset(op, 0, 32);
|
||||||
|
for(m = -1,i = 0; i < 256; i++)
|
||||||
|
if(b[i]) {
|
||||||
|
op[i>>3] |= 1<<(i&7);
|
||||||
|
rmap[++m] = i;
|
||||||
|
}
|
||||||
|
op += 32;
|
||||||
|
|
||||||
|
ip = in; pp=in-1;
|
||||||
|
if(inlen > SRLE8)
|
||||||
|
while(ip < in+(inlen-1-SRLE8)) {
|
||||||
|
unsigned long long z;
|
||||||
|
if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8;
|
||||||
|
if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8;
|
||||||
|
#if SRLE8 >= 32
|
||||||
|
if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8;
|
||||||
|
if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8;
|
||||||
|
#endif
|
||||||
|
__builtin_prefetch(ip +256, 0);
|
||||||
|
continue;
|
||||||
|
a: c = *ip;
|
||||||
|
ip += ctz64(z)>>3;
|
||||||
|
TRLEC(pp, ip, op, goto laba);
|
||||||
|
laba:pp = ip++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(;ip < in+inlen; ip++) {
|
||||||
|
if(*ip != *(ip+1)) {
|
||||||
|
c = *ip;
|
||||||
|
TRLEC(pp, ip, op, goto labb);
|
||||||
|
labb:pp = ip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
c = *ip;
|
||||||
|
TRLEC(pp,ip, op, goto labc);
|
||||||
|
labc:
|
||||||
|
if(op - out < inlen)
|
||||||
|
return op - out;
|
||||||
|
memcpy(out, in, inlen);
|
||||||
|
return inlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef USIZE
|
||||||
|
#undef SRLE8
|
||||||
|
|
||||||
|
#define USIZE 16
|
||||||
|
#include "trlec.c"
|
||||||
|
#undef USIZE
|
||||||
|
|
||||||
|
#define USIZE 32
|
||||||
|
#include "trlec.c"
|
||||||
|
#undef USIZE
|
||||||
|
|
||||||
|
#define USIZE 64
|
||||||
|
#include "trlec.c"
|
||||||
|
#undef USIZE
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define uint_t TEMPLATE3(uint, USIZE, _t)
|
||||||
|
|
||||||
|
#define SRLEC(pp, ip, op, e) do {\
|
||||||
|
unsigned i = ip - pp;\
|
||||||
|
if(i > 3) { *(uint_t *)op = e; op+=sizeof(uint_t); i -= 3; vbput32(op, i); *(uint_t *)op = c; op+=sizeof(uint_t); }\
|
||||||
|
else if(c == e) {\
|
||||||
|
while(i--) { *(uint_t *)op = e; op+=sizeof(uint_t); vbput32(op, 0); }\
|
||||||
|
} else while(i--) { *(uint_t *)op = c; op+=sizeof(uint_t); }\
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#if !SRLE8
|
||||||
|
unsigned TEMPLATE2(_srlec, USIZE)(const unsigned char *__restrict cin, unsigned inlen, unsigned char *__restrict out, uint_t e) {
|
||||||
|
unsigned char *op = out;
|
||||||
|
uint_t *in = (uint_t *)cin, *pp = in-1, *ip=in,c;
|
||||||
|
unsigned n = inlen/sizeof(uint_t);
|
||||||
|
unsigned char *p;
|
||||||
|
if(n > 4)
|
||||||
|
for(; ip < in+(n-1-4);) {
|
||||||
|
#if 0
|
||||||
|
if(* ip == ip[1])
|
||||||
|
if(*++ip == ip[1])
|
||||||
|
if(*++ip == ip[1])
|
||||||
|
if(*++ip == ip[1]) {
|
||||||
|
ip++; __builtin_prefetch(ip +256, 0);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if(*ip != ip[1]) goto a; ++ip;
|
||||||
|
if(*ip != ip[1]) goto a; ++ip;
|
||||||
|
if(*ip != ip[1]) goto a; ++ip;
|
||||||
|
if(*ip != ip[1]) goto a; ++ip; __builtin_prefetch(ip +256, 0);
|
||||||
|
continue;
|
||||||
|
a:;
|
||||||
|
#endif
|
||||||
|
c = *ip;
|
||||||
|
SRLEC(pp,ip, op, e);
|
||||||
|
pp = ip++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(;ip < in+n; ip++)
|
||||||
|
if(*ip != ip[1]) {
|
||||||
|
c = *ip;
|
||||||
|
SRLEC(pp,ip, op, e);
|
||||||
|
pp = ip;
|
||||||
|
}
|
||||||
|
c = *ip;
|
||||||
|
SRLEC(pp, ip, op, e);
|
||||||
|
|
||||||
|
#if USIZE > 8
|
||||||
|
p = (unsigned char *)ip;
|
||||||
|
while(p < cin+inlen)
|
||||||
|
*op++ = *p++;
|
||||||
|
#endif
|
||||||
|
return op - out;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#undef SRLEC
|
||||||
|
|
||||||
|
// Size-checked wrapper around the USIZE-bit _srlec: returns the encoded length
// when it is smaller than inlen; otherwise copies the input to out verbatim and
// returns inlen.
// NOTE(review): the encoder writes into out before the size check, so out has to
// be large enough for the encoded form even when the verbatim copy is returned.
unsigned TEMPLATE2(srlec, USIZE)(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint_t e) {
  const size_t packed = TEMPLATE2(_srlec, USIZE)(in, inlen, out, e);

  if(packed >= inlen) {
    memcpy(out, in, inlen);
    return inlen;
  }
  return packed;
}
|
||||||
|
#endif
|
||||||
|
|
259
ext/trled.c
Normal file
259
ext/trled.c
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
/**
|
||||||
|
Copyright (C) powturbo 2015-2018
|
||||||
|
GPL v2 License
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
- email : powturbo [AT] gmail.com
|
||||||
|
- github : https://github.com/powturbo
|
||||||
|
- homepage : https://sites.google.com/site/powturbo/
|
||||||
|
- twitter : https://twitter.com/powturbo
|
||||||
|
|
||||||
|
TurboRLE - "Most efficient and fastest Run Length Encoding"
|
||||||
|
**/
|
||||||
|
#ifndef USIZE
|
||||||
|
#include <string.h>
|
||||||
|
#ifdef __SSE__
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "trle.h"
|
||||||
|
#include "trle_.h"
|
||||||
|
|
||||||
|
//------------------------------------- RLE with Escape char ------------------------------------------------------------------
|
||||||
|
//#define MEMSAFE
|
||||||
|
#define SRLE8 32 // 16//
|
||||||
|
#define USIZE 8
|
||||||
|
#include "trled.c"
|
||||||
|
|
||||||
|
#if SRLE8
|
||||||
|
unsigned _srled8(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, unsigned char e) {
|
||||||
|
const uint8_t *ip = in;
|
||||||
|
uint8_t *op = out, c;
|
||||||
|
uint32_t i;
|
||||||
|
#ifdef __SSE__
|
||||||
|
__m128i ev = _mm_set1_epi8(e);
|
||||||
|
#endif
|
||||||
|
if(outlen >= SRLE8)
|
||||||
|
while(op < out+(outlen-SRLE8)) {
|
||||||
|
|
||||||
|
#ifdef __SSE__ // TODO: test _mm_cmpestrm/_mm_cmpestri on sse4
|
||||||
|
uint32_t mask;
|
||||||
|
__m128i u,v = _mm_loadu_si128((__m128i*)ip); _mm_storeu_si128((__m128i *)op, v); mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v, ev)); if(mask) goto a; op += 16; ip += 16;
|
||||||
|
#if SRLE8 >= 32
|
||||||
|
u = _mm_loadu_si128((__m128i*)ip); _mm_storeu_si128((__m128i *)op, u); mask = _mm_movemask_epi8(_mm_cmpeq_epi8(u, ev)); if(mask) goto a; op += 16; ip += 16;
|
||||||
|
#endif
|
||||||
|
__builtin_prefetch(ip+512, 0);
|
||||||
|
continue;
|
||||||
|
a: i = ctz32(mask);
|
||||||
|
op += i; ip += i+1;
|
||||||
|
{
|
||||||
|
#else
|
||||||
|
if(likely((c = *(uint8_t *)ip) != e)) {
|
||||||
|
ip++;
|
||||||
|
*op++ = c;
|
||||||
|
} else {
|
||||||
|
#endif
|
||||||
|
vbget32(ip, i);
|
||||||
|
if(likely(i)) {
|
||||||
|
uint8_t c = *ip++;
|
||||||
|
i += TMIN;
|
||||||
|
rmemset(op, c, i);
|
||||||
|
} else
|
||||||
|
*op++ = e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define rmemset8(_op_, _c_, _i_) while(_i_--) *_op_++ = _c_
|
||||||
|
while(op < out+outlen)
|
||||||
|
if(likely((c = *ip) != e)) {
|
||||||
|
ip++;
|
||||||
|
*op++ = c;
|
||||||
|
} else {
|
||||||
|
int i;
|
||||||
|
ip++;
|
||||||
|
vbget32(ip, i);
|
||||||
|
if(likely(i)) {
|
||||||
|
c = *ip++;
|
||||||
|
i += TMIN;
|
||||||
|
rmemset8(op, c, i);
|
||||||
|
} else
|
||||||
|
*op++ = e;
|
||||||
|
}
|
||||||
|
return ip - in;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Decode an escape-RLE stream whose first byte is the escape value.
 * The remaining bytes are handed to _srled8(); its return value is passed
 * through unchanged.
 */
unsigned _srled(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen) {
  const unsigned char esc = in[0];

  return _srled8(&in[1], out, outlen, esc);
}
|
||||||
|
|
||||||
|
/*
 * Escape-RLE decode of `inlen` input bytes into exactly `outlen` output bytes.
 *
 *   inlen == outlen : the data is stored raw and is copied verbatim;
 *   inlen == 1      : the whole output is the single byte in[0], repeated;
 *   otherwise       : in[0] is the escape byte and the rest is decoded by
 *                     _srled8().
 *
 * Returns `inlen` for the first two cases and the value returned by
 * _srled8() for the third.
 */
unsigned srled(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen) {
  if(inlen == outlen) {
    memcpy(out, in, outlen);
    return inlen;
  }
  if(inlen == 1) {
    memset(out, in[0], outlen);
    return inlen;
  }
  return _srled8(in+1, out, outlen, *in);
}
|
||||||
|
//------------------------------------- TurboRLE ------------------------------------------
|
||||||
|
/*
 * TurboRLE decoder core.
 *
 * Header handling:
 *   - in[0] == 0 : the stream is plain escape-RLE; in[1] is the escape byte
 *                  and decoding is delegated to _srled8() (result + 2).
 *   - otherwise  : in[1..32] is a 256-bit map, bit (k & 7) of byte (k >> 3)
 *                  set meaning that byte value k is an escape symbol.
 *                  Escape symbols are numbered 1, 2, ... in ascending k.
 *
 * Payload: a non-escape byte is a literal.  An escape byte is followed by a
 * length read with vbzget() from the escape's number, then by the run value;
 * the run is emitted (length + 3) times.
 *
 * Returns 0 when outlen is 0.  In the bitmap case the result is the number of
 * bytes consumed counted from in[1], i.e. the header byte in[0] is excluded.
 */
unsigned _trled(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen) {
  uint8_t        esc[256] = {0};      /* esc[v] != 0  <=>  v is escape number esc[v] */
  uint8_t       *op = out;
  const uint8_t *ip;
  int            last = -1;           /* number of escapes found so far, minus one */
  int            run, sym, byte, bit;

  if(outlen < 1)
    return 0;

  if(!*in++)
    return _srled8(in+1, out, outlen, *in)+2;

  /* Expand the 32-byte bitmap into the lookup table. */
  for(byte = 0; byte < 32; byte++)
    for(bit = 0; bit < 8; bit++)
      if((in[byte] >> bit) & 1) {
        last++;
        esc[byte<<3 | bit] = last+1;
      }
  ip = in + 32;

  /* Fast loop: the output bound is checked once per 8 literals, and runs are
     filled with rmemset(), hence the 32-byte safety margin. */
  if(outlen >= 32)
    while(op < out+(outlen-32)) {
      if(esc[*ip]) goto escape; *op++ = *ip++;
      if(esc[*ip]) goto escape; *op++ = *ip++;
      if(esc[*ip]) goto escape; *op++ = *ip++;
      if(esc[*ip]) goto escape; *op++ = *ip++;
      if(esc[*ip]) goto escape; *op++ = *ip++;
      if(esc[*ip]) goto escape; *op++ = *ip++;
      if(esc[*ip]) goto escape; *op++ = *ip++;
      if(esc[*ip]) goto escape; *op++ = *ip++;
      __builtin_prefetch(ip+256, 0);
      continue;

    escape:
      sym = esc[*ip++];
      vbzget(ip, run, last, sym-1);
      sym = *ip++;
      run += 3;
      rmemset(op, sym, run);
    }

  /* Tail: one symbol at a time with byte-exact run fill. */
  while(op < out+outlen) {
    sym = esc[*ip];
    if(likely(!sym)) {
      *op++ = *ip++;
      continue;
    }
    ip++;
    vbzget(ip, run, last, sym-1);
    sym = *ip++;
    run += 3;
    rmemset8(op, sym, run);
  }
  return ip - in;
}
|
||||||
|
|
||||||
|
/*
 * TurboRLE decode of `inlen` input bytes into exactly `outlen` output bytes.
 *
 *   inlen == outlen : the data is stored raw and is copied verbatim;
 *   inlen == 1      : the whole output is the single byte in[0], repeated;
 *   otherwise       : the stream is decoded by _trled().
 *
 * Returns `inlen` for the first two cases and the value returned by _trled()
 * for the third.
 */
unsigned trled(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen) {
  if(inlen == outlen) {
    memcpy(out, in, outlen);
    return inlen;
  }
  if(inlen == 1) {
    memset(out, in[0], outlen);
    return inlen;
  }
  return _trled(in, out, outlen);
}
|
||||||
|
|
||||||
|
#undef USIZE
|
||||||
|
#undef rmemset
|
||||||
|
#undef SRLE8
|
||||||
|
|
||||||
|
#define USIZE 16
|
||||||
|
#include "trled.c"
|
||||||
|
#undef rmemset
|
||||||
|
#undef USIZE
|
||||||
|
#undef runcpy
|
||||||
|
|
||||||
|
#define USIZE 32
|
||||||
|
#include "trled.c"
|
||||||
|
#undef rmemset
|
||||||
|
#undef USIZE
|
||||||
|
#undef runcpy
|
||||||
|
|
||||||
|
#define USIZE 64
|
||||||
|
#include "trled.c"
|
||||||
|
#undef rmemset
|
||||||
|
#undef USIZE
|
||||||
|
|
||||||
|
#else
|
||||||
|
/*
 * rmemset(_op_, _c_, _i_): write the USIZE-bit value _c_ as a run of _i_
 * elements starting at _op_ and advance _op_ by _i_ elements.
 *
 *   MEMSAFE        : element-by-element, never writes past the run
 *                    (and leaves _i_ decremented);
 *   SSE, USIZE < 64: 32 bytes (two 128-bit stores) per iteration;
 *   otherwise      : two USIZE-bit stores per iteration.
 *
 * The two fast variants always execute at least one iteration and may store
 * beyond the end of the run, so the destination needs slack after it.
 * Arguments are parenthesised and temporaries carry a leading underscore so
 * that caller expressions and variable names cannot be captured.
 */
  #ifdef MEMSAFE
#define rmemset(_op_, _c_, _i_) do { while((_i_)--) *(_op_)++ = (_c_); } while(0)
  #elif defined(__SSE__) && USIZE < 64
#define rmemset(_op_, _c_, _i_) do { \
  __m128i *_up = (__m128i *)(_op_), _cv = TEMPLATE2(_mm_set1_epi, USIZE)(_c_);\
  (_op_) += (_i_);\
  do { _mm_storeu_si128( _up, _cv); _mm_storeu_si128(_up+1, _cv); _up+=2; } while(_up < (__m128i *)(_op_));\
} while(0)
  #else
/* Broadcast helpers: replicate a USIZE-bit value across a 64-bit word.
   rmemset() below does not need them; the definitions are kept unchanged. */
#define _cset64(_cc,_c_) _cc = _c_
#define _cset32(_cc,_c_) _cc = _c_; _cc = _cc<<32|_cc
#define _cset16(_cc,_c_) _cc = _c_; _cc = _cc<<48|_cc<<32|_cc<<16|_cc
#define _cset8( _cc,_c_) _cc = (uint32_t)_c_<<24 | (uint32_t)_c_<<16 | (uint32_t)_c_<<8 | (uint32_t)_c_; _cc = _cc<<32|_cc

#define rmemset(_op_, _c_, _i_) do { uint8_t *_up = (uint8_t *)(_op_); (_op_) += (_i_);\
  do {\
    TEMPLATE2(ctou, USIZE)(_up) = (_c_); _up += USIZE/8;\
    TEMPLATE2(ctou, USIZE)(_up) = (_c_); _up += USIZE/8;\
  } while(_up < (uint8_t *)(_op_));\
} while(0)
  #endif

/* Unsigned integer type matching the current template width (uint16_t, uint32_t, ...). */
#define uint_t TEMPLATE3(uint, USIZE, _t)
|
||||||
|
|
||||||
|
#if !SRLE8
|
||||||
|
/*
 * Escape-RLE decoder for USIZE-bit symbols.
 *
 * The input is read one uint_t at a time.  A word different from `e` is a
 * literal.  The word `e` is followed by a variable-byte integer (vbget32):
 * zero stands for one literal `e`; a non-zero value n is followed by the run
 * word, which is emitted n + 3 times.
 *
 * outlen / sizeof(uint_t) whole words are decoded this way; for USIZE > 8 the
 * remaining outlen % sizeof(uint_t) bytes are copied from the input verbatim.
 *
 * Returns the number of bytes consumed from `in`.
 */
unsigned TEMPLATE2(_srled, USIZE)(const unsigned char *__restrict in, unsigned char *__restrict cout, unsigned outlen, uint_t e) {
  uint_t              *out  = (uint_t *)cout;
  uint_t *const        oend = out + outlen/sizeof(uint_t);
  uint_t              *op   = out;
  const unsigned char *ip   = in;

  while(op < oend) {
    uint_t w;
    int    n;

    __builtin_prefetch(ip +384, 0);
    w   = *(uint_t *)ip;
    ip += sizeof(uint_t);
    if(likely(w != e)) {               /* literal word */
      *op++ = w;
      continue;
    }

    vbget32(ip, n);
    if(likely(n)) {                    /* run: value word follows the length */
      w   = *(uint_t *)ip;
      ip += sizeof(uint_t);
      n  += 3;
      rmemset(op, w, n);
    } else
      *op++ = e;                       /* zero length encodes a literal escape word */
  }
    #if USIZE > 8
  { /* Trailing bytes that do not form a whole word are stored raw. */
    unsigned char *p;
    for(p = (unsigned char *)op; p < cout+outlen; )
      *p++ = *ip++;
  }
    #endif
  return ip - in;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Escape-RLE decode (USIZE-bit symbols) of `inlen` input bytes into exactly
 * `outlen` output bytes, using escape value `e`.
 *
 *   inlen == outlen : the data is stored raw and is copied verbatim;
 *   inlen == 1      : the whole output is the single byte in[0], repeated;
 *   otherwise       : the stream is decoded by _srled<USIZE>().
 *
 * Returns `inlen` for the first two cases and the decoder's return value for
 * the third.
 */
unsigned TEMPLATE2(srled, USIZE)(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint_t e) {
  if(inlen == outlen) {
    memcpy(out, in, outlen);
    return inlen;
  }
  if(inlen == 1) {
    memset(out, in[0], outlen);
    return inlen;
  }
  return TEMPLATE2(_srled, USIZE)(in, out, outlen, e);
}
|
||||||
|
#endif
|
Reference in New Issue
Block a user