From 4aed8150984ad214a7231b67c9db2229ad76bc9e Mon Sep 17 00:00:00 2001 From: x Date: Mon, 15 Jul 2019 10:32:54 +0200 Subject: [PATCH] TurboPFor: Bit Packing --- bitpack.c | 89 ++++++++------ ext/trle.h | 72 ----------- ext/trle_.h | 62 ---------- ext/trlec.c | 349 ---------------------------------------------------- ext/trled.c | 259 -------------------------------------- 5 files changed, 54 insertions(+), 777 deletions(-) mode change 100644 => 100755 bitpack.c delete mode 100644 ext/trle.h delete mode 100644 ext/trle_.h delete mode 100644 ext/trlec.c delete mode 100644 ext/trled.c diff --git a/bitpack.c b/bitpack.c old mode 100644 new mode 100755 index 797d54d..43d3c0e --- a/bitpack.c +++ b/bitpack.c @@ -1,5 +1,5 @@ /** - Copyright (C) powturbo 2013-2018 + Copyright (C) powturbo 2013-2019 GPL v2 License This program is free software; you can redistribute it and/or modify @@ -25,10 +25,11 @@ #include #include "conf.h" -#include "bitpack.h" #include "bitutil.h" #include "vint.h" +#include "bitpack.h" #define PAD8(_x_) ( (((_x_)+8-1)/8) ) +#define PREFETCH(_ip_) __builtin_prefetch(_ip_+768,0)//#define PREFETCH(ip) #pragma warning( disable : 4005) #pragma warning( disable : 4090) @@ -45,7 +46,6 @@ typedef unsigned char *(*BITPACK_D32)(uint32_t *__restrict out, unsigned n, cons typedef unsigned char *(*BITPACK_F64)(uint64_t *__restrict out, unsigned n, const unsigned char *__restrict in); typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, const unsigned char *__restrict in, uint64_t start); -#define PREFETCH(_ip_) __builtin_prefetch(_ip_+768,0)//#define PREFETCH(ip) #if 1 //def _MSC_VER #define VX (v=x) @@ -192,6 +192,7 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNPACK(in, n, out, 128, 8); } size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNPACK(in, n, out, 128, 16); } + size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNPACK(in, n, out, 128, 32); } size_t bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNPACK(in, n, out, 128, 64); } @@ -225,38 +226,40 @@ size_t bitnfpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict return op - out;\ } -#define _BITNDPACKV(in, n, out, _csize_, _usize_, _bitd_, _bitpackv_, _bitpack_) { if(!n) return 0;\ +#define _BITNDPACKV(in, n, out, _csize_, _usize_, _bitdv_, _bitpackv_, _bitd_, _bitpack_) { if(!n) return 0;\ unsigned char *op = out; \ start = *in++; \ TEMPLATE2(vbxput, _usize_)(op, start);\ for(n--,ip = in; ip != in + (n&~(_csize_-1)); ) { PREFETCH(ip+512);\ - unsigned b = TEMPLATE2(_bitd_, _usize_)(ip, _csize_, start); *op++ = b; op = TEMPLATE2(_bitpackv_, _usize_)(ip, _csize_, op, start, b); ip += _csize_; start = ip[-1];\ - } if(n&=(_csize_-1)) { unsigned b = TEMPLATE2(_bitd_, _usize_)(ip, n, start); *op++ = b; op = TEMPLATE2(_bitpack_, _usize_)(ip, n, op, start, b); }\ + unsigned b = TEMPLATE2(_bitdv_, _usize_)(ip, _csize_, start); *op++ = b; op = TEMPLATE2(_bitpackv_, _usize_)(ip, _csize_, op, start, b); ip += _csize_; start = ip[-1];\ + } if(n&=(_csize_-1)) { unsigned b = TEMPLATE2(_bitd_, _usize_)(ip, n, start); *op++ = b; op = TEMPLATE2(_bitpack_, _usize_)(ip, n, op, start, b); }\ return op - out;\ } -#if defined(__SSE2__) && defined(SSE2_ON) -#include - +#if (defined(__SSE2__) || defined(__ARM_NEON)) && defined(SSE2_ON) #define OPPE(__op) #define IPPE(__op) #define VI16(ip, i, iv, parm) #define VI32(ip, i, iv, parm) -#define IP16(ip, i, iv) _mm_loadu_si128(ip++) -#define IP32(ip, i, iv) _mm_loadu_si128(ip++) -#include "bitpack_.h" +#define IP16(_ip_, i, iv) _mm_loadu_si128(_ip_++) +#define IP32(_ip_, i, iv) _mm_loadu_si128(_ip_++) +#include "bitpack_.h" unsigned char *bitpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(128*b); BITPACK128V16(in, b, out, 0); return pout; } unsigned char *bitpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return pout; } unsigned char *bitpack256w32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *_out=out; unsigned *_in=in; BITPACK128V32(in, b, out, 0); in = _in+128; out = _out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return _out+PAD8(256*b); } - -#define IP32(ip, i, iv) _mm_or_si128(_mm_shuffle_epi32(_mm_loadu_si128(ip++),_MM_SHUFFLE(2, 0, 3, 1)), _mm_shuffle_epi32(_mm_loadu_si128(ip++),_MM_SHUFFLE(3, 1, 2, 0)) ) + +#ifdef __ARM_NEON +//#define IP32(_ip_, i, iv) _mm_or_si128(_mm_shuffle_epi32( _mm_loadu_si128(_ip_++),_MM_SHUFFLE(3, 1, 2, 0)), _mm_shuffle_epi32( _mm_loadu_si128(_ip_++),_MM_SHUFFLE(2, 0, 3, 1)) ) +#define IP32(_ip_, _i_, _iv_) _mm_or_si128(mm_shuffle_3120_epi32(_mm_loadu_si128(_ip_++) ), mm_shuffle_2031_epi32(_mm_loadu_si128(_ip++) ) ) // optimized shuffle +#else +#define IP32(_ip_, i, iv) _mm_or_si128(_mm_shuffle_epi32( _mm_loadu_si128(_ip_++),_MM_SHUFFLE(2, 0, 3, 1)), _mm_shuffle_epi32( _mm_loadu_si128(_ip_++),_MM_SHUFFLE(3, 1, 2, 0)) ) +#endif #include "bitpack_.h" -unsigned char *bitpack128v64(uint64_t *__restrict _in, unsigned n, unsigned char *__restrict out, unsigned b) { - if(b>32) return bitpack64(_in,n,out,b); - else { unsigned char *pout = out+PAD8(128*b); uint32_t *in = _in; BITPACK128V32(in, b, out, 0); return pout; } -} +unsigned char *bitpack128v64(uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { + if(b<=32) { unsigned char *pout = out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return pout; } else return bitpack64(in,n,out,b); +} #define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x16(v,_sv_); _sv_ = v #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x32(v,_sv_); _sv_ = v @@ -270,7 +273,7 @@ unsigned char *bitdpack128v32(unsigned *__restrict in, unsigned n, unsigne __m128i v,sv = _mm_set1_epi32(start); BITPACK128V32(in, b, out, sv); return pout; } -#define VI16(_ip_, _i_, _iv_, _sv_) +#define VI16(_ip_, _i_, _iv_, _sv_) #define VI32(_ip_, _i_, _iv_, _sv_) #define IP16(_ip_, i, _iv_) _mm_sub_epi16(_mm_loadu_si128(_ip_++),sv) #define IP32(_ip_, i, _iv_) _mm_sub_epi32(_mm_loadu_si128(_ip_++),sv) @@ -281,18 +284,30 @@ unsigned char *bitfpack128v16(unsigned short *__restrict in, unsigned n, unsigne unsigned char *bitfpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); __m128i v, sv = _mm_set1_epi32(start); BITPACK128V32(in, b, out, sv); return pout; } - + #define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi16(DELTA128x16(v,_sv_),cv); _sv_ = v #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi32(DELTA128x32(v,_sv_),cv); _sv_ = v #define IP16(ip, i, _iv_) _iv_ #define IP32(ip, i, _iv_) _iv_ unsigned char *bitd1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set1_epi16(start), cv = _mm_set1_epi16(1); BITPACK128V16(in, b, out, sv); return pout; + __m128i sv = _mm_set1_epi16(start), cv = _mm_set1_epi16(1), v; BITPACK128V16(in, b, out, sv); return pout; } unsigned char *bitd1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); __m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(1); BITPACK128V32(in, b, out, sv); return pout; } +#define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi16(SUBI128x16(v,_sv_),cv); _sv_ = v +#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi32(SUBI128x32(v,_sv_),cv); _sv_ = v +#define IP16(ip, i, _iv_) _iv_ +#define IP32(ip, i, _iv_) _iv_ +unsigned char *bits1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b); + __m128i v, sv = _mm_set1_epi16(start), cv = _mm_set1_epi16(8); BITPACK128V16(in, b, out, sv); return pout; +} +unsigned char *bits1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); + __m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(4); BITPACK128V32(in, b, out, sv); return pout; +} + + #define VI16(_ip_, _i_, _iv_, _sv_) _iv_ = _mm_sub_epi16(_mm_loadu_si128(_ip_++),_sv_); _sv_ = _mm_add_epi16(_sv_,cv); #define VI32(_ip_, _i_, _iv_, _sv_) _iv_ = _mm_sub_epi32(_mm_loadu_si128(_ip_++),_sv_); _sv_ = _mm_add_epi32(_sv_,cv); #define IP16(ip, i, _iv_) _iv_ @@ -316,18 +331,22 @@ unsigned char *bitzpack128v32(unsigned *__restrict in, unsigned n, unsigne size_t bitnpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip; _BITNPACKV( in, n, out, 128, 16, bitpack128v); } size_t bitnpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; _BITNPACKV( in, n, out, 128, 32, bitpack128v); } size_t bitnpack128v64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip; _BITNPACKV( in, n, out, 128, 64, bitpack128v); } +size_t bitnpack256w32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; _BITNPACKV( in, n, out, 256, 32, bitpack256w); } -size_t bitndpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitd, bitdpack128v, bitdpack); } -size_t bitndpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitd, bitdpack128v, bitdpack); } +size_t bitndpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitd, bitdpack128v, bitd, bitdpack); } +size_t bitndpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitd, bitdpack128v, bitd, bitdpack); } -size_t bitnd1pack128v16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitd1, bitd1pack128v, bitd1pack); } -size_t bitnd1pack128v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitd1, bitd1pack128v, bitd1pack); } +size_t bitnd1pack128v16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitd1, bitd1pack128v, bitd1, bitd1pack); } +size_t bitnd1pack128v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitd1, bitd1pack128v, bitd1, bitd1pack); } -size_t bitnzpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitz, bitzpack128v, bitzpack); } -size_t bitnzpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitz, bitzpack128v, bitzpack); } +size_t bitns1pack128v16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bits128v, bits1pack128v, bitd1, bitd1pack); } +size_t bitns1pack128v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bits128v, bits1pack128v, bitd1, bitd1pack); } -size_t bitnfpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitf, bitfpack128v, bitfpack); } -size_t bitnfpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitf, bitfpack128v, bitfpack); } +size_t bitnzpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitz, bitzpack128v, bitz, bitzpack); } +size_t bitnzpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitz, bitzpack128v, bitz, bitzpack); } + +size_t bitnfpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitf, bitfpack128v, bitf, bitfpack); } +size_t bitnfpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitf, bitfpack128v, bitf, bitfpack); } #endif #if defined(__AVX2__) && defined(AVX2_ON) @@ -342,8 +361,7 @@ size_t bitnfpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__rest #define VI32(ip, i, iv, parm) #define IP32(ip, i, iv) _mm256_loadu_si256(ip++) -#include "bitpack_.h" - + unsigned char *bitpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(256*b); BITPACK256V32(in, b, out, 0); return pout; } #undef VI32 #undef IP32 @@ -389,11 +407,12 @@ unsigned char *bitzpack256v32(unsigned *__restrict in, unsigned n, unsigne } size_t bitnpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; _BITNPACKV( in, n, out, 256, 32, bitpack256v); } -size_t bitndpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitd, bitdpack256v, bitdpack); } -size_t bitnd1pack256v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitd1, bitd1pack256v, bitd1pack); } -size_t bitnzpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitz, bitzpack256v, bitzpack); } -size_t bitnfpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitf, bitfpack256v, bitfpack); } +size_t bitndpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitd, bitdpack256v, bitd, bitdpack); } +size_t bitnd1pack256v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitd1, bitd1pack256v,bitd1, bitd1pack); } +size_t bitnzpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitz, bitzpack256v, bitz, bitzpack); } +size_t bitnfpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitf, bitfpack256v, bitf, bitfpack); } #endif #pragma clang diagnostic pop + diff --git a/ext/trle.h b/ext/trle.h deleted file mode 100644 index 32b2bcc..0000000 --- a/ext/trle.h +++ /dev/null @@ -1,72 +0,0 @@ -/** - Copyright (C) powturbo 2015-2018 - GPL v2 License - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - - email : powturbo [AT] gmail.com - - github : https://github.com/powturbo - - homepage : https://sites.google.com/site/powturbo/ - - twitter : https://twitter.com/powturbo - - TurboRLE - "Most efficient and fastest Run Length Encoding https://github.com/powturbo/TurboRLE" -**/ -#if defined(_MSC_VER) && _MSC_VER < 1600 -#include "vs/stdint.h" -#else -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif -// RLE with specified escape char -unsigned _srlec8( const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint8_t e); -unsigned _srled8( const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, uint8_t e); - -unsigned _srlec16(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint16_t e); -unsigned _srled16(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, uint16_t e); - -unsigned _srlec32(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint32_t e); -unsigned _srled32(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, uint32_t e); - -unsigned _srlec64(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint64_t e); -unsigned _srled64(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, uint64_t e); - -// functions w/ overflow handling -unsigned srlec8( const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint8_t e); -unsigned srled8( const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint8_t e); - -unsigned srlec16(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint16_t e); -unsigned srled16(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint16_t e); - -unsigned srlec32(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint32_t e); -unsigned srled32(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint32_t e); - -unsigned srlec64(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint64_t e); -unsigned srled64(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint64_t e); - -// RLE w. automatic escape char determination -unsigned srlec(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out); -unsigned _srled(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen); -unsigned srled(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen); - -// Turbo RLE -unsigned trlec(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out); -unsigned _trled(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen); -unsigned trled(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen); -#ifdef __cplusplus -} -#endif diff --git a/ext/trle_.h b/ext/trle_.h deleted file mode 100644 index b3b929f..0000000 --- a/ext/trle_.h +++ /dev/null @@ -1,62 +0,0 @@ -/** - Copyright (C) powturbo 2015-2018 - GPL v2 License - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - - email : powturbo [AT] gmail.com - - github : https://github.com/powturbo - - homepage : https://sites.google.com/site/powturbo/ - - twitter : https://twitter.com/powturbo - - TurboRLE - "Most efficient and fastest Run Length Encoding" -**/ -//------------------------- Variable Byte from https://github.com/powturbo/TurboPFor ----------------------------------------------------- -#include "../conf.h" -#define VB_SIZE 64 -#define VB_MAX 254 -#define VB_B2 6 -#define VB_B3 3 -#define VB_BA3 (VB_MAX - (VB_SIZE/8 - 3)) -#define VB_BA2 (VB_BA3 - (1<> 8); *_op_++ = (_x_);*/ _act_; }\ - else if ((_x_) < VB_OFS3) { *_op_++ = VB_BA2 + (((_x_) -= VB_OFS2) >> 16); ctou16(_op_) = (_x_); _op_ += 2; _act_;}\ - else { unsigned _b = (bsr32((_x_))+7)/8; *_op_++ = VB_BA3 + (_b - 3); ctou32(_op_) = (_x_); _op_ += _b; _act_;}\ -} - -#define _vbget32(_ip_, _x_, _act_) do { _x_ = *_ip_++;\ - if(likely(_x_ < VB_OFS1)) { _act_ ;}\ - else if(likely(_x_ < VB_BA2)) { _x_ = /*bswap16(ctou16(_ip_-1))*/ ((_x_<<8) + (*_ip_)) + (VB_OFS1 - (VB_OFS1 << 8)); _ip_++; _act_;} \ - else if(likely(_x_ < VB_BA3)) { _x_ = ctou16(_ip_) + ((_x_ - VB_BA2 ) << 16) + VB_OFS2; _ip_ += 2; _act_;}\ - else { unsigned _b = _x_-VB_BA3; _x_ = ctou32(_ip_) & ((1u << 8 * _b << 24) - 1); _ip_ += 3 + _b; _act_;}\ -} while(0) - -#define vbput32(_op_, _x_) { register unsigned _x = _x_; _vbput32(_op_, _x, ;); } -#define vbget32(_ip_, _x_) _vbget32(_ip_, _x_, ;) - -#define vbzput(_op_, _x_, _m_, _emap_) do { if(unlikely((_x_) < _m_)) *_op_++ = _emap_[_x_]; else { unsigned _xi = (_x_) - _m_; *_op_++ = _emap_[_m_]; vbput32(_op_, _xi); } } while(0) -#define vbzget(_ip_, _x_, _m_, _e_) { _x_ = _e_; if(unlikely(_x_ == _m_)) { vbget32(_ip_,_x_); _x_+=_m_; } } - -#define TMIN 3 diff --git a/ext/trlec.c b/ext/trlec.c deleted file mode 100644 index 631ae77..0000000 --- a/ext/trlec.c +++ /dev/null @@ -1,349 +0,0 @@ -/** - Copyright (C) powturbo 2015-2018 - GPL v2 License - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - - email : powturbo [AT] gmail.com - - github : https://github.com/powturbo - - homepage : https://sites.google.com/site/powturbo/ - - twitter : https://twitter.com/powturbo - - TurboRLE - "Most efficient and fastest Run Length Encoding" -**/ - #ifndef USIZE -#include - #ifdef __SSE__ -#include - #endif - -#include "trle_.h" -#include "trle.h" - -//------------------------------------- Histogram --------------------------------------------------------- -static inline unsigned hist(const unsigned char *__restrict in, unsigned inlen, unsigned *cc) { // Optimized for x86 - unsigned c0[256+8]={0},c1[256+8]={0},c2[256+8]={0},c3[256+8]={0},c4[256+8]={0},c5[256+8]={0},c6[256+8]={0},c7[256+8]={0}; - - const unsigned char *ip; - unsigned cp = *(unsigned *)in,a; - int i; - for(ip = in; ip != in+(inlen&~(16-1));) { - unsigned c = cp, d = *(unsigned *)(ip+=4); cp = *(unsigned *)(ip+=4); - c0[(unsigned char) c ]++; - c1[(unsigned char) d ]++; - c2[(unsigned char)(c>>8)]++; c>>=16; - c3[(unsigned char)(d>>8)]++; d>>=16; - c4[(unsigned char) c ]++; - c5[(unsigned char) d ]++; - c6[ c>>8 ]++; - c7[ d>>8 ]++; - - c = cp; d = *(unsigned *)(ip+=4); cp = *(unsigned *)(ip+=4); - c0[(unsigned char) c ]++; - c1[(unsigned char) d ]++; - c2[(unsigned char)(c>>8)]++; c>>=16; - c3[(unsigned char)(d>>8)]++; d>>=16; - c4[(unsigned char) c ]++; - c5[(unsigned char) d ]++; - c6[ c>>8 ]++; - c7[ d>>8 ]++; - } - while(ip < in+inlen) c0[*ip++]++; - - for(i = 0; i < 256; i++) - cc[i] += c0[i]+c1[i]+c2[i]+c3[i]+c4[i]+c5[i]+c6[i]+c7[i]; - a = 256; - while(a > 1 && !cc[a-1]) a--; - return a; -} -//------------------------------------- RLE with Escape char ------------------------------------------------------------------ -#define SRLE8 32 -#define USIZE 8 -#include "trlec.c" - - #if SRLE8 -#define SRLEC8(pp, ip, op, e) do {\ - unsigned i = ip - pp;\ - if(i > 3) { *op++ = e; i -= 3; vbput32(op, i); *op++ = c; }\ - else if(c == e) {\ - while(i--) { *op++ = e; vbput32(op, 0); }\ - } else while(i--) *op++ = c;\ -} while(0) - -unsigned _srlec8(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint8_t e) { - const uint8_t *ip = in, *pp = in - 1; - uint8_t *op = out,c; - - if(inlen > SRLE8) - while(ip < in+(inlen-1-SRLE8)) { - #if 0 //def __SSE__ // SSE slower than scalar - __m128i cv = _mm_set1_epi8(*ip); - unsigned mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i*)(ip+1)), cv)); if(mask != 0xffffu) goto a; ip += 16; - mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i*)(ip+1)), cv)); if(mask != 0xffffu) goto a; ip += 16; - continue; - a: c = *ip; - ip += __builtin_ctz((unsigned short)(~mask)); - SRLEC8(pp, ip, op, e); - pp = ip++; - #elif __WORDSIZE == 64 - {unsigned long long z; - if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8; - if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8; - #if SRLE8 >= 32 - if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8; - if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8; - #endif - __builtin_prefetch(ip +256, 0); - continue; - a: c = *ip; - ip += ctz64(z)>>3; - SRLEC8(pp, ip, op, e); - pp = ip++; - } - #else - { unsigned z; - if((z = (ctou32(ip) ^ ctou32(ip+1)))) goto a; ip += 4; - if((z = (ctou32(ip) ^ ctou32(ip+1)))) goto a; ip += 4; - #if SRLE8 >= 16 - if((z = (ctou32(ip) ^ ctou32(ip+1)))) goto a; ip += 4; - if((z = (ctou32(ip) ^ ctou32(ip+1)))) goto a; ip += 4; - #endif - __builtin_prefetch(ip +256, 0); - continue; - a: c = *ip; - ip += ctz32(z)>>3; - SRLEC8(pp, ip, op, e); - pp = ip++; - } - #endif - } - - for(;ip < in+inlen; ip++) - if(*ip != ip[1]) { - c = *ip; - SRLEC8(pp,ip, op, e); - pp = ip; - } - c = *ip; - SRLEC8(pp, ip, op, e); - return op - out; -} -#endif - -unsigned srlec(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out) { - unsigned m = 0xffffffffu, mi = 0, i, b[256] = {0},a; - size_t l; - if(inlen < 1) return 0; - - a = hist(in,inlen,b); - if(b[a-1] == inlen) { - *out = *in; - return 1; - } - - for(i = 0; i < 256; i++) - if(b[i] <= m) - m = b[i],mi = i; - *out = mi; - l = _srlec8(in, inlen, out+1, mi)+1; - if(l < inlen) - return l; - memcpy(out, in, inlen); - return inlen; -} - -//------------------------------------------------- TurboRLE ------------------------------------------ -struct u { unsigned c,i; }; - -#define PUTC(op, x) *op++ = x -#define TRLEC(pp, ip, op, _goto_) do {\ - unsigned _i = ip - pp;\ - if(_i >= TMIN) {\ - unsigned char *q = op; \ - vbzput(op, _i-TMIN, m, rmap); \ - if((op-q) + 1 < _i) { *op++ = c; _goto_; } op=q;\ - } while(_i--) PUTC(op,c);\ -} while(0) - -#define TRLEC0(pp, ip, op, _goto_) do { unsigned _i = ip - pp;\ - if(_i >= TMIN) { vbzput(op, _i-TMIN, m, rmap); *op++ = c; } else while(_i--) PUTC(op,c);\ -} while(0) - -unsigned trlec(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out) { - int m,i; - unsigned b[256] = {0}, rmap[256],a; - struct u u[256],*v; // sort - unsigned char *op; - const unsigned char *ip,*pp; - uint8_t c; - if(inlen < 1) return 0; - - a = hist(in,inlen,b); - if(b[a-1] == inlen) { - *out = *in; - return 1; - } - - for(i = 0; i < 256; i++) u[i].c = b[i], u[i].i = i,b[i]=0; - for(v = u + 1; v < u + 256; ++v) - if(v->c < v[-1].c) { - struct u *w, tmp = *v; - for(w = v; w > u && tmp.c < w[-1].c; --w) *w = w[-1]; - *w = tmp; - } - - for(m = -1,i = 0; i < 256 && !u[i].c; i++) - b[u[i].i]++, ++m; - - op = out; - - if(m < 0) { // no unused bytes found - size_t l; - *op++ = 0; - *op++ = u[0].i; - l = _srlec8(in, inlen, op, u[0].i)+2; - if(l < inlen) return l; - memcpy(out, in, inlen); - return inlen; - } - - *op++ = 1; - memset(op, 0, 32); - for(m = -1,i = 0; i < 256; i++) - if(b[i]) { - op[i>>3] |= 1<<(i&7); - rmap[++m] = i; - } - op += 32; - - ip = in; pp=in-1; - if(inlen > SRLE8) - while(ip < in+(inlen-1-SRLE8)) { - unsigned long long z; - if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8; - if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8; - #if SRLE8 >= 32 - if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8; - if((z = (ctou64(ip) ^ ctou64(ip+1)))) goto a; ip += 8; - #endif - __builtin_prefetch(ip +256, 0); - continue; - a: c = *ip; - ip += ctz64(z)>>3; - TRLEC(pp, ip, op, goto laba); - laba:pp = ip++; - } - - for(;ip < in+inlen; ip++) { - if(*ip != *(ip+1)) { - c = *ip; - TRLEC(pp, ip, op, goto labb); - labb:pp = ip; - } - } - - c = *ip; - TRLEC(pp,ip, op, goto labc); - labc: - if(op - out < inlen) - return op - out; - memcpy(out, in, inlen); - return inlen; -} - -#undef USIZE -#undef SRLE8 - -#define USIZE 16 -#include "trlec.c" -#undef USIZE - -#define USIZE 32 -#include "trlec.c" -#undef USIZE - -#define USIZE 64 -#include "trlec.c" -#undef USIZE - -#else -#define uint_t TEMPLATE3(uint, USIZE, _t) - -#define SRLEC(pp, ip, op, e) do {\ - unsigned i = ip - pp;\ - if(i > 3) { *(uint_t *)op = e; op+=sizeof(uint_t); i -= 3; vbput32(op, i); *(uint_t *)op = c; op+=sizeof(uint_t); }\ - else if(c == e) {\ - while(i--) { *(uint_t *)op = e; op+=sizeof(uint_t); vbput32(op, 0); }\ - } else while(i--) { *(uint_t *)op = c; op+=sizeof(uint_t); }\ -} while(0) - - #if !SRLE8 -unsigned TEMPLATE2(_srlec, USIZE)(const unsigned char *__restrict cin, unsigned inlen, unsigned char *__restrict out, uint_t e) { - unsigned char *op = out; - uint_t *in = (uint_t *)cin, *pp = in-1, *ip=in,c; - unsigned n = inlen/sizeof(uint_t); - unsigned char *p; - if(n > 4) - for(; ip < in+(n-1-4);) { - #if 0 - if(* ip == ip[1]) - if(*++ip == ip[1]) - if(*++ip == ip[1]) - if(*++ip == ip[1]) { - ip++; __builtin_prefetch(ip +256, 0); - continue; - } - #else - if(*ip != ip[1]) goto a; ++ip; - if(*ip != ip[1]) goto a; ++ip; - if(*ip != ip[1]) goto a; ++ip; - if(*ip != ip[1]) goto a; ++ip; __builtin_prefetch(ip +256, 0); - continue; - a:; - #endif - c = *ip; - SRLEC(pp,ip, op, e); - pp = ip++; - } - - for(;ip < in+n; ip++) - if(*ip != ip[1]) { - c = *ip; - SRLEC(pp,ip, op, e); - pp = ip; - } - c = *ip; - SRLEC(pp, ip, op, e); - - #if USIZE > 8 - p = (unsigned char *)ip; - while(p < cin+inlen) - *op++ = *p++; - #endif - return op - out; -} - #endif -#undef SRLEC - -unsigned TEMPLATE2(srlec, USIZE)(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, uint_t e) { - size_t l = TEMPLATE2(_srlec, USIZE)(in, inlen, out, e); - - if(l < inlen) - return l; - memcpy(out, in, inlen); - return inlen; -} -#endif - diff --git a/ext/trled.c b/ext/trled.c deleted file mode 100644 index 6ca1eb3..0000000 --- a/ext/trled.c +++ /dev/null @@ -1,259 +0,0 @@ -/** - Copyright (C) powturbo 2015-2018 - GPL v2 License - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - - email : powturbo [AT] gmail.com - - github : https://github.com/powturbo - - homepage : https://sites.google.com/site/powturbo/ - - twitter : https://twitter.com/powturbo - - TurboRLE - "Most efficient and fastest Run Length Encoding" -**/ - #ifndef USIZE -#include - #ifdef __SSE__ -#include - #endif - -#include "trle.h" -#include "trle_.h" - -//------------------------------------- RLE with Escape char ------------------------------------------------------------------ -//#define MEMSAFE -#define SRLE8 32 // 16// -#define USIZE 8 -#include "trled.c" - - #if SRLE8 -unsigned _srled8(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen, unsigned char e) { - const uint8_t *ip = in; - uint8_t *op = out, c; - uint32_t i; - #ifdef __SSE__ - __m128i ev = _mm_set1_epi8(e); - #endif - if(outlen >= SRLE8) - while(op < out+(outlen-SRLE8)) { - - #ifdef __SSE__ // TODO: test _mm_cmpestrm/_mm_cmpestri on sse4 - uint32_t mask; - __m128i u,v = _mm_loadu_si128((__m128i*)ip); _mm_storeu_si128((__m128i *)op, v); mask = _mm_movemask_epi8(_mm_cmpeq_epi8(v, ev)); if(mask) goto a; op += 16; ip += 16; - #if SRLE8 >= 32 - u = _mm_loadu_si128((__m128i*)ip); _mm_storeu_si128((__m128i *)op, u); mask = _mm_movemask_epi8(_mm_cmpeq_epi8(u, ev)); if(mask) goto a; op += 16; ip += 16; - #endif - __builtin_prefetch(ip+512, 0); - continue; - a: i = ctz32(mask); - op += i; ip += i+1; - { - #else - if(likely((c = *(uint8_t *)ip) != e)) { - ip++; - *op++ = c; - } else { - #endif - vbget32(ip, i); - if(likely(i)) { - uint8_t c = *ip++; - i += TMIN; - rmemset(op, c, i); - } else - *op++ = e; - } - } - - #define rmemset8(_op_, _c_, _i_) while(_i_--) *_op_++ = _c_ - while(op < out+outlen) - if(likely((c = *ip) != e)) { - ip++; - *op++ = c; - } else { - int i; - ip++; - vbget32(ip, i); - if(likely(i)) { - c = *ip++; - i += TMIN; - rmemset8(op, c, i); - } else - *op++ = e; - } - return ip - in; -} - #endif - -unsigned _srled(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen) { - return _srled8(in+1, out, outlen, *in); -} - -unsigned srled(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen) { - if(inlen == outlen) - memcpy(out, in, outlen); - else if(inlen == 1) - memset(out, in[0], outlen); - else - return _srled8(in+1, out, outlen, *in); - return inlen; -} -//------------------------------------- TurboRLE ------------------------------------------ -unsigned _trled(const unsigned char *__restrict in, unsigned char *__restrict out, unsigned outlen) { - uint8_t b[256] = {0},*op = out; - const uint8_t *ip; - int m = -1, i, c; - - if(outlen < 1) - return 0; - - if(!*in++) - return _srled8(in+1, out, outlen, *in)+2; - - for(ip = in; ip < in+32; ip++) - for(i = 0; i < 8; ++i) - if(((*ip) >> i) & 1) - b[(ip-in)<<3 | i] = ++m+1; - - if(outlen >= 32) - while(op < out+(outlen-32)) { - if(b[*ip]) goto a; *op++ = *ip++; - if(b[*ip]) goto a; *op++ = *ip++; - if(b[*ip]) goto a; *op++ = *ip++; - if(b[*ip]) goto a; *op++ = *ip++; - if(b[*ip]) goto a; *op++ = *ip++; - if(b[*ip]) goto a; *op++ = *ip++; - if(b[*ip]) goto a; *op++ = *ip++; - if(b[*ip]) goto a; *op++ = *ip++; - __builtin_prefetch(ip+256, 0); - continue; - a: - c = b[*ip++]; - vbzget(ip, i, m, c-1); - c = *ip++; - i += 3; - rmemset(op,c,i); - } - while(op < out+outlen) { - if(likely(!(c = b[*ip]))) - *op++ = *ip++; - else { - ip++; - vbzget(ip, i, m, c-1); - c = *ip++; - i += 3; - rmemset8(op,c,i); - } - } - return ip - in; -} - -unsigned trled(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen) { - if(inlen == outlen) - memcpy(out, in, outlen); - else if(inlen == 1) - memset(out, in[0], outlen); - else - return _trled(in, out, outlen); - return inlen; -} - -#undef USIZE -#undef rmemset -#undef SRLE8 - -#define USIZE 16 -#include "trled.c" -#undef rmemset -#undef USIZE -#undef runcpy - -#define USIZE 32 -#include "trled.c" -#undef rmemset -#undef USIZE -#undef runcpy - -#define USIZE 64 -#include "trled.c" -#undef rmemset -#undef USIZE - - #else - #ifdef MEMSAFE -#define rmemset(_op_, _c_, _i_) while(_i_--) *_op_++ = _c_ - #elif defined(__SSE__) && USIZE < 64 -#define rmemset(_op_, _c_, _i_) do { \ - __m128i *_up = (__m128i *)_op_, cv = TEMPLATE2(_mm_set1_epi, USIZE)(_c_);\ - _op_ += _i_;\ - do { _mm_storeu_si128( _up, cv); _mm_storeu_si128(_up+1, cv); _up+=2; } while(_up < (__m128i *)_op_);\ -} while(0) - #else -#define _cset64(_cc,_c_) _cc = _c_ -#define _cset32(_cc,_c_) _cc = _c_; _cc = _cc<<32|_cc -#define _cset16(_cc,_c_) _cc = _c_; _cc = _cc<<48|_cc<<32|_cc<<16|_cc -#define _cset8( _cc,_c_) _cc = (uint32_t)_c_<<24 | (uint32_t)_c_<<16 | (uint32_t)_c_<<8 | (uint32_t)_c_; _cc = _cc<<32|_cc - -#define rmemset(_op_, _c_, _i_) do { uint64_t _cc; uint8_t *_up = (uint8_t *)_op_; _op_ +=_i_;\ - TEMPLATE2(_cset, USIZE)(_cc,_c_);\ - do {\ - TEMPLATE2(ctou, USIZE)(_up) = _c_; _up += USIZE/8;\ - TEMPLATE2(ctou, USIZE)(_up) = _c_; _up += USIZE/8;\ - } while(_up < (uint8_t *)_op_);\ -} while(0) - #endif - -#define uint_t TEMPLATE3(uint, USIZE, _t) - - #if !SRLE8 -unsigned TEMPLATE2(_srled, USIZE)(const unsigned char *__restrict in, unsigned char *__restrict cout, unsigned outlen, uint_t e) { - uint_t *out = (uint_t *)cout, *op = out, c; - const unsigned char *ip = in; - - while(op < out+outlen/sizeof(uint_t)) { __builtin_prefetch(ip +384, 0); - if(likely((c = *(uint_t *)ip) != e)) { - ip += sizeof(uint_t); - *op++ = c; - } else { - int i; - ip += sizeof(uint_t); - vbget32(ip, i); - if(likely(i)) { - c = *(uint_t *)ip; - ip += sizeof(uint_t); - i += 3; - rmemset(op, c, i); - } else - *op++ = e; - } - } - #if USIZE > 8 - { unsigned char *p = (unsigned char *)op; - while(p < cout+outlen) *p++ = *ip++; - } - #endif - return ip - in; -} - #endif - -unsigned TEMPLATE2(srled, USIZE)(const unsigned char *__restrict in, unsigned inlen, unsigned char *__restrict out, unsigned outlen, uint_t e) { - if(inlen == outlen) - memcpy(out, in, outlen); - else if(inlen == 1) - memset(out, in[0], outlen); - else - return TEMPLATE2(_srled, USIZE)(in, out, outlen, e); - return inlen; -} - #endif