From dace26700b14fac543aa49e0101d1b921a37b8c4 Mon Sep 17 00:00:00 2001 From: powturbo Date: Sat, 13 Nov 2021 19:01:33 +0100 Subject: [PATCH] TurboPFor: TurboPFor decode --- vp4d.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vp4d.c b/vp4d.c index dd65fc2..a255fc2 100644 --- a/vp4d.c +++ b/vp4d.c @@ -266,7 +266,7 @@ extern char _shuffle_16[256][16]; ALWAYS_INLINE unsigned char *TEMPLATE2(_P4DEC, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out P4DELTA(uint_t start), unsigned b, unsigned bx ) { if(!(b & 0x80)) { #if USIZE == 64 - b = (b == 63)?64:b; // 64 is encoded for bitsize 63 (permits using only 6 bits for b) + b = (b == 63)?64:b; // 63,64 are both encoded w. same bitsize 64 (permits using only 6 bits for b) #endif return TEMPLATE2(BITUNPACKD, USIZE)(in, n, out P4DELTA(start), b); // bitunpack only } @@ -311,7 +311,7 @@ ALWAYS_INLINE unsigned char *TEMPLATE2(_P4DEC, USIZE)(unsigned char *__restrict { uint_t *_op = out,*op,*pex = ex; for(i = 0; i < p4dn; i++) { for(op=_op; bb[i]; bb[i] >>= 4,op+=4) { const unsigned m = bb[i]&0xf; - _mm_storeu_si128((__m128i *)op, _mm_add_epi32(_mm_loadu_si128((__m128i*)op), _mm_shuffle_epi8(mm_slli_epi32(_mm_loadu_si128((__m128i*)pex), b), _mm_load_si128((__m128i*)_shuffle_32[m]) ) )); pex += popcnt32(m); + _mm_storeu_si128((__m128i *)op, _mm_add_epi32(_mm_loadu_si128((__m128i*)op), _mm_shuffle_epi8(_mm_slli_epi32(_mm_loadu_si128((__m128i*)pex), b), _mm_load_si128((__m128i*)_shuffle_32[m]) ) )); pex += popcnt32(m); } _op+=64; } } @@ -319,7 +319,7 @@ ALWAYS_INLINE unsigned char *TEMPLATE2(_P4DEC, USIZE)(unsigned char *__restrict { uint_t *_op = out, *op, *pex = ex; for(i = 0; i < p4dn; i++) { for(op = _op; bb[i]; bb[i] >>= 8,op += 8) { const unsigned char m = bb[i]; - _mm_storeu_si128((__m128i *)op, _mm_add_epi16(_mm_loadu_si128((__m128i*)op), _mm_shuffle_epi8(mm_slli_epi16(_mm_loadu_si128((__m128i*)pex), b), _mm_load_si128((__m128i*)_shuffle_16[m]) ) )); pex += popcnt32(m); + _mm_storeu_si128((__m128i *)op, _mm_add_epi16(_mm_loadu_si128((__m128i*)op), _mm_shuffle_epi8(_mm_slli_epi16(_mm_loadu_si128((__m128i*)pex), b), _mm_load_si128((__m128i*)_shuffle_16[m]) ) )); pex += popcnt32(m); } _op += 64; } }