From a1b0adee01574cfe6331cd4c5c6ddfada2e77ec1 Mon Sep 17 00:00:00 2001 From: powturbo Date: Thu, 11 Jun 2015 20:49:02 +0200 Subject: [PATCH] BitPack include --- bitpack64_.h | 5313 +++++++++++++++++++++----------------------------- 1 file changed, 2234 insertions(+), 3079 deletions(-) diff --git a/bitpack64_.h b/bitpack64_.h index e00adf9..1fc0605 100644 --- a/bitpack64_.h +++ b/bitpack64_.h @@ -21,3083 +21,2238 @@ - twitter : https://twitter.com/powturbo - email : powturbo [_AT_] gmail [_DOT_] com **/ -// bitunpack include -#define BITUNBLK32_0(ip, i, op, parm) { \ - DST(op,i*0+ 0, 0, parm);\ - DST(op,i*0+ 1, 0, parm);\ - DST(op,i*0+ 2, 0, parm);\ - DST(op,i*0+ 3, 0, parm);\ - DST(op,i*0+ 4, 0, parm);\ - DST(op,i*0+ 5, 0, parm);\ - DST(op,i*0+ 6, 0, parm);\ - DST(op,i*0+ 7, 0, parm);\ - DST(op,i*0+ 8, 0, parm);\ - DST(op,i*0+ 9, 0, parm);\ - DST(op,i*0+10, 0, parm);\ - DST(op,i*0+11, 0, parm);\ - DST(op,i*0+12, 0, parm);\ - DST(op,i*0+13, 0, parm);\ - DST(op,i*0+14, 0, parm);\ - DST(op,i*0+15, 0, parm);\ - DST(op,i*0+16, 0, parm);\ - DST(op,i*0+17, 0, parm);\ - DST(op,i*0+18, 0, parm);\ - DST(op,i*0+19, 0, parm);\ - DST(op,i*0+20, 0, parm);\ - DST(op,i*0+21, 0, parm);\ - DST(op,i*0+22, 0, parm);\ - DST(op,i*0+23, 0, parm);\ - DST(op,i*0+24, 0, parm);\ - DST(op,i*0+25, 0, parm);\ - DST(op,i*0+26, 0, parm);\ - DST(op,i*0+27, 0, parm);\ - DST(op,i*0+28, 0, parm);\ - DST(op,i*0+29, 0, parm);\ - DST(op,i*0+30, 0, parm);\ - DST(op,i*0+31, 0, parm);;\ -} - -#define BITUNPACK64_0(ip, op, parm) { \ - BITUNBLK32_0(ip, 0, op, parm); DSTI(op);\ -} - -#define BITUNBLK32_1(ip, i, op, parm) { register uint32_t w0 = *(uint32_t *)(ip+(i*1+0)*4/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x1, parm);\ - DST(op,i*32+ 1, (w0 >> 1) & 0x1, parm);\ - DST(op,i*32+ 2, (w0 >> 2) & 0x1, parm);\ - DST(op,i*32+ 3, (w0 >> 3) & 0x1, parm);\ - DST(op,i*32+ 4, (w0 >> 4) & 0x1, parm);\ - DST(op,i*32+ 5, (w0 >> 5) & 0x1, parm);\ - DST(op,i*32+ 6, (w0 >> 6) & 0x1, parm);\ - DST(op,i*32+ 7, (w0 >> 7) & 
0x1, parm);\ - DST(op,i*32+ 8, (w0 >> 8) & 0x1, parm);\ - DST(op,i*32+ 9, (w0 >> 9) & 0x1, parm);\ - DST(op,i*32+10, (w0 >> 10) & 0x1, parm);\ - DST(op,i*32+11, (w0 >> 11) & 0x1, parm);\ - DST(op,i*32+12, (w0 >> 12) & 0x1, parm);\ - DST(op,i*32+13, (w0 >> 13) & 0x1, parm);\ - DST(op,i*32+14, (w0 >> 14) & 0x1, parm);\ - DST(op,i*32+15, (w0 >> 15) & 0x1, parm);\ - DST(op,i*32+16, (w0 >> 16) & 0x1, parm);\ - DST(op,i*32+17, (w0 >> 17) & 0x1, parm);\ - DST(op,i*32+18, (w0 >> 18) & 0x1, parm);\ - DST(op,i*32+19, (w0 >> 19) & 0x1, parm);\ - DST(op,i*32+20, (w0 >> 20) & 0x1, parm);\ - DST(op,i*32+21, (w0 >> 21) & 0x1, parm);\ - DST(op,i*32+22, (w0 >> 22) & 0x1, parm);\ - DST(op,i*32+23, (w0 >> 23) & 0x1, parm);\ - DST(op,i*32+24, (w0 >> 24) & 0x1, parm);\ - DST(op,i*32+25, (w0 >> 25) & 0x1, parm);\ - DST(op,i*32+26, (w0 >> 26) & 0x1, parm);\ - DST(op,i*32+27, (w0 >> 27) & 0x1, parm);\ - DST(op,i*32+28, (w0 >> 28) & 0x1, parm);\ - DST(op,i*32+29, (w0 >> 29) & 0x1, parm);\ - DST(op,i*32+30, (w0 >> 30) & 0x1, parm);\ - DST(op,i*32+31, (w0 >> 31) , parm);;\ -} - -#define BITUNPACK64_1(ip, op, parm) { \ - BITUNBLK32_1(ip, 0, op, parm); DSTI(op); ip += 1*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_2(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*1+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3, parm);\ - DST(op,i*32+ 1, (w0 >> 2) & 0x3, parm);\ - DST(op,i*32+ 2, (w0 >> 4) & 0x3, parm);\ - DST(op,i*32+ 3, (w0 >> 6) & 0x3, parm);\ - DST(op,i*32+ 4, (w0 >> 8) & 0x3, parm);\ - DST(op,i*32+ 5, (w0 >> 10) & 0x3, parm);\ - DST(op,i*32+ 6, (w0 >> 12) & 0x3, parm);\ - DST(op,i*32+ 7, (w0 >> 14) & 0x3, parm);\ - DST(op,i*32+ 8, (w0 >> 16) & 0x3, parm);\ - DST(op,i*32+ 9, (w0 >> 18) & 0x3, parm);\ - DST(op,i*32+10, (w0 >> 20) & 0x3, parm);\ - DST(op,i*32+11, (w0 >> 22) & 0x3, parm);\ - DST(op,i*32+12, (w0 >> 24) & 0x3, parm);\ - DST(op,i*32+13, (w0 >> 26) & 0x3, parm);\ - DST(op,i*32+14, (w0 >> 28) & 0x3, parm);\ - DST(op,i*32+15, (w0 >> 30) & 0x3, parm);\ - 
DST(op,i*32+16, (w0 >> 32) & 0x3, parm);\ - DST(op,i*32+17, (w0 >> 34) & 0x3, parm);\ - DST(op,i*32+18, (w0 >> 36) & 0x3, parm);\ - DST(op,i*32+19, (w0 >> 38) & 0x3, parm);\ - DST(op,i*32+20, (w0 >> 40) & 0x3, parm);\ - DST(op,i*32+21, (w0 >> 42) & 0x3, parm);\ - DST(op,i*32+22, (w0 >> 44) & 0x3, parm);\ - DST(op,i*32+23, (w0 >> 46) & 0x3, parm);\ - DST(op,i*32+24, (w0 >> 48) & 0x3, parm);\ - DST(op,i*32+25, (w0 >> 50) & 0x3, parm);\ - DST(op,i*32+26, (w0 >> 52) & 0x3, parm);\ - DST(op,i*32+27, (w0 >> 54) & 0x3, parm);\ - DST(op,i*32+28, (w0 >> 56) & 0x3, parm);\ - DST(op,i*32+29, (w0 >> 58) & 0x3, parm);\ - DST(op,i*32+30, (w0 >> 60) & 0x3, parm);\ - DST(op,i*32+31, (w0 >> 62) , parm);;\ -} - -#define BITUNPACK64_2(ip, op, parm) { \ - BITUNBLK64_2(ip, 0, op, parm); DSTI(op); ip += 2*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_3(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*3+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7, parm);\ - DST(op,i*64+ 1, (w0 >> 3) & 0x7, parm);\ - DST(op,i*64+ 2, (w0 >> 6) & 0x7, parm);\ - DST(op,i*64+ 3, (w0 >> 9) & 0x7, parm);\ - DST(op,i*64+ 4, (w0 >> 12) & 0x7, parm);\ - DST(op,i*64+ 5, (w0 >> 15) & 0x7, parm);\ - DST(op,i*64+ 6, (w0 >> 18) & 0x7, parm);\ - DST(op,i*64+ 7, (w0 >> 21) & 0x7, parm);\ - DST(op,i*64+ 8, (w0 >> 24) & 0x7, parm);\ - DST(op,i*64+ 9, (w0 >> 27) & 0x7, parm);\ - DST(op,i*64+10, (w0 >> 30) & 0x7, parm);\ - DST(op,i*64+11, (w0 >> 33) & 0x7, parm);\ - DST(op,i*64+12, (w0 >> 36) & 0x7, parm);\ - DST(op,i*64+13, (w0 >> 39) & 0x7, parm);\ - DST(op,i*64+14, (w0 >> 42) & 0x7, parm);\ - DST(op,i*64+15, (w0 >> 45) & 0x7, parm);\ - DST(op,i*64+16, (w0 >> 48) & 0x7, parm);\ - DST(op,i*64+17, (w0 >> 51) & 0x7, parm);\ - DST(op,i*64+18, (w0 >> 54) & 0x7, parm);\ - DST(op,i*64+19, (w0 >> 57) & 0x7, parm);\ - DST(op,i*64+20, (w0 >> 60) & 0x7, parm); register uint64_t w1 = *(uint32_t *)(ip+(i*3+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w0 >> 63) | (w1 << 1) & 0x7, parm);\ - DST(op,i*64+22, (w1 >> 2) & 
0x7, parm);\ - DST(op,i*64+23, (w1 >> 5) & 0x7, parm);\ - DST(op,i*64+24, (w1 >> 8) & 0x7, parm);\ - DST(op,i*64+25, (w1 >> 11) & 0x7, parm);\ - DST(op,i*64+26, (w1 >> 14) & 0x7, parm);\ - DST(op,i*64+27, (w1 >> 17) & 0x7, parm);\ - DST(op,i*64+28, (w1 >> 20) & 0x7, parm);\ - DST(op,i*64+29, (w1 >> 23) & 0x7, parm);\ - DST(op,i*64+30, (w1 >> 26) & 0x7, parm);\ - DST(op,i*64+31, (w1 >> 29) & 0x7, parm);;\ -} - -#define BITUNPACK64_3(ip, op, parm) { \ - BITUNBLK64_3(ip, 0, op, parm); DSTI(op); ip += 3*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_4(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*1+0)*8/sizeof(ip[0]));\ - DST(op,i*16+ 0, (w0 ) & 0xf, parm);\ - DST(op,i*16+ 1, (w0 >> 4) & 0xf, parm);\ - DST(op,i*16+ 2, (w0 >> 8) & 0xf, parm);\ - DST(op,i*16+ 3, (w0 >> 12) & 0xf, parm);\ - DST(op,i*16+ 4, (w0 >> 16) & 0xf, parm);\ - DST(op,i*16+ 5, (w0 >> 20) & 0xf, parm);\ - DST(op,i*16+ 6, (w0 >> 24) & 0xf, parm);\ - DST(op,i*16+ 7, (w0 >> 28) & 0xf, parm);\ - DST(op,i*16+ 8, (w0 >> 32) & 0xf, parm);\ - DST(op,i*16+ 9, (w0 >> 36) & 0xf, parm);\ - DST(op,i*16+10, (w0 >> 40) & 0xf, parm);\ - DST(op,i*16+11, (w0 >> 44) & 0xf, parm);\ - DST(op,i*16+12, (w0 >> 48) & 0xf, parm);\ - DST(op,i*16+13, (w0 >> 52) & 0xf, parm);\ - DST(op,i*16+14, (w0 >> 56) & 0xf, parm);\ - DST(op,i*16+15, (w0 >> 60) , parm);;\ -} - -#define BITUNPACK64_4(ip, op, parm) { \ - BITUNBLK64_4(ip, 0, op, parm);\ - BITUNBLK64_4(ip, 1, op, parm); DSTI(op); ip += 4*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_5(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*5+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1f, parm);\ - DST(op,i*64+ 1, (w0 >> 5) & 0x1f, parm);\ - DST(op,i*64+ 2, (w0 >> 10) & 0x1f, parm);\ - DST(op,i*64+ 3, (w0 >> 15) & 0x1f, parm);\ - DST(op,i*64+ 4, (w0 >> 20) & 0x1f, parm);\ - DST(op,i*64+ 5, (w0 >> 25) & 0x1f, parm);\ - DST(op,i*64+ 6, (w0 >> 30) & 0x1f, parm);\ - DST(op,i*64+ 7, (w0 >> 35) & 0x1f, parm);\ - DST(op,i*64+ 8, (w0 >> 40) & 0x1f, parm);\ - 
DST(op,i*64+ 9, (w0 >> 45) & 0x1f, parm);\ - DST(op,i*64+10, (w0 >> 50) & 0x1f, parm);\ - DST(op,i*64+11, (w0 >> 55) & 0x1f, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*5+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w0 >> 60) | (w1 << 4) & 0x1f, parm);\ - DST(op,i*64+13, (w1 >> 1) & 0x1f, parm);\ - DST(op,i*64+14, (w1 >> 6) & 0x1f, parm);\ - DST(op,i*64+15, (w1 >> 11) & 0x1f, parm);\ - DST(op,i*64+16, (w1 >> 16) & 0x1f, parm);\ - DST(op,i*64+17, (w1 >> 21) & 0x1f, parm);\ - DST(op,i*64+18, (w1 >> 26) & 0x1f, parm);\ - DST(op,i*64+19, (w1 >> 31) & 0x1f, parm);\ - DST(op,i*64+20, (w1 >> 36) & 0x1f, parm);\ - DST(op,i*64+21, (w1 >> 41) & 0x1f, parm);\ - DST(op,i*64+22, (w1 >> 46) & 0x1f, parm);\ - DST(op,i*64+23, (w1 >> 51) & 0x1f, parm);\ - DST(op,i*64+24, (w1 >> 56) & 0x1f, parm); register uint64_t w2 = *(uint32_t *)(ip+(i*5+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w1 >> 61) | (w2 << 3) & 0x1f, parm);\ - DST(op,i*64+26, (w2 >> 2) & 0x1f, parm);\ - DST(op,i*64+27, (w2 >> 7) & 0x1f, parm);\ - DST(op,i*64+28, (w2 >> 12) & 0x1f, parm);\ - DST(op,i*64+29, (w2 >> 17) & 0x1f, parm);\ - DST(op,i*64+30, (w2 >> 22) & 0x1f, parm);\ - DST(op,i*64+31, (w2 >> 27) & 0x1f, parm);;\ -} - -#define BITUNPACK64_5(ip, op, parm) { \ - BITUNBLK64_5(ip, 0, op, parm); DSTI(op); ip += 5*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_6(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*3+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3f, parm);\ - DST(op,i*32+ 1, (w0 >> 6) & 0x3f, parm);\ - DST(op,i*32+ 2, (w0 >> 12) & 0x3f, parm);\ - DST(op,i*32+ 3, (w0 >> 18) & 0x3f, parm);\ - DST(op,i*32+ 4, (w0 >> 24) & 0x3f, parm);\ - DST(op,i*32+ 5, (w0 >> 30) & 0x3f, parm);\ - DST(op,i*32+ 6, (w0 >> 36) & 0x3f, parm);\ - DST(op,i*32+ 7, (w0 >> 42) & 0x3f, parm);\ - DST(op,i*32+ 8, (w0 >> 48) & 0x3f, parm);\ - DST(op,i*32+ 9, (w0 >> 54) & 0x3f, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*3+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+10, (w0 >> 60) | (w1 << 4) & 0x3f, parm);\ - 
DST(op,i*32+11, (w1 >> 2) & 0x3f, parm);\ - DST(op,i*32+12, (w1 >> 8) & 0x3f, parm);\ - DST(op,i*32+13, (w1 >> 14) & 0x3f, parm);\ - DST(op,i*32+14, (w1 >> 20) & 0x3f, parm);\ - DST(op,i*32+15, (w1 >> 26) & 0x3f, parm);\ - DST(op,i*32+16, (w1 >> 32) & 0x3f, parm);\ - DST(op,i*32+17, (w1 >> 38) & 0x3f, parm);\ - DST(op,i*32+18, (w1 >> 44) & 0x3f, parm);\ - DST(op,i*32+19, (w1 >> 50) & 0x3f, parm);\ - DST(op,i*32+20, (w1 >> 56) & 0x3f, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*3+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+21, (w1 >> 62) | (w2 << 2) & 0x3f, parm);\ - DST(op,i*32+22, (w2 >> 4) & 0x3f, parm);\ - DST(op,i*32+23, (w2 >> 10) & 0x3f, parm);\ - DST(op,i*32+24, (w2 >> 16) & 0x3f, parm);\ - DST(op,i*32+25, (w2 >> 22) & 0x3f, parm);\ - DST(op,i*32+26, (w2 >> 28) & 0x3f, parm);\ - DST(op,i*32+27, (w2 >> 34) & 0x3f, parm);\ - DST(op,i*32+28, (w2 >> 40) & 0x3f, parm);\ - DST(op,i*32+29, (w2 >> 46) & 0x3f, parm);\ - DST(op,i*32+30, (w2 >> 52) & 0x3f, parm);\ - DST(op,i*32+31, (w2 >> 58) , parm);;\ -} - -#define BITUNPACK64_6(ip, op, parm) { \ - BITUNBLK64_6(ip, 0, op, parm); DSTI(op); ip += 6*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_7(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*7+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7f, parm);\ - DST(op,i*64+ 1, (w0 >> 7) & 0x7f, parm);\ - DST(op,i*64+ 2, (w0 >> 14) & 0x7f, parm);\ - DST(op,i*64+ 3, (w0 >> 21) & 0x7f, parm);\ - DST(op,i*64+ 4, (w0 >> 28) & 0x7f, parm);\ - DST(op,i*64+ 5, (w0 >> 35) & 0x7f, parm);\ - DST(op,i*64+ 6, (w0 >> 42) & 0x7f, parm);\ - DST(op,i*64+ 7, (w0 >> 49) & 0x7f, parm);\ - DST(op,i*64+ 8, (w0 >> 56) & 0x7f, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*7+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w0 >> 63) | (w1 << 1) & 0x7f, parm);\ - DST(op,i*64+10, (w1 >> 6) & 0x7f, parm);\ - DST(op,i*64+11, (w1 >> 13) & 0x7f, parm);\ - DST(op,i*64+12, (w1 >> 20) & 0x7f, parm);\ - DST(op,i*64+13, (w1 >> 27) & 0x7f, parm);\ - DST(op,i*64+14, (w1 >> 34) & 0x7f, parm);\ - 
DST(op,i*64+15, (w1 >> 41) & 0x7f, parm);\ - DST(op,i*64+16, (w1 >> 48) & 0x7f, parm);\ - DST(op,i*64+17, (w1 >> 55) & 0x7f, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*7+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w1 >> 62) | (w2 << 2) & 0x7f, parm);\ - DST(op,i*64+19, (w2 >> 5) & 0x7f, parm);\ - DST(op,i*64+20, (w2 >> 12) & 0x7f, parm);\ - DST(op,i*64+21, (w2 >> 19) & 0x7f, parm);\ - DST(op,i*64+22, (w2 >> 26) & 0x7f, parm);\ - DST(op,i*64+23, (w2 >> 33) & 0x7f, parm);\ - DST(op,i*64+24, (w2 >> 40) & 0x7f, parm);\ - DST(op,i*64+25, (w2 >> 47) & 0x7f, parm);\ - DST(op,i*64+26, (w2 >> 54) & 0x7f, parm); register uint64_t w3 = *(uint32_t *)(ip+(i*7+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w2 >> 61) | (w3 << 3) & 0x7f, parm);\ - DST(op,i*64+28, (w3 >> 4) & 0x7f, parm);\ - DST(op,i*64+29, (w3 >> 11) & 0x7f, parm);\ - DST(op,i*64+30, (w3 >> 18) & 0x7f, parm);\ - DST(op,i*64+31, (w3 >> 25) & 0x7f, parm);;\ -} - -#define BITUNPACK64_7(ip, op, parm) { \ - BITUNBLK64_7(ip, 0, op, parm); DSTI(op); ip += 7*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_8(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*1+0)*8/sizeof(ip[0]));\ - DST(op,i*8+ 0, (w0 ) & 0xff, parm);\ - DST(op,i*8+ 1, (w0 >> 8) & 0xff, parm);\ - DST(op,i*8+ 2, (w0 >> 16) & 0xff, parm);\ - DST(op,i*8+ 3, (w0 >> 24) & 0xff, parm);\ - DST(op,i*8+ 4, (w0 >> 32) & 0xff, parm);\ - DST(op,i*8+ 5, (w0 >> 40) & 0xff, parm);\ - DST(op,i*8+ 6, (w0 >> 48) & 0xff, parm);\ - DST(op,i*8+ 7, (w0 >> 56) , parm);;\ -} - -#define BITUNPACK64_8(ip, op, parm) { \ - BITUNBLK64_8(ip, 0, op, parm);\ - BITUNBLK64_8(ip, 1, op, parm);\ - BITUNBLK64_8(ip, 2, op, parm);\ - BITUNBLK64_8(ip, 3, op, parm); DSTI(op); ip += 8*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_9(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*9+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1ff, parm);\ - DST(op,i*64+ 1, (w0 >> 9) & 0x1ff, parm);\ - DST(op,i*64+ 2, (w0 >> 18) & 0x1ff, parm);\ - DST(op,i*64+ 3, (w0 >> 27) & 0x1ff, 
parm);\ - DST(op,i*64+ 4, (w0 >> 36) & 0x1ff, parm);\ - DST(op,i*64+ 5, (w0 >> 45) & 0x1ff, parm);\ - DST(op,i*64+ 6, (w0 >> 54) & 0x1ff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*9+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w0 >> 63) | (w1 << 1) & 0x1ff, parm);\ - DST(op,i*64+ 8, (w1 >> 8) & 0x1ff, parm);\ - DST(op,i*64+ 9, (w1 >> 17) & 0x1ff, parm);\ - DST(op,i*64+10, (w1 >> 26) & 0x1ff, parm);\ - DST(op,i*64+11, (w1 >> 35) & 0x1ff, parm);\ - DST(op,i*64+12, (w1 >> 44) & 0x1ff, parm);\ - DST(op,i*64+13, (w1 >> 53) & 0x1ff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*9+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w1 >> 62) | (w2 << 2) & 0x1ff, parm);\ - DST(op,i*64+15, (w2 >> 7) & 0x1ff, parm);\ - DST(op,i*64+16, (w2 >> 16) & 0x1ff, parm);\ - DST(op,i*64+17, (w2 >> 25) & 0x1ff, parm);\ - DST(op,i*64+18, (w2 >> 34) & 0x1ff, parm);\ - DST(op,i*64+19, (w2 >> 43) & 0x1ff, parm);\ - DST(op,i*64+20, (w2 >> 52) & 0x1ff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*9+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w2 >> 61) | (w3 << 3) & 0x1ff, parm);\ - DST(op,i*64+22, (w3 >> 6) & 0x1ff, parm);\ - DST(op,i*64+23, (w3 >> 15) & 0x1ff, parm);\ - DST(op,i*64+24, (w3 >> 24) & 0x1ff, parm);\ - DST(op,i*64+25, (w3 >> 33) & 0x1ff, parm);\ - DST(op,i*64+26, (w3 >> 42) & 0x1ff, parm);\ - DST(op,i*64+27, (w3 >> 51) & 0x1ff, parm); register uint64_t w4 = *(uint32_t *)(ip+(i*9+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w3 >> 60) | (w4 << 4) & 0x1ff, parm);\ - DST(op,i*64+29, (w4 >> 5) & 0x1ff, parm);\ - DST(op,i*64+30, (w4 >> 14) & 0x1ff, parm);\ - DST(op,i*64+31, (w4 >> 23) & 0x1ff, parm);;\ -} - -#define BITUNPACK64_9(ip, op, parm) { \ - BITUNBLK64_9(ip, 0, op, parm); DSTI(op); ip += 9*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_10(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*5+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3ff, parm);\ - DST(op,i*32+ 1, (w0 >> 10) & 0x3ff, parm);\ - DST(op,i*32+ 2, (w0 >> 20) & 0x3ff, parm);\ - DST(op,i*32+ 3, 
(w0 >> 30) & 0x3ff, parm);\ - DST(op,i*32+ 4, (w0 >> 40) & 0x3ff, parm);\ - DST(op,i*32+ 5, (w0 >> 50) & 0x3ff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*5+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 6, (w0 >> 60) | (w1 << 4) & 0x3ff, parm);\ - DST(op,i*32+ 7, (w1 >> 6) & 0x3ff, parm);\ - DST(op,i*32+ 8, (w1 >> 16) & 0x3ff, parm);\ - DST(op,i*32+ 9, (w1 >> 26) & 0x3ff, parm);\ - DST(op,i*32+10, (w1 >> 36) & 0x3ff, parm);\ - DST(op,i*32+11, (w1 >> 46) & 0x3ff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*5+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+12, (w1 >> 56) | (w2 << 8) & 0x3ff, parm);\ - DST(op,i*32+13, (w2 >> 2) & 0x3ff, parm);\ - DST(op,i*32+14, (w2 >> 12) & 0x3ff, parm);\ - DST(op,i*32+15, (w2 >> 22) & 0x3ff, parm);\ - DST(op,i*32+16, (w2 >> 32) & 0x3ff, parm);\ - DST(op,i*32+17, (w2 >> 42) & 0x3ff, parm);\ - DST(op,i*32+18, (w2 >> 52) & 0x3ff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*5+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+19, (w2 >> 62) | (w3 << 2) & 0x3ff, parm);\ - DST(op,i*32+20, (w3 >> 8) & 0x3ff, parm);\ - DST(op,i*32+21, (w3 >> 18) & 0x3ff, parm);\ - DST(op,i*32+22, (w3 >> 28) & 0x3ff, parm);\ - DST(op,i*32+23, (w3 >> 38) & 0x3ff, parm);\ - DST(op,i*32+24, (w3 >> 48) & 0x3ff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*5+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+25, (w3 >> 58) | (w4 << 6) & 0x3ff, parm);\ - DST(op,i*32+26, (w4 >> 4) & 0x3ff, parm);\ - DST(op,i*32+27, (w4 >> 14) & 0x3ff, parm);\ - DST(op,i*32+28, (w4 >> 24) & 0x3ff, parm);\ - DST(op,i*32+29, (w4 >> 34) & 0x3ff, parm);\ - DST(op,i*32+30, (w4 >> 44) & 0x3ff, parm);\ - DST(op,i*32+31, (w4 >> 54) , parm);;\ -} - -#define BITUNPACK64_10(ip, op, parm) { \ - BITUNBLK64_10(ip, 0, op, parm); DSTI(op); ip += 10*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_11(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*11+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7ff, parm);\ - DST(op,i*64+ 1, (w0 >> 11) & 0x7ff, parm);\ - DST(op,i*64+ 2, (w0 >> 22) & 0x7ff, parm);\ - 
DST(op,i*64+ 3, (w0 >> 33) & 0x7ff, parm);\ - DST(op,i*64+ 4, (w0 >> 44) & 0x7ff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*11+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w0 >> 55) | (w1 << 9) & 0x7ff, parm);\ - DST(op,i*64+ 6, (w1 >> 2) & 0x7ff, parm);\ - DST(op,i*64+ 7, (w1 >> 13) & 0x7ff, parm);\ - DST(op,i*64+ 8, (w1 >> 24) & 0x7ff, parm);\ - DST(op,i*64+ 9, (w1 >> 35) & 0x7ff, parm);\ - DST(op,i*64+10, (w1 >> 46) & 0x7ff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*11+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w1 >> 57) | (w2 << 7) & 0x7ff, parm);\ - DST(op,i*64+12, (w2 >> 4) & 0x7ff, parm);\ - DST(op,i*64+13, (w2 >> 15) & 0x7ff, parm);\ - DST(op,i*64+14, (w2 >> 26) & 0x7ff, parm);\ - DST(op,i*64+15, (w2 >> 37) & 0x7ff, parm);\ - DST(op,i*64+16, (w2 >> 48) & 0x7ff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*11+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w2 >> 59) | (w3 << 5) & 0x7ff, parm);\ - DST(op,i*64+18, (w3 >> 6) & 0x7ff, parm);\ - DST(op,i*64+19, (w3 >> 17) & 0x7ff, parm);\ - DST(op,i*64+20, (w3 >> 28) & 0x7ff, parm);\ - DST(op,i*64+21, (w3 >> 39) & 0x7ff, parm);\ - DST(op,i*64+22, (w3 >> 50) & 0x7ff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*11+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w3 >> 61) | (w4 << 3) & 0x7ff, parm);\ - DST(op,i*64+24, (w4 >> 8) & 0x7ff, parm);\ - DST(op,i*64+25, (w4 >> 19) & 0x7ff, parm);\ - DST(op,i*64+26, (w4 >> 30) & 0x7ff, parm);\ - DST(op,i*64+27, (w4 >> 41) & 0x7ff, parm);\ - DST(op,i*64+28, (w4 >> 52) & 0x7ff, parm); register uint64_t w5 = *(uint32_t *)(ip+(i*11+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w4 >> 63) | (w5 << 1) & 0x7ff, parm);\ - DST(op,i*64+30, (w5 >> 10) & 0x7ff, parm);\ - DST(op,i*64+31, (w5 >> 21) & 0x7ff, parm);;\ -} - -#define BITUNPACK64_11(ip, op, parm) { \ - BITUNBLK64_11(ip, 0, op, parm); DSTI(op); ip += 11*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_12(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*3+0)*8/sizeof(ip[0]));\ - DST(op,i*16+ 0, (w0 ) & 
0xfff, parm);\ - DST(op,i*16+ 1, (w0 >> 12) & 0xfff, parm);\ - DST(op,i*16+ 2, (w0 >> 24) & 0xfff, parm);\ - DST(op,i*16+ 3, (w0 >> 36) & 0xfff, parm);\ - DST(op,i*16+ 4, (w0 >> 48) & 0xfff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*3+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 5, (w0 >> 60) | (w1 << 4) & 0xfff, parm);\ - DST(op,i*16+ 6, (w1 >> 8) & 0xfff, parm);\ - DST(op,i*16+ 7, (w1 >> 20) & 0xfff, parm);\ - DST(op,i*16+ 8, (w1 >> 32) & 0xfff, parm);\ - DST(op,i*16+ 9, (w1 >> 44) & 0xfff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*3+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+10, (w1 >> 56) | (w2 << 8) & 0xfff, parm);\ - DST(op,i*16+11, (w2 >> 4) & 0xfff, parm);\ - DST(op,i*16+12, (w2 >> 16) & 0xfff, parm);\ - DST(op,i*16+13, (w2 >> 28) & 0xfff, parm);\ - DST(op,i*16+14, (w2 >> 40) & 0xfff, parm);\ - DST(op,i*16+15, (w2 >> 52) , parm);;\ -} - -#define BITUNPACK64_12(ip, op, parm) { \ - BITUNBLK64_12(ip, 0, op, parm);\ - BITUNBLK64_12(ip, 1, op, parm); DSTI(op); ip += 12*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_13(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*13+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1fff, parm);\ - DST(op,i*64+ 1, (w0 >> 13) & 0x1fff, parm);\ - DST(op,i*64+ 2, (w0 >> 26) & 0x1fff, parm);\ - DST(op,i*64+ 3, (w0 >> 39) & 0x1fff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*13+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w0 >> 52) | (w1 << 12) & 0x1fff, parm);\ - DST(op,i*64+ 5, (w1 >> 1) & 0x1fff, parm);\ - DST(op,i*64+ 6, (w1 >> 14) & 0x1fff, parm);\ - DST(op,i*64+ 7, (w1 >> 27) & 0x1fff, parm);\ - DST(op,i*64+ 8, (w1 >> 40) & 0x1fff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*13+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w1 >> 53) | (w2 << 11) & 0x1fff, parm);\ - DST(op,i*64+10, (w2 >> 2) & 0x1fff, parm);\ - DST(op,i*64+11, (w2 >> 15) & 0x1fff, parm);\ - DST(op,i*64+12, (w2 >> 28) & 0x1fff, parm);\ - DST(op,i*64+13, (w2 >> 41) & 0x1fff, parm); register uint64_t w3 = *(uint64_t 
*)(ip+(i*13+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w2 >> 54) | (w3 << 10) & 0x1fff, parm);\ - DST(op,i*64+15, (w3 >> 3) & 0x1fff, parm);\ - DST(op,i*64+16, (w3 >> 16) & 0x1fff, parm);\ - DST(op,i*64+17, (w3 >> 29) & 0x1fff, parm);\ - DST(op,i*64+18, (w3 >> 42) & 0x1fff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*13+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w3 >> 55) | (w4 << 9) & 0x1fff, parm);\ - DST(op,i*64+20, (w4 >> 4) & 0x1fff, parm);\ - DST(op,i*64+21, (w4 >> 17) & 0x1fff, parm);\ - DST(op,i*64+22, (w4 >> 30) & 0x1fff, parm);\ - DST(op,i*64+23, (w4 >> 43) & 0x1fff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*13+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w4 >> 56) | (w5 << 8) & 0x1fff, parm);\ - DST(op,i*64+25, (w5 >> 5) & 0x1fff, parm);\ - DST(op,i*64+26, (w5 >> 18) & 0x1fff, parm);\ - DST(op,i*64+27, (w5 >> 31) & 0x1fff, parm);\ - DST(op,i*64+28, (w5 >> 44) & 0x1fff, parm); register uint64_t w6 = *(uint32_t *)(ip+(i*13+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w5 >> 57) | (w6 << 7) & 0x1fff, parm);\ - DST(op,i*64+30, (w6 >> 6) & 0x1fff, parm);\ - DST(op,i*64+31, (w6 >> 19) & 0x1fff, parm);;\ -} - -#define BITUNPACK64_13(ip, op, parm) { \ - BITUNBLK64_13(ip, 0, op, parm); DSTI(op); ip += 13*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_14(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*7+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3fff, parm);\ - DST(op,i*32+ 1, (w0 >> 14) & 0x3fff, parm);\ - DST(op,i*32+ 2, (w0 >> 28) & 0x3fff, parm);\ - DST(op,i*32+ 3, (w0 >> 42) & 0x3fff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*7+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 4, (w0 >> 56) | (w1 << 8) & 0x3fff, parm);\ - DST(op,i*32+ 5, (w1 >> 6) & 0x3fff, parm);\ - DST(op,i*32+ 6, (w1 >> 20) & 0x3fff, parm);\ - DST(op,i*32+ 7, (w1 >> 34) & 0x3fff, parm);\ - DST(op,i*32+ 8, (w1 >> 48) & 0x3fff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*7+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 9, (w1 >> 62) | (w2 << 2) & 0x3fff, parm);\ - 
DST(op,i*32+10, (w2 >> 12) & 0x3fff, parm);\ - DST(op,i*32+11, (w2 >> 26) & 0x3fff, parm);\ - DST(op,i*32+12, (w2 >> 40) & 0x3fff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*7+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+13, (w2 >> 54) | (w3 << 10) & 0x3fff, parm);\ - DST(op,i*32+14, (w3 >> 4) & 0x3fff, parm);\ - DST(op,i*32+15, (w3 >> 18) & 0x3fff, parm);\ - DST(op,i*32+16, (w3 >> 32) & 0x3fff, parm);\ - DST(op,i*32+17, (w3 >> 46) & 0x3fff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*7+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+18, (w3 >> 60) | (w4 << 4) & 0x3fff, parm);\ - DST(op,i*32+19, (w4 >> 10) & 0x3fff, parm);\ - DST(op,i*32+20, (w4 >> 24) & 0x3fff, parm);\ - DST(op,i*32+21, (w4 >> 38) & 0x3fff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*7+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+22, (w4 >> 52) | (w5 << 12) & 0x3fff, parm);\ - DST(op,i*32+23, (w5 >> 2) & 0x3fff, parm);\ - DST(op,i*32+24, (w5 >> 16) & 0x3fff, parm);\ - DST(op,i*32+25, (w5 >> 30) & 0x3fff, parm);\ - DST(op,i*32+26, (w5 >> 44) & 0x3fff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*7+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+27, (w5 >> 58) | (w6 << 6) & 0x3fff, parm);\ - DST(op,i*32+28, (w6 >> 8) & 0x3fff, parm);\ - DST(op,i*32+29, (w6 >> 22) & 0x3fff, parm);\ - DST(op,i*32+30, (w6 >> 36) & 0x3fff, parm);\ - DST(op,i*32+31, (w6 >> 50) , parm);;\ -} - -#define BITUNPACK64_14(ip, op, parm) { \ - BITUNBLK64_14(ip, 0, op, parm); DSTI(op); ip += 14*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_15(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*15+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7fff, parm);\ - DST(op,i*64+ 1, (w0 >> 15) & 0x7fff, parm);\ - DST(op,i*64+ 2, (w0 >> 30) & 0x7fff, parm);\ - DST(op,i*64+ 3, (w0 >> 45) & 0x7fff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*15+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w0 >> 60) | (w1 << 4) & 0x7fff, parm);\ - DST(op,i*64+ 5, (w1 >> 11) & 0x7fff, parm);\ - DST(op,i*64+ 6, (w1 >> 26) & 0x7fff, parm);\ - 
DST(op,i*64+ 7, (w1 >> 41) & 0x7fff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*15+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w1 >> 56) | (w2 << 8) & 0x7fff, parm);\ - DST(op,i*64+ 9, (w2 >> 7) & 0x7fff, parm);\ - DST(op,i*64+10, (w2 >> 22) & 0x7fff, parm);\ - DST(op,i*64+11, (w2 >> 37) & 0x7fff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*15+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w2 >> 52) | (w3 << 12) & 0x7fff, parm);\ - DST(op,i*64+13, (w3 >> 3) & 0x7fff, parm);\ - DST(op,i*64+14, (w3 >> 18) & 0x7fff, parm);\ - DST(op,i*64+15, (w3 >> 33) & 0x7fff, parm);\ - DST(op,i*64+16, (w3 >> 48) & 0x7fff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*15+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w3 >> 63) | (w4 << 1) & 0x7fff, parm);\ - DST(op,i*64+18, (w4 >> 14) & 0x7fff, parm);\ - DST(op,i*64+19, (w4 >> 29) & 0x7fff, parm);\ - DST(op,i*64+20, (w4 >> 44) & 0x7fff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*15+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w4 >> 59) | (w5 << 5) & 0x7fff, parm);\ - DST(op,i*64+22, (w5 >> 10) & 0x7fff, parm);\ - DST(op,i*64+23, (w5 >> 25) & 0x7fff, parm);\ - DST(op,i*64+24, (w5 >> 40) & 0x7fff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*15+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w5 >> 55) | (w6 << 9) & 0x7fff, parm);\ - DST(op,i*64+26, (w6 >> 6) & 0x7fff, parm);\ - DST(op,i*64+27, (w6 >> 21) & 0x7fff, parm);\ - DST(op,i*64+28, (w6 >> 36) & 0x7fff, parm); register uint64_t w7 = *(uint32_t *)(ip+(i*15+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w6 >> 51) | (w7 << 13) & 0x7fff, parm);\ - DST(op,i*64+30, (w7 >> 2) & 0x7fff, parm);\ - DST(op,i*64+31, (w7 >> 17) & 0x7fff, parm);;\ -} - -#define BITUNPACK64_15(ip, op, parm) { \ - BITUNBLK64_15(ip, 0, op, parm); DSTI(op); ip += 15*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_16(ip, i, op, parm) { \ - DST(op,i*4+ 0, *(uint16_t *)(ip+i*8+ 0), parm);\ - DST(op,i*4+ 1, *(uint16_t *)(ip+i*8+ 2), parm);\ - DST(op,i*4+ 2, *(uint16_t *)(ip+i*8+ 4), parm);\ - DST(op,i*4+ 
3, *(uint16_t *)(ip+i*8+ 6), parm);;\ -} - -#define BITUNPACK64_16(ip, op, parm) { \ - BITUNBLK64_16(ip, 0, op, parm);\ - BITUNBLK64_16(ip, 1, op, parm);\ - BITUNBLK64_16(ip, 2, op, parm);\ - BITUNBLK64_16(ip, 3, op, parm);\ - BITUNBLK64_16(ip, 4, op, parm);\ - BITUNBLK64_16(ip, 5, op, parm);\ - BITUNBLK64_16(ip, 6, op, parm);\ - BITUNBLK64_16(ip, 7, op, parm); DSTI(op); ip += 16*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_17(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*17+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1ffff, parm);\ - DST(op,i*64+ 1, (w0 >> 17) & 0x1ffff, parm);\ - DST(op,i*64+ 2, (w0 >> 34) & 0x1ffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*17+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w0 >> 51) | (w1 << 13) & 0x1ffff, parm);\ - DST(op,i*64+ 4, (w1 >> 4) & 0x1ffff, parm);\ - DST(op,i*64+ 5, (w1 >> 21) & 0x1ffff, parm);\ - DST(op,i*64+ 6, (w1 >> 38) & 0x1ffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*17+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w1 >> 55) | (w2 << 9) & 0x1ffff, parm);\ - DST(op,i*64+ 8, (w2 >> 8) & 0x1ffff, parm);\ - DST(op,i*64+ 9, (w2 >> 25) & 0x1ffff, parm);\ - DST(op,i*64+10, (w2 >> 42) & 0x1ffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*17+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w2 >> 59) | (w3 << 5) & 0x1ffff, parm);\ - DST(op,i*64+12, (w3 >> 12) & 0x1ffff, parm);\ - DST(op,i*64+13, (w3 >> 29) & 0x1ffff, parm);\ - DST(op,i*64+14, (w3 >> 46) & 0x1ffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*17+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w3 >> 63) | (w4 << 1) & 0x1ffff, parm);\ - DST(op,i*64+16, (w4 >> 16) & 0x1ffff, parm);\ - DST(op,i*64+17, (w4 >> 33) & 0x1ffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*17+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w4 >> 50) | (w5 << 14) & 0x1ffff, parm);\ - DST(op,i*64+19, (w5 >> 3) & 0x1ffff, parm);\ - DST(op,i*64+20, (w5 >> 20) & 0x1ffff, parm);\ - DST(op,i*64+21, (w5 >> 37) & 0x1ffff, parm); register uint64_t w6 = 
*(uint64_t *)(ip+(i*17+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w5 >> 54) | (w6 << 10) & 0x1ffff, parm);\ - DST(op,i*64+23, (w6 >> 7) & 0x1ffff, parm);\ - DST(op,i*64+24, (w6 >> 24) & 0x1ffff, parm);\ - DST(op,i*64+25, (w6 >> 41) & 0x1ffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*17+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w6 >> 58) | (w7 << 6) & 0x1ffff, parm);\ - DST(op,i*64+27, (w7 >> 11) & 0x1ffff, parm);\ - DST(op,i*64+28, (w7 >> 28) & 0x1ffff, parm);\ - DST(op,i*64+29, (w7 >> 45) & 0x1ffff, parm); register uint64_t w8 = *(uint32_t *)(ip+(i*17+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w7 >> 62) | (w8 << 2) & 0x1ffff, parm);\ - DST(op,i*64+31, (w8 >> 15) & 0x1ffff, parm);;\ -} - -#define BITUNPACK64_17(ip, op, parm) { \ - BITUNBLK64_17(ip, 0, op, parm); DSTI(op); ip += 17*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_18(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*9+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3ffff, parm);\ - DST(op,i*32+ 1, (w0 >> 18) & 0x3ffff, parm);\ - DST(op,i*32+ 2, (w0 >> 36) & 0x3ffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*9+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 3, (w0 >> 54) | (w1 << 10) & 0x3ffff, parm);\ - DST(op,i*32+ 4, (w1 >> 8) & 0x3ffff, parm);\ - DST(op,i*32+ 5, (w1 >> 26) & 0x3ffff, parm);\ - DST(op,i*32+ 6, (w1 >> 44) & 0x3ffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*9+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 7, (w1 >> 62) | (w2 << 2) & 0x3ffff, parm);\ - DST(op,i*32+ 8, (w2 >> 16) & 0x3ffff, parm);\ - DST(op,i*32+ 9, (w2 >> 34) & 0x3ffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*9+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+10, (w2 >> 52) | (w3 << 12) & 0x3ffff, parm);\ - DST(op,i*32+11, (w3 >> 6) & 0x3ffff, parm);\ - DST(op,i*32+12, (w3 >> 24) & 0x3ffff, parm);\ - DST(op,i*32+13, (w3 >> 42) & 0x3ffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*9+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+14, (w3 >> 60) | (w4 << 4) & 0x3ffff, parm);\ - DST(op,i*32+15, (w4 
>> 14) & 0x3ffff, parm);\ - DST(op,i*32+16, (w4 >> 32) & 0x3ffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*9+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+17, (w4 >> 50) | (w5 << 14) & 0x3ffff, parm);\ - DST(op,i*32+18, (w5 >> 4) & 0x3ffff, parm);\ - DST(op,i*32+19, (w5 >> 22) & 0x3ffff, parm);\ - DST(op,i*32+20, (w5 >> 40) & 0x3ffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*9+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+21, (w5 >> 58) | (w6 << 6) & 0x3ffff, parm);\ - DST(op,i*32+22, (w6 >> 12) & 0x3ffff, parm);\ - DST(op,i*32+23, (w6 >> 30) & 0x3ffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*9+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+24, (w6 >> 48) | (w7 << 16) & 0x3ffff, parm);\ - DST(op,i*32+25, (w7 >> 2) & 0x3ffff, parm);\ - DST(op,i*32+26, (w7 >> 20) & 0x3ffff, parm);\ - DST(op,i*32+27, (w7 >> 38) & 0x3ffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*9+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+28, (w7 >> 56) | (w8 << 8) & 0x3ffff, parm);\ - DST(op,i*32+29, (w8 >> 10) & 0x3ffff, parm);\ - DST(op,i*32+30, (w8 >> 28) & 0x3ffff, parm);\ - DST(op,i*32+31, (w8 >> 46) , parm);;\ -} - -#define BITUNPACK64_18(ip, op, parm) { \ - BITUNBLK64_18(ip, 0, op, parm); DSTI(op); ip += 18*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_19(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*19+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7ffff, parm);\ - DST(op,i*64+ 1, (w0 >> 19) & 0x7ffff, parm);\ - DST(op,i*64+ 2, (w0 >> 38) & 0x7ffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*19+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w0 >> 57) | (w1 << 7) & 0x7ffff, parm);\ - DST(op,i*64+ 4, (w1 >> 12) & 0x7ffff, parm);\ - DST(op,i*64+ 5, (w1 >> 31) & 0x7ffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*19+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w1 >> 50) | (w2 << 14) & 0x7ffff, parm);\ - DST(op,i*64+ 7, (w2 >> 5) & 0x7ffff, parm);\ - DST(op,i*64+ 8, (w2 >> 24) & 0x7ffff, parm);\ - DST(op,i*64+ 9, (w2 >> 43) & 0x7ffff, parm); register uint64_t 
w3 = *(uint64_t *)(ip+(i*19+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w2 >> 62) | (w3 << 2) & 0x7ffff, parm);\ - DST(op,i*64+11, (w3 >> 17) & 0x7ffff, parm);\ - DST(op,i*64+12, (w3 >> 36) & 0x7ffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*19+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w3 >> 55) | (w4 << 9) & 0x7ffff, parm);\ - DST(op,i*64+14, (w4 >> 10) & 0x7ffff, parm);\ - DST(op,i*64+15, (w4 >> 29) & 0x7ffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*19+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w4 >> 48) | (w5 << 16) & 0x7ffff, parm);\ - DST(op,i*64+17, (w5 >> 3) & 0x7ffff, parm);\ - DST(op,i*64+18, (w5 >> 22) & 0x7ffff, parm);\ - DST(op,i*64+19, (w5 >> 41) & 0x7ffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*19+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w5 >> 60) | (w6 << 4) & 0x7ffff, parm);\ - DST(op,i*64+21, (w6 >> 15) & 0x7ffff, parm);\ - DST(op,i*64+22, (w6 >> 34) & 0x7ffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*19+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w6 >> 53) | (w7 << 11) & 0x7ffff, parm);\ - DST(op,i*64+24, (w7 >> 8) & 0x7ffff, parm);\ - DST(op,i*64+25, (w7 >> 27) & 0x7ffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*19+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w7 >> 46) | (w8 << 18) & 0x7ffff, parm);\ - DST(op,i*64+27, (w8 >> 1) & 0x7ffff, parm);\ - DST(op,i*64+28, (w8 >> 20) & 0x7ffff, parm);\ - DST(op,i*64+29, (w8 >> 39) & 0x7ffff, parm); register uint64_t w9 = *(uint32_t *)(ip+(i*19+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w8 >> 58) | (w9 << 6) & 0x7ffff, parm);\ - DST(op,i*64+31, (w9 >> 13) & 0x7ffff, parm);;\ -} - -#define BITUNPACK64_19(ip, op, parm) { \ - BITUNBLK64_19(ip, 0, op, parm); DSTI(op); ip += 19*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_20(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*5+0)*8/sizeof(ip[0]));\ - DST(op,i*16+ 0, (w0 ) & 0xfffff, parm);\ - DST(op,i*16+ 1, (w0 >> 20) & 0xfffff, parm);\ - DST(op,i*16+ 2, (w0 >> 40) & 0xfffff, parm); register 
uint64_t w1 = *(uint64_t *)(ip+(i*5+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 3, (w0 >> 60) | (w1 << 4) & 0xfffff, parm);\ - DST(op,i*16+ 4, (w1 >> 16) & 0xfffff, parm);\ - DST(op,i*16+ 5, (w1 >> 36) & 0xfffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*5+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 6, (w1 >> 56) | (w2 << 8) & 0xfffff, parm);\ - DST(op,i*16+ 7, (w2 >> 12) & 0xfffff, parm);\ - DST(op,i*16+ 8, (w2 >> 32) & 0xfffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*5+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 9, (w2 >> 52) | (w3 << 12) & 0xfffff, parm);\ - DST(op,i*16+10, (w3 >> 8) & 0xfffff, parm);\ - DST(op,i*16+11, (w3 >> 28) & 0xfffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*5+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+12, (w3 >> 48) | (w4 << 16) & 0xfffff, parm);\ - DST(op,i*16+13, (w4 >> 4) & 0xfffff, parm);\ - DST(op,i*16+14, (w4 >> 24) & 0xfffff, parm);\ - DST(op,i*16+15, (w4 >> 44) , parm);;\ -} - -#define BITUNPACK64_20(ip, op, parm) { \ - BITUNBLK64_20(ip, 0, op, parm);\ - BITUNBLK64_20(ip, 1, op, parm); DSTI(op); ip += 20*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_21(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*21+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1fffff, parm);\ - DST(op,i*64+ 1, (w0 >> 21) & 0x1fffff, parm);\ - DST(op,i*64+ 2, (w0 >> 42) & 0x1fffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*21+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w0 >> 63) | (w1 << 1) & 0x1fffff, parm);\ - DST(op,i*64+ 4, (w1 >> 20) & 0x1fffff, parm);\ - DST(op,i*64+ 5, (w1 >> 41) & 0x1fffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*21+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w1 >> 62) | (w2 << 2) & 0x1fffff, parm);\ - DST(op,i*64+ 7, (w2 >> 19) & 0x1fffff, parm);\ - DST(op,i*64+ 8, (w2 >> 40) & 0x1fffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*21+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w2 >> 61) | (w3 << 3) & 0x1fffff, parm);\ - DST(op,i*64+10, (w3 >> 18) & 0x1fffff, parm);\ - DST(op,i*64+11, 
(w3 >> 39) & 0x1fffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*21+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w3 >> 60) | (w4 << 4) & 0x1fffff, parm);\ - DST(op,i*64+13, (w4 >> 17) & 0x1fffff, parm);\ - DST(op,i*64+14, (w4 >> 38) & 0x1fffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*21+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w4 >> 59) | (w5 << 5) & 0x1fffff, parm);\ - DST(op,i*64+16, (w5 >> 16) & 0x1fffff, parm);\ - DST(op,i*64+17, (w5 >> 37) & 0x1fffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*21+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w5 >> 58) | (w6 << 6) & 0x1fffff, parm);\ - DST(op,i*64+19, (w6 >> 15) & 0x1fffff, parm);\ - DST(op,i*64+20, (w6 >> 36) & 0x1fffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*21+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w6 >> 57) | (w7 << 7) & 0x1fffff, parm);\ - DST(op,i*64+22, (w7 >> 14) & 0x1fffff, parm);\ - DST(op,i*64+23, (w7 >> 35) & 0x1fffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*21+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w7 >> 56) | (w8 << 8) & 0x1fffff, parm);\ - DST(op,i*64+25, (w8 >> 13) & 0x1fffff, parm);\ - DST(op,i*64+26, (w8 >> 34) & 0x1fffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*21+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w8 >> 55) | (w9 << 9) & 0x1fffff, parm);\ - DST(op,i*64+28, (w9 >> 12) & 0x1fffff, parm);\ - DST(op,i*64+29, (w9 >> 33) & 0x1fffff, parm); register uint64_t w10 = *(uint32_t *)(ip+(i*21+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w9 >> 54) | (w10 << 10) & 0x1fffff, parm);\ - DST(op,i*64+31, (w10 >> 11) & 0x1fffff, parm);;\ -} - -#define BITUNPACK64_21(ip, op, parm) { \ - BITUNBLK64_21(ip, 0, op, parm); DSTI(op); ip += 21*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_22(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*11+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3fffff, parm);\ - DST(op,i*32+ 1, (w0 >> 22) & 0x3fffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*11+1)*8/sizeof(ip[0]));\ -\ - 
DST(op,i*32+ 2, (w0 >> 44) | (w1 << 20) & 0x3fffff, parm);\ - DST(op,i*32+ 3, (w1 >> 2) & 0x3fffff, parm);\ - DST(op,i*32+ 4, (w1 >> 24) & 0x3fffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*11+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 5, (w1 >> 46) | (w2 << 18) & 0x3fffff, parm);\ - DST(op,i*32+ 6, (w2 >> 4) & 0x3fffff, parm);\ - DST(op,i*32+ 7, (w2 >> 26) & 0x3fffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*11+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 8, (w2 >> 48) | (w3 << 16) & 0x3fffff, parm);\ - DST(op,i*32+ 9, (w3 >> 6) & 0x3fffff, parm);\ - DST(op,i*32+10, (w3 >> 28) & 0x3fffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*11+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+11, (w3 >> 50) | (w4 << 14) & 0x3fffff, parm);\ - DST(op,i*32+12, (w4 >> 8) & 0x3fffff, parm);\ - DST(op,i*32+13, (w4 >> 30) & 0x3fffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*11+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+14, (w4 >> 52) | (w5 << 12) & 0x3fffff, parm);\ - DST(op,i*32+15, (w5 >> 10) & 0x3fffff, parm);\ - DST(op,i*32+16, (w5 >> 32) & 0x3fffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*11+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+17, (w5 >> 54) | (w6 << 10) & 0x3fffff, parm);\ - DST(op,i*32+18, (w6 >> 12) & 0x3fffff, parm);\ - DST(op,i*32+19, (w6 >> 34) & 0x3fffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*11+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+20, (w6 >> 56) | (w7 << 8) & 0x3fffff, parm);\ - DST(op,i*32+21, (w7 >> 14) & 0x3fffff, parm);\ - DST(op,i*32+22, (w7 >> 36) & 0x3fffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*11+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+23, (w7 >> 58) | (w8 << 6) & 0x3fffff, parm);\ - DST(op,i*32+24, (w8 >> 16) & 0x3fffff, parm);\ - DST(op,i*32+25, (w8 >> 38) & 0x3fffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*11+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+26, (w8 >> 60) | (w9 << 4) & 0x3fffff, parm);\ - DST(op,i*32+27, (w9 >> 18) & 0x3fffff, parm);\ - DST(op,i*32+28, (w9 >> 40) & 0x3fffff, parm); register 
uint64_t w10 = *(uint64_t *)(ip+(i*11+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+29, (w9 >> 62) | (w10 << 2) & 0x3fffff, parm);\ - DST(op,i*32+30, (w10 >> 20) & 0x3fffff, parm);\ - DST(op,i*32+31, (w10 >> 42) , parm);;\ -} - -#define BITUNPACK64_22(ip, op, parm) { \ - BITUNBLK64_22(ip, 0, op, parm); DSTI(op); ip += 22*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_23(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*23+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7fffff, parm);\ - DST(op,i*64+ 1, (w0 >> 23) & 0x7fffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*23+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w0 >> 46) | (w1 << 18) & 0x7fffff, parm);\ - DST(op,i*64+ 3, (w1 >> 5) & 0x7fffff, parm);\ - DST(op,i*64+ 4, (w1 >> 28) & 0x7fffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*23+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w1 >> 51) | (w2 << 13) & 0x7fffff, parm);\ - DST(op,i*64+ 6, (w2 >> 10) & 0x7fffff, parm);\ - DST(op,i*64+ 7, (w2 >> 33) & 0x7fffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*23+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w2 >> 56) | (w3 << 8) & 0x7fffff, parm);\ - DST(op,i*64+ 9, (w3 >> 15) & 0x7fffff, parm);\ - DST(op,i*64+10, (w3 >> 38) & 0x7fffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*23+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w3 >> 61) | (w4 << 3) & 0x7fffff, parm);\ - DST(op,i*64+12, (w4 >> 20) & 0x7fffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*23+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w4 >> 43) | (w5 << 21) & 0x7fffff, parm);\ - DST(op,i*64+14, (w5 >> 2) & 0x7fffff, parm);\ - DST(op,i*64+15, (w5 >> 25) & 0x7fffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*23+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w5 >> 48) | (w6 << 16) & 0x7fffff, parm);\ - DST(op,i*64+17, (w6 >> 7) & 0x7fffff, parm);\ - DST(op,i*64+18, (w6 >> 30) & 0x7fffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*23+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w6 >> 53) | (w7 << 11) & 0x7fffff, 
parm);\ - DST(op,i*64+20, (w7 >> 12) & 0x7fffff, parm);\ - DST(op,i*64+21, (w7 >> 35) & 0x7fffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*23+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w7 >> 58) | (w8 << 6) & 0x7fffff, parm);\ - DST(op,i*64+23, (w8 >> 17) & 0x7fffff, parm);\ - DST(op,i*64+24, (w8 >> 40) & 0x7fffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*23+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w8 >> 63) | (w9 << 1) & 0x7fffff, parm);\ - DST(op,i*64+26, (w9 >> 22) & 0x7fffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*23+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w9 >> 45) | (w10 << 19) & 0x7fffff, parm);\ - DST(op,i*64+28, (w10 >> 4) & 0x7fffff, parm);\ - DST(op,i*64+29, (w10 >> 27) & 0x7fffff, parm); register uint64_t w11 = *(uint32_t *)(ip+(i*23+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w10 >> 50) | (w11 << 14) & 0x7fffff, parm);\ - DST(op,i*64+31, (w11 >> 9) & 0x7fffff, parm);;\ -} - -#define BITUNPACK64_23(ip, op, parm) { \ - BITUNBLK64_23(ip, 0, op, parm); DSTI(op); ip += 23*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_24(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*3+0)*8/sizeof(ip[0]));\ - DST(op,i*8+ 0, (w0 ) & 0xffffff, parm);\ - DST(op,i*8+ 1, (w0 >> 24) & 0xffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*3+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 2, (w0 >> 48) | (w1 << 16) & 0xffffff, parm);\ - DST(op,i*8+ 3, (w1 >> 8) & 0xffffff, parm);\ - DST(op,i*8+ 4, (w1 >> 32) & 0xffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*3+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 5, (w1 >> 56) | (w2 << 8) & 0xffffff, parm);\ - DST(op,i*8+ 6, (w2 >> 16) & 0xffffff, parm);\ - DST(op,i*8+ 7, (w2 >> 40) , parm);;\ -} - -#define BITUNPACK64_24(ip, op, parm) { \ - BITUNBLK64_24(ip, 0, op, parm);\ - BITUNBLK64_24(ip, 1, op, parm);\ - BITUNBLK64_24(ip, 2, op, parm);\ - BITUNBLK64_24(ip, 3, op, parm); DSTI(op); ip += 24*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_25(ip, i, op, parm) { register uint64_t w0 = 
*(uint64_t *)(ip+(i*25+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1ffffff, parm);\ - DST(op,i*64+ 1, (w0 >> 25) & 0x1ffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*25+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w0 >> 50) | (w1 << 14) & 0x1ffffff, parm);\ - DST(op,i*64+ 3, (w1 >> 11) & 0x1ffffff, parm);\ - DST(op,i*64+ 4, (w1 >> 36) & 0x1ffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*25+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w1 >> 61) | (w2 << 3) & 0x1ffffff, parm);\ - DST(op,i*64+ 6, (w2 >> 22) & 0x1ffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*25+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w2 >> 47) | (w3 << 17) & 0x1ffffff, parm);\ - DST(op,i*64+ 8, (w3 >> 8) & 0x1ffffff, parm);\ - DST(op,i*64+ 9, (w3 >> 33) & 0x1ffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*25+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w3 >> 58) | (w4 << 6) & 0x1ffffff, parm);\ - DST(op,i*64+11, (w4 >> 19) & 0x1ffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*25+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w4 >> 44) | (w5 << 20) & 0x1ffffff, parm);\ - DST(op,i*64+13, (w5 >> 5) & 0x1ffffff, parm);\ - DST(op,i*64+14, (w5 >> 30) & 0x1ffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*25+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w5 >> 55) | (w6 << 9) & 0x1ffffff, parm);\ - DST(op,i*64+16, (w6 >> 16) & 0x1ffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*25+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w6 >> 41) | (w7 << 23) & 0x1ffffff, parm);\ - DST(op,i*64+18, (w7 >> 2) & 0x1ffffff, parm);\ - DST(op,i*64+19, (w7 >> 27) & 0x1ffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*25+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w7 >> 52) | (w8 << 12) & 0x1ffffff, parm);\ - DST(op,i*64+21, (w8 >> 13) & 0x1ffffff, parm);\ - DST(op,i*64+22, (w8 >> 38) & 0x1ffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*25+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w8 >> 63) | (w9 << 1) & 0x1ffffff, parm);\ - DST(op,i*64+24, 
(w9 >> 24) & 0x1ffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*25+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w9 >> 49) | (w10 << 15) & 0x1ffffff, parm);\ - DST(op,i*64+26, (w10 >> 10) & 0x1ffffff, parm);\ - DST(op,i*64+27, (w10 >> 35) & 0x1ffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*25+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w10 >> 60) | (w11 << 4) & 0x1ffffff, parm);\ - DST(op,i*64+29, (w11 >> 21) & 0x1ffffff, parm); register uint64_t w12 = *(uint32_t *)(ip+(i*25+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w11 >> 46) | (w12 << 18) & 0x1ffffff, parm);\ - DST(op,i*64+31, (w12 >> 7) & 0x1ffffff, parm);;\ -} - -#define BITUNPACK64_25(ip, op, parm) { \ - BITUNBLK64_25(ip, 0, op, parm); DSTI(op); ip += 25*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_26(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*13+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3ffffff, parm);\ - DST(op,i*32+ 1, (w0 >> 26) & 0x3ffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*13+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 2, (w0 >> 52) | (w1 << 12) & 0x3ffffff, parm);\ - DST(op,i*32+ 3, (w1 >> 14) & 0x3ffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*13+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 4, (w1 >> 40) | (w2 << 24) & 0x3ffffff, parm);\ - DST(op,i*32+ 5, (w2 >> 2) & 0x3ffffff, parm);\ - DST(op,i*32+ 6, (w2 >> 28) & 0x3ffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*13+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 7, (w2 >> 54) | (w3 << 10) & 0x3ffffff, parm);\ - DST(op,i*32+ 8, (w3 >> 16) & 0x3ffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*13+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 9, (w3 >> 42) | (w4 << 22) & 0x3ffffff, parm);\ - DST(op,i*32+10, (w4 >> 4) & 0x3ffffff, parm);\ - DST(op,i*32+11, (w4 >> 30) & 0x3ffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*13+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+12, (w4 >> 56) | (w5 << 8) & 0x3ffffff, parm);\ - DST(op,i*32+13, (w5 >> 18) & 0x3ffffff, parm); register 
uint64_t w6 = *(uint64_t *)(ip+(i*13+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+14, (w5 >> 44) | (w6 << 20) & 0x3ffffff, parm);\ - DST(op,i*32+15, (w6 >> 6) & 0x3ffffff, parm);\ - DST(op,i*32+16, (w6 >> 32) & 0x3ffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*13+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+17, (w6 >> 58) | (w7 << 6) & 0x3ffffff, parm);\ - DST(op,i*32+18, (w7 >> 20) & 0x3ffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*13+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+19, (w7 >> 46) | (w8 << 18) & 0x3ffffff, parm);\ - DST(op,i*32+20, (w8 >> 8) & 0x3ffffff, parm);\ - DST(op,i*32+21, (w8 >> 34) & 0x3ffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*13+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+22, (w8 >> 60) | (w9 << 4) & 0x3ffffff, parm);\ - DST(op,i*32+23, (w9 >> 22) & 0x3ffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*13+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+24, (w9 >> 48) | (w10 << 16) & 0x3ffffff, parm);\ - DST(op,i*32+25, (w10 >> 10) & 0x3ffffff, parm);\ - DST(op,i*32+26, (w10 >> 36) & 0x3ffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*13+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+27, (w10 >> 62) | (w11 << 2) & 0x3ffffff, parm);\ - DST(op,i*32+28, (w11 >> 24) & 0x3ffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*13+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+29, (w11 >> 50) | (w12 << 14) & 0x3ffffff, parm);\ - DST(op,i*32+30, (w12 >> 12) & 0x3ffffff, parm);\ - DST(op,i*32+31, (w12 >> 38) , parm);;\ -} - -#define BITUNPACK64_26(ip, op, parm) { \ - BITUNBLK64_26(ip, 0, op, parm); DSTI(op); ip += 26*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_27(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*27+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7ffffff, parm);\ - DST(op,i*64+ 1, (w0 >> 27) & 0x7ffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*27+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w0 >> 54) | (w1 << 10) & 0x7ffffff, parm);\ - DST(op,i*64+ 3, (w1 >> 17) & 0x7ffffff, parm); register 
uint64_t w2 = *(uint64_t *)(ip+(i*27+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w1 >> 44) | (w2 << 20) & 0x7ffffff, parm);\ - DST(op,i*64+ 5, (w2 >> 7) & 0x7ffffff, parm);\ - DST(op,i*64+ 6, (w2 >> 34) & 0x7ffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*27+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w2 >> 61) | (w3 << 3) & 0x7ffffff, parm);\ - DST(op,i*64+ 8, (w3 >> 24) & 0x7ffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*27+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w3 >> 51) | (w4 << 13) & 0x7ffffff, parm);\ - DST(op,i*64+10, (w4 >> 14) & 0x7ffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*27+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w4 >> 41) | (w5 << 23) & 0x7ffffff, parm);\ - DST(op,i*64+12, (w5 >> 4) & 0x7ffffff, parm);\ - DST(op,i*64+13, (w5 >> 31) & 0x7ffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*27+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w5 >> 58) | (w6 << 6) & 0x7ffffff, parm);\ - DST(op,i*64+15, (w6 >> 21) & 0x7ffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*27+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w6 >> 48) | (w7 << 16) & 0x7ffffff, parm);\ - DST(op,i*64+17, (w7 >> 11) & 0x7ffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*27+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w7 >> 38) | (w8 << 26) & 0x7ffffff, parm);\ - DST(op,i*64+19, (w8 >> 1) & 0x7ffffff, parm);\ - DST(op,i*64+20, (w8 >> 28) & 0x7ffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*27+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w8 >> 55) | (w9 << 9) & 0x7ffffff, parm);\ - DST(op,i*64+22, (w9 >> 18) & 0x7ffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*27+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w9 >> 45) | (w10 << 19) & 0x7ffffff, parm);\ - DST(op,i*64+24, (w10 >> 8) & 0x7ffffff, parm);\ - DST(op,i*64+25, (w10 >> 35) & 0x7ffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*27+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w10 >> 62) | (w11 << 2) & 0x7ffffff, parm);\ - DST(op,i*64+27, (w11 >> 
25) & 0x7ffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*27+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w11 >> 52) | (w12 << 12) & 0x7ffffff, parm);\ - DST(op,i*64+29, (w12 >> 15) & 0x7ffffff, parm); register uint64_t w13 = *(uint32_t *)(ip+(i*27+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w12 >> 42) | (w13 << 22) & 0x7ffffff, parm);\ - DST(op,i*64+31, (w13 >> 5) & 0x7ffffff, parm);;\ -} - -#define BITUNPACK64_27(ip, op, parm) { \ - BITUNBLK64_27(ip, 0, op, parm); DSTI(op); ip += 27*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_28(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*7+0)*8/sizeof(ip[0]));\ - DST(op,i*16+ 0, (w0 ) & 0xfffffff, parm);\ - DST(op,i*16+ 1, (w0 >> 28) & 0xfffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*7+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 2, (w0 >> 56) | (w1 << 8) & 0xfffffff, parm);\ - DST(op,i*16+ 3, (w1 >> 20) & 0xfffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*7+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 4, (w1 >> 48) | (w2 << 16) & 0xfffffff, parm);\ - DST(op,i*16+ 5, (w2 >> 12) & 0xfffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*7+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 6, (w2 >> 40) | (w3 << 24) & 0xfffffff, parm);\ - DST(op,i*16+ 7, (w3 >> 4) & 0xfffffff, parm);\ - DST(op,i*16+ 8, (w3 >> 32) & 0xfffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*7+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 9, (w3 >> 60) | (w4 << 4) & 0xfffffff, parm);\ - DST(op,i*16+10, (w4 >> 24) & 0xfffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*7+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+11, (w4 >> 52) | (w5 << 12) & 0xfffffff, parm);\ - DST(op,i*16+12, (w5 >> 16) & 0xfffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*7+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+13, (w5 >> 44) | (w6 << 20) & 0xfffffff, parm);\ - DST(op,i*16+14, (w6 >> 8) & 0xfffffff, parm);\ - DST(op,i*16+15, (w6 >> 36) , parm);;\ -} - -#define BITUNPACK64_28(ip, op, parm) { \ - BITUNBLK64_28(ip, 0, op, parm);\ - 
BITUNBLK64_28(ip, 1, op, parm); DSTI(op); ip += 28*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_29(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*29+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1fffffff, parm);\ - DST(op,i*64+ 1, (w0 >> 29) & 0x1fffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*29+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w0 >> 58) | (w1 << 6) & 0x1fffffff, parm);\ - DST(op,i*64+ 3, (w1 >> 23) & 0x1fffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*29+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w1 >> 52) | (w2 << 12) & 0x1fffffff, parm);\ - DST(op,i*64+ 5, (w2 >> 17) & 0x1fffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*29+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w2 >> 46) | (w3 << 18) & 0x1fffffff, parm);\ - DST(op,i*64+ 7, (w3 >> 11) & 0x1fffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*29+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w3 >> 40) | (w4 << 24) & 0x1fffffff, parm);\ - DST(op,i*64+ 9, (w4 >> 5) & 0x1fffffff, parm);\ - DST(op,i*64+10, (w4 >> 34) & 0x1fffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*29+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w4 >> 63) | (w5 << 1) & 0x1fffffff, parm);\ - DST(op,i*64+12, (w5 >> 28) & 0x1fffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*29+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w5 >> 57) | (w6 << 7) & 0x1fffffff, parm);\ - DST(op,i*64+14, (w6 >> 22) & 0x1fffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*29+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w6 >> 51) | (w7 << 13) & 0x1fffffff, parm);\ - DST(op,i*64+16, (w7 >> 16) & 0x1fffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*29+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w7 >> 45) | (w8 << 19) & 0x1fffffff, parm);\ - DST(op,i*64+18, (w8 >> 10) & 0x1fffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*29+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w8 >> 39) | (w9 << 25) & 0x1fffffff, parm);\ - DST(op,i*64+20, (w9 >> 4) & 0x1fffffff, parm);\ - 
DST(op,i*64+21, (w9 >> 33) & 0x1fffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*29+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w9 >> 62) | (w10 << 2) & 0x1fffffff, parm);\ - DST(op,i*64+23, (w10 >> 27) & 0x1fffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*29+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w10 >> 56) | (w11 << 8) & 0x1fffffff, parm);\ - DST(op,i*64+25, (w11 >> 21) & 0x1fffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*29+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w11 >> 50) | (w12 << 14) & 0x1fffffff, parm);\ - DST(op,i*64+27, (w12 >> 15) & 0x1fffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*29+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w12 >> 44) | (w13 << 20) & 0x1fffffff, parm);\ - DST(op,i*64+29, (w13 >> 9) & 0x1fffffff, parm); register uint64_t w14 = *(uint32_t *)(ip+(i*29+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w13 >> 38) | (w14 << 26) & 0x1fffffff, parm);\ - DST(op,i*64+31, (w14 >> 3) & 0x1fffffff, parm);;\ -} - -#define BITUNPACK64_29(ip, op, parm) { \ - BITUNBLK64_29(ip, 0, op, parm); DSTI(op); ip += 29*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_30(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*15+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3fffffff, parm);\ - DST(op,i*32+ 1, (w0 >> 30) & 0x3fffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*15+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 2, (w0 >> 60) | (w1 << 4) & 0x3fffffff, parm);\ - DST(op,i*32+ 3, (w1 >> 26) & 0x3fffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*15+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 4, (w1 >> 56) | (w2 << 8) & 0x3fffffff, parm);\ - DST(op,i*32+ 5, (w2 >> 22) & 0x3fffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*15+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 6, (w2 >> 52) | (w3 << 12) & 0x3fffffff, parm);\ - DST(op,i*32+ 7, (w3 >> 18) & 0x3fffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*15+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 8, (w3 >> 48) | (w4 << 16) & 
0x3fffffff, parm);\ - DST(op,i*32+ 9, (w4 >> 14) & 0x3fffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*15+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+10, (w4 >> 44) | (w5 << 20) & 0x3fffffff, parm);\ - DST(op,i*32+11, (w5 >> 10) & 0x3fffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*15+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+12, (w5 >> 40) | (w6 << 24) & 0x3fffffff, parm);\ - DST(op,i*32+13, (w6 >> 6) & 0x3fffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*15+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+14, (w6 >> 36) | (w7 << 28) & 0x3fffffff, parm);\ - DST(op,i*32+15, (w7 >> 2) & 0x3fffffff, parm);\ - DST(op,i*32+16, (w7 >> 32) & 0x3fffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*15+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+17, (w7 >> 62) | (w8 << 2) & 0x3fffffff, parm);\ - DST(op,i*32+18, (w8 >> 28) & 0x3fffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*15+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+19, (w8 >> 58) | (w9 << 6) & 0x3fffffff, parm);\ - DST(op,i*32+20, (w9 >> 24) & 0x3fffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*15+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+21, (w9 >> 54) | (w10 << 10) & 0x3fffffff, parm);\ - DST(op,i*32+22, (w10 >> 20) & 0x3fffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*15+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+23, (w10 >> 50) | (w11 << 14) & 0x3fffffff, parm);\ - DST(op,i*32+24, (w11 >> 16) & 0x3fffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*15+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+25, (w11 >> 46) | (w12 << 18) & 0x3fffffff, parm);\ - DST(op,i*32+26, (w12 >> 12) & 0x3fffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*15+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+27, (w12 >> 42) | (w13 << 22) & 0x3fffffff, parm);\ - DST(op,i*32+28, (w13 >> 8) & 0x3fffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*15+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+29, (w13 >> 38) | (w14 << 26) & 0x3fffffff, parm);\ - DST(op,i*32+30, (w14 >> 4) & 0x3fffffff, parm);\ - 
DST(op,i*32+31, (w14 >> 34) , parm);;\ -} - -#define BITUNPACK64_30(ip, op, parm) { \ - BITUNBLK64_30(ip, 0, op, parm); DSTI(op); ip += 30*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_31(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*31+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7fffffff, parm);\ - DST(op,i*64+ 1, (w0 >> 31) & 0x7fffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*31+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w0 >> 62) | (w1 << 2) & 0x7fffffff, parm);\ - DST(op,i*64+ 3, (w1 >> 29) & 0x7fffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*31+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w1 >> 60) | (w2 << 4) & 0x7fffffff, parm);\ - DST(op,i*64+ 5, (w2 >> 27) & 0x7fffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*31+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w2 >> 58) | (w3 << 6) & 0x7fffffff, parm);\ - DST(op,i*64+ 7, (w3 >> 25) & 0x7fffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*31+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w3 >> 56) | (w4 << 8) & 0x7fffffff, parm);\ - DST(op,i*64+ 9, (w4 >> 23) & 0x7fffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*31+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w4 >> 54) | (w5 << 10) & 0x7fffffff, parm);\ - DST(op,i*64+11, (w5 >> 21) & 0x7fffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*31+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w5 >> 52) | (w6 << 12) & 0x7fffffff, parm);\ - DST(op,i*64+13, (w6 >> 19) & 0x7fffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*31+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w6 >> 50) | (w7 << 14) & 0x7fffffff, parm);\ - DST(op,i*64+15, (w7 >> 17) & 0x7fffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*31+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w7 >> 48) | (w8 << 16) & 0x7fffffff, parm);\ - DST(op,i*64+17, (w8 >> 15) & 0x7fffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*31+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w8 >> 46) | (w9 << 18) & 0x7fffffff, parm);\ - DST(op,i*64+19, 
(w9 >> 13) & 0x7fffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*31+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w9 >> 44) | (w10 << 20) & 0x7fffffff, parm);\ - DST(op,i*64+21, (w10 >> 11) & 0x7fffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*31+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w10 >> 42) | (w11 << 22) & 0x7fffffff, parm);\ - DST(op,i*64+23, (w11 >> 9) & 0x7fffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*31+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w11 >> 40) | (w12 << 24) & 0x7fffffff, parm);\ - DST(op,i*64+25, (w12 >> 7) & 0x7fffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*31+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w12 >> 38) | (w13 << 26) & 0x7fffffff, parm);\ - DST(op,i*64+27, (w13 >> 5) & 0x7fffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*31+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w13 >> 36) | (w14 << 28) & 0x7fffffff, parm);\ - DST(op,i*64+29, (w14 >> 3) & 0x7fffffff, parm); register uint64_t w15 = *(uint32_t *)(ip+(i*31+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w14 >> 34) | (w15 << 30) & 0x7fffffff, parm);\ - DST(op,i*64+31, (w15 >> 1) & 0x7fffffff, parm);;\ -} - -#define BITUNPACK64_31(ip, op, parm) { \ - BITUNBLK64_31(ip, 0, op, parm); DSTI(op); ip += 31*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_32(ip, i, op, parm) { \ - DST(op,i*2+ 0, *(uint32_t *)(ip+i*8+ 0), parm);\ - DST(op,i*2+ 1, *(uint32_t *)(ip+i*8+ 4), parm);;\ -} - -#define BITUNPACK64_32(ip, op, parm) { \ - BITUNBLK64_32(ip, 0, op, parm);\ - BITUNBLK64_32(ip, 1, op, parm);\ - BITUNBLK64_32(ip, 2, op, parm);\ - BITUNBLK64_32(ip, 3, op, parm);\ - BITUNBLK64_32(ip, 4, op, parm);\ - BITUNBLK64_32(ip, 5, op, parm);\ - BITUNBLK64_32(ip, 6, op, parm);\ - BITUNBLK64_32(ip, 7, op, parm);\ - BITUNBLK64_32(ip, 8, op, parm);\ - BITUNBLK64_32(ip, 9, op, parm);\ - BITUNBLK64_32(ip, 10, op, parm);\ - BITUNBLK64_32(ip, 11, op, parm);\ - BITUNBLK64_32(ip, 12, op, parm);\ - BITUNBLK64_32(ip, 13, op, parm);\ - BITUNBLK64_32(ip, 
14, op, parm);\ - BITUNBLK64_32(ip, 15, op, parm); DSTI(op); ip += 32*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_33(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*33+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1ffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*33+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 33) | (w1 << 31) & 0x1ffffffff, parm);\ - DST(op,i*64+ 2, (w1 >> 2) & 0x1ffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*33+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w1 >> 35) | (w2 << 29) & 0x1ffffffff, parm);\ - DST(op,i*64+ 4, (w2 >> 4) & 0x1ffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*33+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w2 >> 37) | (w3 << 27) & 0x1ffffffff, parm);\ - DST(op,i*64+ 6, (w3 >> 6) & 0x1ffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*33+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w3 >> 39) | (w4 << 25) & 0x1ffffffff, parm);\ - DST(op,i*64+ 8, (w4 >> 8) & 0x1ffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*33+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w4 >> 41) | (w5 << 23) & 0x1ffffffff, parm);\ - DST(op,i*64+10, (w5 >> 10) & 0x1ffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*33+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w5 >> 43) | (w6 << 21) & 0x1ffffffff, parm);\ - DST(op,i*64+12, (w6 >> 12) & 0x1ffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*33+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w6 >> 45) | (w7 << 19) & 0x1ffffffff, parm);\ - DST(op,i*64+14, (w7 >> 14) & 0x1ffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*33+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w7 >> 47) | (w8 << 17) & 0x1ffffffff, parm);\ - DST(op,i*64+16, (w8 >> 16) & 0x1ffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*33+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w8 >> 49) | (w9 << 15) & 0x1ffffffff, parm);\ - DST(op,i*64+18, (w9 >> 18) & 0x1ffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*33+10)*8/sizeof(ip[0]));\ 
-\ - DST(op,i*64+19, (w9 >> 51) | (w10 << 13) & 0x1ffffffff, parm);\ - DST(op,i*64+20, (w10 >> 20) & 0x1ffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*33+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w10 >> 53) | (w11 << 11) & 0x1ffffffff, parm);\ - DST(op,i*64+22, (w11 >> 22) & 0x1ffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*33+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w11 >> 55) | (w12 << 9) & 0x1ffffffff, parm);\ - DST(op,i*64+24, (w12 >> 24) & 0x1ffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*33+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w12 >> 57) | (w13 << 7) & 0x1ffffffff, parm);\ - DST(op,i*64+26, (w13 >> 26) & 0x1ffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*33+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w13 >> 59) | (w14 << 5) & 0x1ffffffff, parm);\ - DST(op,i*64+28, (w14 >> 28) & 0x1ffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*33+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w14 >> 61) | (w15 << 3) & 0x1ffffffff, parm);\ - DST(op,i*64+30, (w15 >> 30) & 0x1ffffffff, parm); register uint64_t w16 = *(uint32_t *)(ip+(i*33+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w15 >> 63) | (w16 << 1) & 0x1ffffffff, parm);;\ -} - -#define BITUNPACK64_33(ip, op, parm) { \ - BITUNBLK64_33(ip, 0, op, parm); DSTI(op); ip += 33*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_34(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*17+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3ffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*17+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 1, (w0 >> 34) | (w1 << 30) & 0x3ffffffff, parm);\ - DST(op,i*32+ 2, (w1 >> 4) & 0x3ffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*17+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 3, (w1 >> 38) | (w2 << 26) & 0x3ffffffff, parm);\ - DST(op,i*32+ 4, (w2 >> 8) & 0x3ffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*17+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 5, (w2 >> 42) | (w3 << 22) & 0x3ffffffff, 
parm);\ - DST(op,i*32+ 6, (w3 >> 12) & 0x3ffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*17+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 7, (w3 >> 46) | (w4 << 18) & 0x3ffffffff, parm);\ - DST(op,i*32+ 8, (w4 >> 16) & 0x3ffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*17+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 9, (w4 >> 50) | (w5 << 14) & 0x3ffffffff, parm);\ - DST(op,i*32+10, (w5 >> 20) & 0x3ffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*17+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+11, (w5 >> 54) | (w6 << 10) & 0x3ffffffff, parm);\ - DST(op,i*32+12, (w6 >> 24) & 0x3ffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*17+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+13, (w6 >> 58) | (w7 << 6) & 0x3ffffffff, parm);\ - DST(op,i*32+14, (w7 >> 28) & 0x3ffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*17+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+15, (w7 >> 62) | (w8 << 2) & 0x3ffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*17+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+16, (w8 >> 32) | (w9 << 32) & 0x3ffffffff, parm);\ - DST(op,i*32+17, (w9 >> 2) & 0x3ffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*17+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+18, (w9 >> 36) | (w10 << 28) & 0x3ffffffff, parm);\ - DST(op,i*32+19, (w10 >> 6) & 0x3ffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*17+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+20, (w10 >> 40) | (w11 << 24) & 0x3ffffffff, parm);\ - DST(op,i*32+21, (w11 >> 10) & 0x3ffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*17+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+22, (w11 >> 44) | (w12 << 20) & 0x3ffffffff, parm);\ - DST(op,i*32+23, (w12 >> 14) & 0x3ffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*17+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+24, (w12 >> 48) | (w13 << 16) & 0x3ffffffff, parm);\ - DST(op,i*32+25, (w13 >> 18) & 0x3ffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*17+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+26, (w13 >> 52) | 
(w14 << 12) & 0x3ffffffff, parm);\ - DST(op,i*32+27, (w14 >> 22) & 0x3ffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*17+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+28, (w14 >> 56) | (w15 << 8) & 0x3ffffffff, parm);\ - DST(op,i*32+29, (w15 >> 26) & 0x3ffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*17+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+30, (w15 >> 60) | (w16 << 4) & 0x3ffffffff, parm);\ - DST(op,i*32+31, (w16 >> 30) , parm);;\ -} - -#define BITUNPACK64_34(ip, op, parm) { \ - BITUNBLK64_34(ip, 0, op, parm); DSTI(op); ip += 34*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_35(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*35+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7ffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*35+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 35) | (w1 << 29) & 0x7ffffffff, parm);\ - DST(op,i*64+ 2, (w1 >> 6) & 0x7ffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*35+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w1 >> 41) | (w2 << 23) & 0x7ffffffff, parm);\ - DST(op,i*64+ 4, (w2 >> 12) & 0x7ffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*35+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w2 >> 47) | (w3 << 17) & 0x7ffffffff, parm);\ - DST(op,i*64+ 6, (w3 >> 18) & 0x7ffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*35+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w3 >> 53) | (w4 << 11) & 0x7ffffffff, parm);\ - DST(op,i*64+ 8, (w4 >> 24) & 0x7ffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*35+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w4 >> 59) | (w5 << 5) & 0x7ffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*35+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w5 >> 30) | (w6 << 34) & 0x7ffffffff, parm);\ - DST(op,i*64+11, (w6 >> 1) & 0x7ffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*35+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w6 >> 36) | (w7 << 28) & 0x7ffffffff, parm);\ - DST(op,i*64+13, (w7 >> 7) & 0x7ffffffff, parm); register 
uint64_t w8 = *(uint64_t *)(ip+(i*35+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w7 >> 42) | (w8 << 22) & 0x7ffffffff, parm);\ - DST(op,i*64+15, (w8 >> 13) & 0x7ffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*35+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w8 >> 48) | (w9 << 16) & 0x7ffffffff, parm);\ - DST(op,i*64+17, (w9 >> 19) & 0x7ffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*35+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w9 >> 54) | (w10 << 10) & 0x7ffffffff, parm);\ - DST(op,i*64+19, (w10 >> 25) & 0x7ffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*35+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w10 >> 60) | (w11 << 4) & 0x7ffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*35+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w11 >> 31) | (w12 << 33) & 0x7ffffffff, parm);\ - DST(op,i*64+22, (w12 >> 2) & 0x7ffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*35+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w12 >> 37) | (w13 << 27) & 0x7ffffffff, parm);\ - DST(op,i*64+24, (w13 >> 8) & 0x7ffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*35+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w13 >> 43) | (w14 << 21) & 0x7ffffffff, parm);\ - DST(op,i*64+26, (w14 >> 14) & 0x7ffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*35+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w14 >> 49) | (w15 << 15) & 0x7ffffffff, parm);\ - DST(op,i*64+28, (w15 >> 20) & 0x7ffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*35+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w15 >> 55) | (w16 << 9) & 0x7ffffffff, parm);\ - DST(op,i*64+30, (w16 >> 26) & 0x7ffffffff, parm); register uint64_t w17 = *(uint32_t *)(ip+(i*35+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w16 >> 61) | (w17 << 3) & 0x7ffffffff, parm);;\ -} - -#define BITUNPACK64_35(ip, op, parm) { \ - BITUNBLK64_35(ip, 0, op, parm); DSTI(op); ip += 35*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_36(ip, i, op, parm) { register uint64_t w0 = *(uint64_t 
*)(ip+(i*9+0)*8/sizeof(ip[0]));\ - DST(op,i*16+ 0, (w0 ) & 0xfffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*9+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 1, (w0 >> 36) | (w1 << 28) & 0xfffffffff, parm);\ - DST(op,i*16+ 2, (w1 >> 8) & 0xfffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*9+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 3, (w1 >> 44) | (w2 << 20) & 0xfffffffff, parm);\ - DST(op,i*16+ 4, (w2 >> 16) & 0xfffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*9+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 5, (w2 >> 52) | (w3 << 12) & 0xfffffffff, parm);\ - DST(op,i*16+ 6, (w3 >> 24) & 0xfffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*9+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 7, (w3 >> 60) | (w4 << 4) & 0xfffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*9+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 8, (w4 >> 32) | (w5 << 32) & 0xfffffffff, parm);\ - DST(op,i*16+ 9, (w5 >> 4) & 0xfffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*9+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+10, (w5 >> 40) | (w6 << 24) & 0xfffffffff, parm);\ - DST(op,i*16+11, (w6 >> 12) & 0xfffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*9+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+12, (w6 >> 48) | (w7 << 16) & 0xfffffffff, parm);\ - DST(op,i*16+13, (w7 >> 20) & 0xfffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*9+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+14, (w7 >> 56) | (w8 << 8) & 0xfffffffff, parm);\ - DST(op,i*16+15, (w8 >> 28) , parm);;\ -} - -#define BITUNPACK64_36(ip, op, parm) { \ - BITUNBLK64_36(ip, 0, op, parm);\ - BITUNBLK64_36(ip, 1, op, parm); DSTI(op); ip += 36*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_37(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*37+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1fffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*37+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 37) | (w1 << 27) & 0x1fffffffff, parm);\ - DST(op,i*64+ 2, (w1 >> 10) & 0x1fffffffff, 
parm); register uint64_t w2 = *(uint64_t *)(ip+(i*37+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w1 >> 47) | (w2 << 17) & 0x1fffffffff, parm);\ - DST(op,i*64+ 4, (w2 >> 20) & 0x1fffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*37+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w2 >> 57) | (w3 << 7) & 0x1fffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*37+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w3 >> 30) | (w4 << 34) & 0x1fffffffff, parm);\ - DST(op,i*64+ 7, (w4 >> 3) & 0x1fffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*37+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w4 >> 40) | (w5 << 24) & 0x1fffffffff, parm);\ - DST(op,i*64+ 9, (w5 >> 13) & 0x1fffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*37+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w5 >> 50) | (w6 << 14) & 0x1fffffffff, parm);\ - DST(op,i*64+11, (w6 >> 23) & 0x1fffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*37+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w6 >> 60) | (w7 << 4) & 0x1fffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*37+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w7 >> 33) | (w8 << 31) & 0x1fffffffff, parm);\ - DST(op,i*64+14, (w8 >> 6) & 0x1fffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*37+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w8 >> 43) | (w9 << 21) & 0x1fffffffff, parm);\ - DST(op,i*64+16, (w9 >> 16) & 0x1fffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*37+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w9 >> 53) | (w10 << 11) & 0x1fffffffff, parm);\ - DST(op,i*64+18, (w10 >> 26) & 0x1fffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*37+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w10 >> 63) | (w11 << 1) & 0x1fffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*37+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w11 >> 36) | (w12 << 28) & 0x1fffffffff, parm);\ - DST(op,i*64+21, (w12 >> 9) & 0x1fffffffff, parm); register uint64_t w13 = *(uint64_t 
*)(ip+(i*37+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w12 >> 46) | (w13 << 18) & 0x1fffffffff, parm);\ - DST(op,i*64+23, (w13 >> 19) & 0x1fffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*37+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w13 >> 56) | (w14 << 8) & 0x1fffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*37+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w14 >> 29) | (w15 << 35) & 0x1fffffffff, parm);\ - DST(op,i*64+26, (w15 >> 2) & 0x1fffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*37+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w15 >> 39) | (w16 << 25) & 0x1fffffffff, parm);\ - DST(op,i*64+28, (w16 >> 12) & 0x1fffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*37+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w16 >> 49) | (w17 << 15) & 0x1fffffffff, parm);\ - DST(op,i*64+30, (w17 >> 22) & 0x1fffffffff, parm); register uint64_t w18 = *(uint32_t *)(ip+(i*37+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w17 >> 59) | (w18 << 5) & 0x1fffffffff, parm);;\ -} - -#define BITUNPACK64_37(ip, op, parm) { \ - BITUNBLK64_37(ip, 0, op, parm); DSTI(op); ip += 37*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_38(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*19+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3fffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*19+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 1, (w0 >> 38) | (w1 << 26) & 0x3fffffffff, parm);\ - DST(op,i*32+ 2, (w1 >> 12) & 0x3fffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*19+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 3, (w1 >> 50) | (w2 << 14) & 0x3fffffffff, parm);\ - DST(op,i*32+ 4, (w2 >> 24) & 0x3fffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*19+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 5, (w2 >> 62) | (w3 << 2) & 0x3fffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*19+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 6, (w3 >> 36) | (w4 << 28) & 0x3fffffffff, parm);\ - DST(op,i*32+ 7, (w4 >> 10) & 0x3fffffffff, 
parm); register uint64_t w5 = *(uint64_t *)(ip+(i*19+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 8, (w4 >> 48) | (w5 << 16) & 0x3fffffffff, parm);\ - DST(op,i*32+ 9, (w5 >> 22) & 0x3fffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*19+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+10, (w5 >> 60) | (w6 << 4) & 0x3fffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*19+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+11, (w6 >> 34) | (w7 << 30) & 0x3fffffffff, parm);\ - DST(op,i*32+12, (w7 >> 8) & 0x3fffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*19+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+13, (w7 >> 46) | (w8 << 18) & 0x3fffffffff, parm);\ - DST(op,i*32+14, (w8 >> 20) & 0x3fffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*19+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+15, (w8 >> 58) | (w9 << 6) & 0x3fffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*19+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+16, (w9 >> 32) | (w10 << 32) & 0x3fffffffff, parm);\ - DST(op,i*32+17, (w10 >> 6) & 0x3fffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*19+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+18, (w10 >> 44) | (w11 << 20) & 0x3fffffffff, parm);\ - DST(op,i*32+19, (w11 >> 18) & 0x3fffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*19+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+20, (w11 >> 56) | (w12 << 8) & 0x3fffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*19+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+21, (w12 >> 30) | (w13 << 34) & 0x3fffffffff, parm);\ - DST(op,i*32+22, (w13 >> 4) & 0x3fffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*19+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+23, (w13 >> 42) | (w14 << 22) & 0x3fffffffff, parm);\ - DST(op,i*32+24, (w14 >> 16) & 0x3fffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*19+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+25, (w14 >> 54) | (w15 << 10) & 0x3fffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*19+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+26, (w15 >> 28) | 
(w16 << 36) & 0x3fffffffff, parm);\ - DST(op,i*32+27, (w16 >> 2) & 0x3fffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*19+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+28, (w16 >> 40) | (w17 << 24) & 0x3fffffffff, parm);\ - DST(op,i*32+29, (w17 >> 14) & 0x3fffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*19+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+30, (w17 >> 52) | (w18 << 12) & 0x3fffffffff, parm);\ - DST(op,i*32+31, (w18 >> 26) , parm);;\ -} - -#define BITUNPACK64_38(ip, op, parm) { \ - BITUNBLK64_38(ip, 0, op, parm); DSTI(op); ip += 38*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_39(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*39+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7fffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*39+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 39) | (w1 << 25) & 0x7fffffffff, parm);\ - DST(op,i*64+ 2, (w1 >> 14) & 0x7fffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*39+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w1 >> 53) | (w2 << 11) & 0x7fffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*39+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w2 >> 28) | (w3 << 36) & 0x7fffffffff, parm);\ - DST(op,i*64+ 5, (w3 >> 3) & 0x7fffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*39+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w3 >> 42) | (w4 << 22) & 0x7fffffffff, parm);\ - DST(op,i*64+ 7, (w4 >> 17) & 0x7fffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*39+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w4 >> 56) | (w5 << 8) & 0x7fffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*39+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w5 >> 31) | (w6 << 33) & 0x7fffffffff, parm);\ - DST(op,i*64+10, (w6 >> 6) & 0x7fffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*39+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w6 >> 45) | (w7 << 19) & 0x7fffffffff, parm);\ - DST(op,i*64+12, (w7 >> 20) & 0x7fffffffff, parm); register uint64_t w8 = *(uint64_t 
*)(ip+(i*39+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w7 >> 59) | (w8 << 5) & 0x7fffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*39+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w8 >> 34) | (w9 << 30) & 0x7fffffffff, parm);\ - DST(op,i*64+15, (w9 >> 9) & 0x7fffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*39+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w9 >> 48) | (w10 << 16) & 0x7fffffffff, parm);\ - DST(op,i*64+17, (w10 >> 23) & 0x7fffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*39+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w10 >> 62) | (w11 << 2) & 0x7fffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*39+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w11 >> 37) | (w12 << 27) & 0x7fffffffff, parm);\ - DST(op,i*64+20, (w12 >> 12) & 0x7fffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*39+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w12 >> 51) | (w13 << 13) & 0x7fffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*39+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w13 >> 26) | (w14 << 38) & 0x7fffffffff, parm);\ - DST(op,i*64+23, (w14 >> 1) & 0x7fffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*39+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w14 >> 40) | (w15 << 24) & 0x7fffffffff, parm);\ - DST(op,i*64+25, (w15 >> 15) & 0x7fffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*39+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w15 >> 54) | (w16 << 10) & 0x7fffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*39+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w16 >> 29) | (w17 << 35) & 0x7fffffffff, parm);\ - DST(op,i*64+28, (w17 >> 4) & 0x7fffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*39+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w17 >> 43) | (w18 << 21) & 0x7fffffffff, parm);\ - DST(op,i*64+30, (w18 >> 18) & 0x7fffffffff, parm); register uint64_t w19 = *(uint32_t *)(ip+(i*39+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w18 >> 57) | (w19 << 7) & 
0x7fffffffff, parm);;\ -} - -#define BITUNPACK64_39(ip, op, parm) { \ - BITUNBLK64_39(ip, 0, op, parm); DSTI(op); ip += 39*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_40(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*5+0)*8/sizeof(ip[0]));\ - DST(op,i*8+ 0, (w0 ) & 0xffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*5+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 1, (w0 >> 40) | (w1 << 24) & 0xffffffffff, parm);\ - DST(op,i*8+ 2, (w1 >> 16) & 0xffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*5+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 3, (w1 >> 56) | (w2 << 8) & 0xffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*5+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 4, (w2 >> 32) | (w3 << 32) & 0xffffffffff, parm);\ - DST(op,i*8+ 5, (w3 >> 8) & 0xffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*5+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 6, (w3 >> 48) | (w4 << 16) & 0xffffffffff, parm);\ - DST(op,i*8+ 7, (w4 >> 24) , parm);;\ -} - -#define BITUNPACK64_40(ip, op, parm) { \ - BITUNBLK64_40(ip, 0, op, parm);\ - BITUNBLK64_40(ip, 1, op, parm);\ - BITUNBLK64_40(ip, 2, op, parm);\ - BITUNBLK64_40(ip, 3, op, parm); DSTI(op); ip += 40*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_41(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*41+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1ffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*41+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 41) | (w1 << 23) & 0x1ffffffffff, parm);\ - DST(op,i*64+ 2, (w1 >> 18) & 0x1ffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*41+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w1 >> 59) | (w2 << 5) & 0x1ffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*41+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w2 >> 36) | (w3 << 28) & 0x1ffffffffff, parm);\ - DST(op,i*64+ 5, (w3 >> 13) & 0x1ffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*41+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w3 >> 54) | (w4 << 10) & 
0x1ffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*41+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w4 >> 31) | (w5 << 33) & 0x1ffffffffff, parm);\ - DST(op,i*64+ 8, (w5 >> 8) & 0x1ffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*41+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w5 >> 49) | (w6 << 15) & 0x1ffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*41+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w6 >> 26) | (w7 << 38) & 0x1ffffffffff, parm);\ - DST(op,i*64+11, (w7 >> 3) & 0x1ffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*41+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w7 >> 44) | (w8 << 20) & 0x1ffffffffff, parm);\ - DST(op,i*64+13, (w8 >> 21) & 0x1ffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*41+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w8 >> 62) | (w9 << 2) & 0x1ffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*41+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w9 >> 39) | (w10 << 25) & 0x1ffffffffff, parm);\ - DST(op,i*64+16, (w10 >> 16) & 0x1ffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*41+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w10 >> 57) | (w11 << 7) & 0x1ffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*41+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w11 >> 34) | (w12 << 30) & 0x1ffffffffff, parm);\ - DST(op,i*64+19, (w12 >> 11) & 0x1ffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*41+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w12 >> 52) | (w13 << 12) & 0x1ffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*41+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w13 >> 29) | (w14 << 35) & 0x1ffffffffff, parm);\ - DST(op,i*64+22, (w14 >> 6) & 0x1ffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*41+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w14 >> 47) | (w15 << 17) & 0x1ffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*41+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w15 >> 24) | (w16 << 40) & 
0x1ffffffffff, parm);\ - DST(op,i*64+25, (w16 >> 1) & 0x1ffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*41+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w16 >> 42) | (w17 << 22) & 0x1ffffffffff, parm);\ - DST(op,i*64+27, (w17 >> 19) & 0x1ffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*41+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w17 >> 60) | (w18 << 4) & 0x1ffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*41+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w18 >> 37) | (w19 << 27) & 0x1ffffffffff, parm);\ - DST(op,i*64+30, (w19 >> 14) & 0x1ffffffffff, parm); register uint64_t w20 = *(uint32_t *)(ip+(i*41+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w19 >> 55) | (w20 << 9) & 0x1ffffffffff, parm);;\ -} - -#define BITUNPACK64_41(ip, op, parm) { \ - BITUNBLK64_41(ip, 0, op, parm); DSTI(op); ip += 41*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_42(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*21+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3ffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*21+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 1, (w0 >> 42) | (w1 << 22) & 0x3ffffffffff, parm);\ - DST(op,i*32+ 2, (w1 >> 20) & 0x3ffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*21+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 3, (w1 >> 62) | (w2 << 2) & 0x3ffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*21+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 4, (w2 >> 40) | (w3 << 24) & 0x3ffffffffff, parm);\ - DST(op,i*32+ 5, (w3 >> 18) & 0x3ffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*21+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 6, (w3 >> 60) | (w4 << 4) & 0x3ffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*21+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 7, (w4 >> 38) | (w5 << 26) & 0x3ffffffffff, parm);\ - DST(op,i*32+ 8, (w5 >> 16) & 0x3ffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*21+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 9, (w5 >> 58) | (w6 << 6) & 
0x3ffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*21+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+10, (w6 >> 36) | (w7 << 28) & 0x3ffffffffff, parm);\ - DST(op,i*32+11, (w7 >> 14) & 0x3ffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*21+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+12, (w7 >> 56) | (w8 << 8) & 0x3ffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*21+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+13, (w8 >> 34) | (w9 << 30) & 0x3ffffffffff, parm);\ - DST(op,i*32+14, (w9 >> 12) & 0x3ffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*21+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+15, (w9 >> 54) | (w10 << 10) & 0x3ffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*21+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+16, (w10 >> 32) | (w11 << 32) & 0x3ffffffffff, parm);\ - DST(op,i*32+17, (w11 >> 10) & 0x3ffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*21+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+18, (w11 >> 52) | (w12 << 12) & 0x3ffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*21+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+19, (w12 >> 30) | (w13 << 34) & 0x3ffffffffff, parm);\ - DST(op,i*32+20, (w13 >> 8) & 0x3ffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*21+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+21, (w13 >> 50) | (w14 << 14) & 0x3ffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*21+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+22, (w14 >> 28) | (w15 << 36) & 0x3ffffffffff, parm);\ - DST(op,i*32+23, (w15 >> 6) & 0x3ffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*21+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+24, (w15 >> 48) | (w16 << 16) & 0x3ffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*21+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+25, (w16 >> 26) | (w17 << 38) & 0x3ffffffffff, parm);\ - DST(op,i*32+26, (w17 >> 4) & 0x3ffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*21+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+27, (w17 >> 46) | (w18 << 
18) & 0x3ffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*21+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+28, (w18 >> 24) | (w19 << 40) & 0x3ffffffffff, parm);\ - DST(op,i*32+29, (w19 >> 2) & 0x3ffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*21+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+30, (w19 >> 44) | (w20 << 20) & 0x3ffffffffff, parm);\ - DST(op,i*32+31, (w20 >> 22) , parm);;\ -} - -#define BITUNPACK64_42(ip, op, parm) { \ - BITUNBLK64_42(ip, 0, op, parm); DSTI(op); ip += 42*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_43(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*43+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7ffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*43+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 43) | (w1 << 21) & 0x7ffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*43+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 22) | (w2 << 42) & 0x7ffffffffff, parm);\ - DST(op,i*64+ 3, (w2 >> 1) & 0x7ffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*43+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w2 >> 44) | (w3 << 20) & 0x7ffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*43+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w3 >> 23) | (w4 << 41) & 0x7ffffffffff, parm);\ - DST(op,i*64+ 6, (w4 >> 2) & 0x7ffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*43+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w4 >> 45) | (w5 << 19) & 0x7ffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*43+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w5 >> 24) | (w6 << 40) & 0x7ffffffffff, parm);\ - DST(op,i*64+ 9, (w6 >> 3) & 0x7ffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*43+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w6 >> 46) | (w7 << 18) & 0x7ffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*43+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w7 >> 25) | (w8 << 39) & 0x7ffffffffff, parm);\ - DST(op,i*64+12, (w8 >> 4) & 0x7ffffffffff, parm); 
register uint64_t w9 = *(uint64_t *)(ip+(i*43+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w8 >> 47) | (w9 << 17) & 0x7ffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*43+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w9 >> 26) | (w10 << 38) & 0x7ffffffffff, parm);\ - DST(op,i*64+15, (w10 >> 5) & 0x7ffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*43+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w10 >> 48) | (w11 << 16) & 0x7ffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*43+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w11 >> 27) | (w12 << 37) & 0x7ffffffffff, parm);\ - DST(op,i*64+18, (w12 >> 6) & 0x7ffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*43+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w12 >> 49) | (w13 << 15) & 0x7ffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*43+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w13 >> 28) | (w14 << 36) & 0x7ffffffffff, parm);\ - DST(op,i*64+21, (w14 >> 7) & 0x7ffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*43+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w14 >> 50) | (w15 << 14) & 0x7ffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*43+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w15 >> 29) | (w16 << 35) & 0x7ffffffffff, parm);\ - DST(op,i*64+24, (w16 >> 8) & 0x7ffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*43+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w16 >> 51) | (w17 << 13) & 0x7ffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*43+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w17 >> 30) | (w18 << 34) & 0x7ffffffffff, parm);\ - DST(op,i*64+27, (w18 >> 9) & 0x7ffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*43+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w18 >> 52) | (w19 << 12) & 0x7ffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*43+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w19 >> 31) | (w20 << 33) & 0x7ffffffffff, parm);\ - DST(op,i*64+30, (w20 >> 10) & 
0x7ffffffffff, parm); register uint64_t w21 = *(uint32_t *)(ip+(i*43+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w20 >> 53) | (w21 << 11) & 0x7ffffffffff, parm);;\ -} - -#define BITUNPACK64_43(ip, op, parm) { \ - BITUNBLK64_43(ip, 0, op, parm); DSTI(op); ip += 43*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_44(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*11+0)*8/sizeof(ip[0]));\ - DST(op,i*16+ 0, (w0 ) & 0xfffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*11+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 1, (w0 >> 44) | (w1 << 20) & 0xfffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*11+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 2, (w1 >> 24) | (w2 << 40) & 0xfffffffffff, parm);\ - DST(op,i*16+ 3, (w2 >> 4) & 0xfffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*11+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 4, (w2 >> 48) | (w3 << 16) & 0xfffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*11+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 5, (w3 >> 28) | (w4 << 36) & 0xfffffffffff, parm);\ - DST(op,i*16+ 6, (w4 >> 8) & 0xfffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*11+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 7, (w4 >> 52) | (w5 << 12) & 0xfffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*11+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 8, (w5 >> 32) | (w6 << 32) & 0xfffffffffff, parm);\ - DST(op,i*16+ 9, (w6 >> 12) & 0xfffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*11+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+10, (w6 >> 56) | (w7 << 8) & 0xfffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*11+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+11, (w7 >> 36) | (w8 << 28) & 0xfffffffffff, parm);\ - DST(op,i*16+12, (w8 >> 16) & 0xfffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*11+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+13, (w8 >> 60) | (w9 << 4) & 0xfffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*11+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+14, (w9 >> 40) | (w10 
<< 24) & 0xfffffffffff, parm);\ - DST(op,i*16+15, (w10 >> 20) , parm);;\ -} - -#define BITUNPACK64_44(ip, op, parm) { \ - BITUNBLK64_44(ip, 0, op, parm);\ - BITUNBLK64_44(ip, 1, op, parm); DSTI(op); ip += 44*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_45(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*45+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1fffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*45+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 45) | (w1 << 19) & 0x1fffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*45+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 26) | (w2 << 38) & 0x1fffffffffff, parm);\ - DST(op,i*64+ 3, (w2 >> 7) & 0x1fffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*45+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w2 >> 52) | (w3 << 12) & 0x1fffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*45+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w3 >> 33) | (w4 << 31) & 0x1fffffffffff, parm);\ - DST(op,i*64+ 6, (w4 >> 14) & 0x1fffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*45+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w4 >> 59) | (w5 << 5) & 0x1fffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*45+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w5 >> 40) | (w6 << 24) & 0x1fffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*45+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w6 >> 21) | (w7 << 43) & 0x1fffffffffff, parm);\ - DST(op,i*64+10, (w7 >> 2) & 0x1fffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*45+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w7 >> 47) | (w8 << 17) & 0x1fffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*45+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w8 >> 28) | (w9 << 36) & 0x1fffffffffff, parm);\ - DST(op,i*64+13, (w9 >> 9) & 0x1fffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*45+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w9 >> 54) | (w10 << 10) & 0x1fffffffffff, parm); register 
uint64_t w11 = *(uint64_t *)(ip+(i*45+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w10 >> 35) | (w11 << 29) & 0x1fffffffffff, parm);\ - DST(op,i*64+16, (w11 >> 16) & 0x1fffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*45+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w11 >> 61) | (w12 << 3) & 0x1fffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*45+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w12 >> 42) | (w13 << 22) & 0x1fffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*45+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w13 >> 23) | (w14 << 41) & 0x1fffffffffff, parm);\ - DST(op,i*64+20, (w14 >> 4) & 0x1fffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*45+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w14 >> 49) | (w15 << 15) & 0x1fffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*45+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w15 >> 30) | (w16 << 34) & 0x1fffffffffff, parm);\ - DST(op,i*64+23, (w16 >> 11) & 0x1fffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*45+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w16 >> 56) | (w17 << 8) & 0x1fffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*45+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w17 >> 37) | (w18 << 27) & 0x1fffffffffff, parm);\ - DST(op,i*64+26, (w18 >> 18) & 0x1fffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*45+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w18 >> 63) | (w19 << 1) & 0x1fffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*45+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w19 >> 44) | (w20 << 20) & 0x1fffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*45+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w20 >> 25) | (w21 << 39) & 0x1fffffffffff, parm);\ - DST(op,i*64+30, (w21 >> 6) & 0x1fffffffffff, parm); register uint64_t w22 = *(uint32_t *)(ip+(i*45+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w21 >> 51) | (w22 << 13) & 0x1fffffffffff, parm);;\ -} - -#define 
BITUNPACK64_45(ip, op, parm) { \ - BITUNBLK64_45(ip, 0, op, parm); DSTI(op); ip += 45*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_46(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*23+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3fffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*23+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 1, (w0 >> 46) | (w1 << 18) & 0x3fffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*23+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 2, (w1 >> 28) | (w2 << 36) & 0x3fffffffffff, parm);\ - DST(op,i*32+ 3, (w2 >> 10) & 0x3fffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*23+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 4, (w2 >> 56) | (w3 << 8) & 0x3fffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*23+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 5, (w3 >> 38) | (w4 << 26) & 0x3fffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*23+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 6, (w4 >> 20) | (w5 << 44) & 0x3fffffffffff, parm);\ - DST(op,i*32+ 7, (w5 >> 2) & 0x3fffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*23+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 8, (w5 >> 48) | (w6 << 16) & 0x3fffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*23+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 9, (w6 >> 30) | (w7 << 34) & 0x3fffffffffff, parm);\ - DST(op,i*32+10, (w7 >> 12) & 0x3fffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*23+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+11, (w7 >> 58) | (w8 << 6) & 0x3fffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*23+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+12, (w8 >> 40) | (w9 << 24) & 0x3fffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*23+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+13, (w9 >> 22) | (w10 << 42) & 0x3fffffffffff, parm);\ - DST(op,i*32+14, (w10 >> 4) & 0x3fffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*23+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+15, (w10 >> 50) | (w11 << 14) & 
0x3fffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*23+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+16, (w11 >> 32) | (w12 << 32) & 0x3fffffffffff, parm);\ - DST(op,i*32+17, (w12 >> 14) & 0x3fffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*23+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+18, (w12 >> 60) | (w13 << 4) & 0x3fffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*23+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+19, (w13 >> 42) | (w14 << 22) & 0x3fffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*23+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+20, (w14 >> 24) | (w15 << 40) & 0x3fffffffffff, parm);\ - DST(op,i*32+21, (w15 >> 6) & 0x3fffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*23+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+22, (w15 >> 52) | (w16 << 12) & 0x3fffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*23+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+23, (w16 >> 34) | (w17 << 30) & 0x3fffffffffff, parm);\ - DST(op,i*32+24, (w17 >> 16) & 0x3fffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*23+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+25, (w17 >> 62) | (w18 << 2) & 0x3fffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*23+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+26, (w18 >> 44) | (w19 << 20) & 0x3fffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*23+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+27, (w19 >> 26) | (w20 << 38) & 0x3fffffffffff, parm);\ - DST(op,i*32+28, (w20 >> 8) & 0x3fffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*23+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+29, (w20 >> 54) | (w21 << 10) & 0x3fffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*23+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+30, (w21 >> 36) | (w22 << 28) & 0x3fffffffffff, parm);\ - DST(op,i*32+31, (w22 >> 18) , parm);;\ -} - -#define BITUNPACK64_46(ip, op, parm) { \ - BITUNBLK64_46(ip, 0, op, parm); DSTI(op); ip += 46*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_47(ip, i, 
op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*47+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7fffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*47+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 47) | (w1 << 17) & 0x7fffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*47+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 30) | (w2 << 34) & 0x7fffffffffff, parm);\ - DST(op,i*64+ 3, (w2 >> 13) & 0x7fffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*47+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w2 >> 60) | (w3 << 4) & 0x7fffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*47+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w3 >> 43) | (w4 << 21) & 0x7fffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*47+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w4 >> 26) | (w5 << 38) & 0x7fffffffffff, parm);\ - DST(op,i*64+ 7, (w5 >> 9) & 0x7fffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*47+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w5 >> 56) | (w6 << 8) & 0x7fffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*47+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w6 >> 39) | (w7 << 25) & 0x7fffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*47+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w7 >> 22) | (w8 << 42) & 0x7fffffffffff, parm);\ - DST(op,i*64+11, (w8 >> 5) & 0x7fffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*47+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w8 >> 52) | (w9 << 12) & 0x7fffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*47+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w9 >> 35) | (w10 << 29) & 0x7fffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*47+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w10 >> 18) | (w11 << 46) & 0x7fffffffffff, parm);\ - DST(op,i*64+15, (w11 >> 1) & 0x7fffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*47+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w11 >> 48) | (w12 << 16) & 
0x7fffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*47+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w12 >> 31) | (w13 << 33) & 0x7fffffffffff, parm);\ - DST(op,i*64+18, (w13 >> 14) & 0x7fffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*47+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w13 >> 61) | (w14 << 3) & 0x7fffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*47+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w14 >> 44) | (w15 << 20) & 0x7fffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*47+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w15 >> 27) | (w16 << 37) & 0x7fffffffffff, parm);\ - DST(op,i*64+22, (w16 >> 10) & 0x7fffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*47+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w16 >> 57) | (w17 << 7) & 0x7fffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*47+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w17 >> 40) | (w18 << 24) & 0x7fffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*47+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w18 >> 23) | (w19 << 41) & 0x7fffffffffff, parm);\ - DST(op,i*64+26, (w19 >> 6) & 0x7fffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*47+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w19 >> 53) | (w20 << 11) & 0x7fffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*47+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w20 >> 36) | (w21 << 28) & 0x7fffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*47+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w21 >> 19) | (w22 << 45) & 0x7fffffffffff, parm);\ - DST(op,i*64+30, (w22 >> 2) & 0x7fffffffffff, parm); register uint64_t w23 = *(uint32_t *)(ip+(i*47+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w22 >> 49) | (w23 << 15) & 0x7fffffffffff, parm);;\ -} - -#define BITUNPACK64_47(ip, op, parm) { \ - BITUNBLK64_47(ip, 0, op, parm); DSTI(op); ip += 47*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_48(ip, i, op, parm) { register uint64_t w0 = 
*(uint64_t *)(ip+(i*3+0)*8/sizeof(ip[0]));\ - DST(op,i*4+ 0, (w0 ) & 0xffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*3+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*4+ 1, (w0 >> 48) | (w1 << 16) & 0xffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*3+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*4+ 2, (w1 >> 32) | (w2 << 32) & 0xffffffffffff, parm);\ - DST(op,i*4+ 3, (w2 >> 16) , parm);;\ -} - -#define BITUNPACK64_48(ip, op, parm) { \ - BITUNBLK64_48(ip, 0, op, parm);\ - BITUNBLK64_48(ip, 1, op, parm);\ - BITUNBLK64_48(ip, 2, op, parm);\ - BITUNBLK64_48(ip, 3, op, parm);\ - BITUNBLK64_48(ip, 4, op, parm);\ - BITUNBLK64_48(ip, 5, op, parm);\ - BITUNBLK64_48(ip, 6, op, parm);\ - BITUNBLK64_48(ip, 7, op, parm); DSTI(op); ip += 48*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_49(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*49+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1ffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*49+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 49) | (w1 << 15) & 0x1ffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*49+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 34) | (w2 << 30) & 0x1ffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*49+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w2 >> 19) | (w3 << 45) & 0x1ffffffffffff, parm);\ - DST(op,i*64+ 4, (w3 >> 4) & 0x1ffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*49+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w3 >> 53) | (w4 << 11) & 0x1ffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*49+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w4 >> 38) | (w5 << 26) & 0x1ffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*49+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w5 >> 23) | (w6 << 41) & 0x1ffffffffffff, parm);\ - DST(op,i*64+ 8, (w6 >> 8) & 0x1ffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*49+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w6 >> 57) | (w7 << 7) & 
0x1ffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*49+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w7 >> 42) | (w8 << 22) & 0x1ffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*49+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w8 >> 27) | (w9 << 37) & 0x1ffffffffffff, parm);\ - DST(op,i*64+12, (w9 >> 12) & 0x1ffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*49+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w9 >> 61) | (w10 << 3) & 0x1ffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*49+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w10 >> 46) | (w11 << 18) & 0x1ffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*49+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w11 >> 31) | (w12 << 33) & 0x1ffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*49+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w12 >> 16) | (w13 << 48) & 0x1ffffffffffff, parm);\ - DST(op,i*64+17, (w13 >> 1) & 0x1ffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*49+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w13 >> 50) | (w14 << 14) & 0x1ffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*49+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w14 >> 35) | (w15 << 29) & 0x1ffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*49+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w15 >> 20) | (w16 << 44) & 0x1ffffffffffff, parm);\ - DST(op,i*64+21, (w16 >> 5) & 0x1ffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*49+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w16 >> 54) | (w17 << 10) & 0x1ffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*49+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w17 >> 39) | (w18 << 25) & 0x1ffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*49+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w18 >> 24) | (w19 << 40) & 0x1ffffffffffff, parm);\ - DST(op,i*64+25, (w19 >> 9) & 0x1ffffffffffff, parm); register uint64_t w20 = *(uint64_t 
*)(ip+(i*49+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w19 >> 58) | (w20 << 6) & 0x1ffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*49+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w20 >> 43) | (w21 << 21) & 0x1ffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*49+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w21 >> 28) | (w22 << 36) & 0x1ffffffffffff, parm);\ - DST(op,i*64+29, (w22 >> 13) & 0x1ffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*49+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w22 >> 62) | (w23 << 2) & 0x1ffffffffffff, parm); register uint64_t w24 = *(uint32_t *)(ip+(i*49+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w23 >> 47) | (w24 << 17) & 0x1ffffffffffff, parm);;\ -} - -#define BITUNPACK64_49(ip, op, parm) { \ - BITUNBLK64_49(ip, 0, op, parm); DSTI(op); ip += 49*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_50(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*25+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3ffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*25+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 1, (w0 >> 50) | (w1 << 14) & 0x3ffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*25+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 2, (w1 >> 36) | (w2 << 28) & 0x3ffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*25+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 3, (w2 >> 22) | (w3 << 42) & 0x3ffffffffffff, parm);\ - DST(op,i*32+ 4, (w3 >> 8) & 0x3ffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*25+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 5, (w3 >> 58) | (w4 << 6) & 0x3ffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*25+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 6, (w4 >> 44) | (w5 << 20) & 0x3ffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*25+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 7, (w5 >> 30) | (w6 << 34) & 0x3ffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*25+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 8, 
(w6 >> 16) | (w7 << 48) & 0x3ffffffffffff, parm);\ - DST(op,i*32+ 9, (w7 >> 2) & 0x3ffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*25+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+10, (w7 >> 52) | (w8 << 12) & 0x3ffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*25+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+11, (w8 >> 38) | (w9 << 26) & 0x3ffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*25+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+12, (w9 >> 24) | (w10 << 40) & 0x3ffffffffffff, parm);\ - DST(op,i*32+13, (w10 >> 10) & 0x3ffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*25+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+14, (w10 >> 60) | (w11 << 4) & 0x3ffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*25+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+15, (w11 >> 46) | (w12 << 18) & 0x3ffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*25+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+16, (w12 >> 32) | (w13 << 32) & 0x3ffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*25+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+17, (w13 >> 18) | (w14 << 46) & 0x3ffffffffffff, parm);\ - DST(op,i*32+18, (w14 >> 4) & 0x3ffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*25+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+19, (w14 >> 54) | (w15 << 10) & 0x3ffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*25+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+20, (w15 >> 40) | (w16 << 24) & 0x3ffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*25+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+21, (w16 >> 26) | (w17 << 38) & 0x3ffffffffffff, parm);\ - DST(op,i*32+22, (w17 >> 12) & 0x3ffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*25+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+23, (w17 >> 62) | (w18 << 2) & 0x3ffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*25+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+24, (w18 >> 48) | (w19 << 16) & 0x3ffffffffffff, parm); register uint64_t 
w20 = *(uint64_t *)(ip+(i*25+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+25, (w19 >> 34) | (w20 << 30) & 0x3ffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*25+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+26, (w20 >> 20) | (w21 << 44) & 0x3ffffffffffff, parm);\ - DST(op,i*32+27, (w21 >> 6) & 0x3ffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*25+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+28, (w21 >> 56) | (w22 << 8) & 0x3ffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*25+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+29, (w22 >> 42) | (w23 << 22) & 0x3ffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*25+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+30, (w23 >> 28) | (w24 << 36) & 0x3ffffffffffff, parm);\ - DST(op,i*32+31, (w24 >> 14) , parm);;\ -} - -#define BITUNPACK64_50(ip, op, parm) { \ - BITUNBLK64_50(ip, 0, op, parm); DSTI(op); ip += 50*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_51(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*51+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7ffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*51+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 51) | (w1 << 13) & 0x7ffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*51+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 38) | (w2 << 26) & 0x7ffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*51+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w2 >> 25) | (w3 << 39) & 0x7ffffffffffff, parm);\ - DST(op,i*64+ 4, (w3 >> 12) & 0x7ffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*51+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w3 >> 63) | (w4 << 1) & 0x7ffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*51+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w4 >> 50) | (w5 << 14) & 0x7ffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*51+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w5 >> 37) | (w6 << 27) & 0x7ffffffffffff, parm); register uint64_t w7 = *(uint64_t 
*)(ip+(i*51+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w6 >> 24) | (w7 << 40) & 0x7ffffffffffff, parm);\ - DST(op,i*64+ 9, (w7 >> 11) & 0x7ffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*51+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w7 >> 62) | (w8 << 2) & 0x7ffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*51+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w8 >> 49) | (w9 << 15) & 0x7ffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*51+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w9 >> 36) | (w10 << 28) & 0x7ffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*51+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w10 >> 23) | (w11 << 41) & 0x7ffffffffffff, parm);\ - DST(op,i*64+14, (w11 >> 10) & 0x7ffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*51+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w11 >> 61) | (w12 << 3) & 0x7ffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*51+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w12 >> 48) | (w13 << 16) & 0x7ffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*51+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w13 >> 35) | (w14 << 29) & 0x7ffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*51+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w14 >> 22) | (w15 << 42) & 0x7ffffffffffff, parm);\ - DST(op,i*64+19, (w15 >> 9) & 0x7ffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*51+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w15 >> 60) | (w16 << 4) & 0x7ffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*51+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w16 >> 47) | (w17 << 17) & 0x7ffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*51+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w17 >> 34) | (w18 << 30) & 0x7ffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*51+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w18 >> 21) | (w19 << 43) & 0x7ffffffffffff, parm);\ - DST(op,i*64+24, 
(w19 >> 8) & 0x7ffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*51+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w19 >> 59) | (w20 << 5) & 0x7ffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*51+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w20 >> 46) | (w21 << 18) & 0x7ffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*51+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w21 >> 33) | (w22 << 31) & 0x7ffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*51+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w22 >> 20) | (w23 << 44) & 0x7ffffffffffff, parm);\ - DST(op,i*64+29, (w23 >> 7) & 0x7ffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*51+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w23 >> 58) | (w24 << 6) & 0x7ffffffffffff, parm); register uint64_t w25 = *(uint32_t *)(ip+(i*51+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w24 >> 45) | (w25 << 19) & 0x7ffffffffffff, parm);;\ -} - -#define BITUNPACK64_51(ip, op, parm) { \ - BITUNBLK64_51(ip, 0, op, parm); DSTI(op); ip += 51*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_52(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*13+0)*8/sizeof(ip[0]));\ - DST(op,i*16+ 0, (w0 ) & 0xfffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*13+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 1, (w0 >> 52) | (w1 << 12) & 0xfffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*13+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 2, (w1 >> 40) | (w2 << 24) & 0xfffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*13+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 3, (w2 >> 28) | (w3 << 36) & 0xfffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*13+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 4, (w3 >> 16) | (w4 << 48) & 0xfffffffffffff, parm);\ - DST(op,i*16+ 5, (w4 >> 4) & 0xfffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*13+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 6, (w4 >> 56) | (w5 << 8) & 0xfffffffffffff, parm); register 
uint64_t w6 = *(uint64_t *)(ip+(i*13+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 7, (w5 >> 44) | (w6 << 20) & 0xfffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*13+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 8, (w6 >> 32) | (w7 << 32) & 0xfffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*13+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 9, (w7 >> 20) | (w8 << 44) & 0xfffffffffffff, parm);\ - DST(op,i*16+10, (w8 >> 8) & 0xfffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*13+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+11, (w8 >> 60) | (w9 << 4) & 0xfffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*13+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+12, (w9 >> 48) | (w10 << 16) & 0xfffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*13+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+13, (w10 >> 36) | (w11 << 28) & 0xfffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*13+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+14, (w11 >> 24) | (w12 << 40) & 0xfffffffffffff, parm);\ - DST(op,i*16+15, (w12 >> 12) , parm);;\ -} - -#define BITUNPACK64_52(ip, op, parm) { \ - BITUNBLK64_52(ip, 0, op, parm);\ - BITUNBLK64_52(ip, 1, op, parm); DSTI(op); ip += 52*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_53(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*53+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1fffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*53+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 53) | (w1 << 11) & 0x1fffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*53+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 42) | (w2 << 22) & 0x1fffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*53+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w2 >> 31) | (w3 << 33) & 0x1fffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*53+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w3 >> 20) | (w4 << 44) & 0x1fffffffffffff, parm);\ - DST(op,i*64+ 5, (w4 >> 9) & 0x1fffffffffffff, 
parm); register uint64_t w5 = *(uint64_t *)(ip+(i*53+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w4 >> 62) | (w5 << 2) & 0x1fffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*53+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w5 >> 51) | (w6 << 13) & 0x1fffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*53+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w6 >> 40) | (w7 << 24) & 0x1fffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*53+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w7 >> 29) | (w8 << 35) & 0x1fffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*53+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w8 >> 18) | (w9 << 46) & 0x1fffffffffffff, parm);\ - DST(op,i*64+11, (w9 >> 7) & 0x1fffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*53+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w9 >> 60) | (w10 << 4) & 0x1fffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*53+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w10 >> 49) | (w11 << 15) & 0x1fffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*53+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w11 >> 38) | (w12 << 26) & 0x1fffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*53+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w12 >> 27) | (w13 << 37) & 0x1fffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*53+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w13 >> 16) | (w14 << 48) & 0x1fffffffffffff, parm);\ - DST(op,i*64+17, (w14 >> 5) & 0x1fffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*53+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w14 >> 58) | (w15 << 6) & 0x1fffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*53+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w15 >> 47) | (w16 << 17) & 0x1fffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*53+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w16 >> 36) | (w17 << 28) & 0x1fffffffffffff, parm); register uint64_t w18 = 
*(uint64_t *)(ip+(i*53+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w17 >> 25) | (w18 << 39) & 0x1fffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*53+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w18 >> 14) | (w19 << 50) & 0x1fffffffffffff, parm);\ - DST(op,i*64+23, (w19 >> 3) & 0x1fffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*53+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w19 >> 56) | (w20 << 8) & 0x1fffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*53+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w20 >> 45) | (w21 << 19) & 0x1fffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*53+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w21 >> 34) | (w22 << 30) & 0x1fffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*53+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w22 >> 23) | (w23 << 41) & 0x1fffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*53+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w23 >> 12) | (w24 << 52) & 0x1fffffffffffff, parm);\ - DST(op,i*64+29, (w24 >> 1) & 0x1fffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*53+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w24 >> 54) | (w25 << 10) & 0x1fffffffffffff, parm); register uint64_t w26 = *(uint32_t *)(ip+(i*53+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w25 >> 43) | (w26 << 21) & 0x1fffffffffffff, parm);;\ -} - -#define BITUNPACK64_53(ip, op, parm) { \ - BITUNBLK64_53(ip, 0, op, parm); DSTI(op); ip += 53*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_54(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*27+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3fffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*27+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 1, (w0 >> 54) | (w1 << 10) & 0x3fffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*27+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 2, (w1 >> 44) | (w2 << 20) & 0x3fffffffffffff, parm); register uint64_t w3 = *(uint64_t 
*)(ip+(i*27+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 3, (w2 >> 34) | (w3 << 30) & 0x3fffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*27+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 4, (w3 >> 24) | (w4 << 40) & 0x3fffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*27+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 5, (w4 >> 14) | (w5 << 50) & 0x3fffffffffffff, parm);\ - DST(op,i*32+ 6, (w5 >> 4) & 0x3fffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*27+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 7, (w5 >> 58) | (w6 << 6) & 0x3fffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*27+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 8, (w6 >> 48) | (w7 << 16) & 0x3fffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*27+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 9, (w7 >> 38) | (w8 << 26) & 0x3fffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*27+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+10, (w8 >> 28) | (w9 << 36) & 0x3fffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*27+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+11, (w9 >> 18) | (w10 << 46) & 0x3fffffffffffff, parm);\ - DST(op,i*32+12, (w10 >> 8) & 0x3fffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*27+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+13, (w10 >> 62) | (w11 << 2) & 0x3fffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*27+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+14, (w11 >> 52) | (w12 << 12) & 0x3fffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*27+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+15, (w12 >> 42) | (w13 << 22) & 0x3fffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*27+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+16, (w13 >> 32) | (w14 << 32) & 0x3fffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*27+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+17, (w14 >> 22) | (w15 << 42) & 0x3fffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*27+16)*8/sizeof(ip[0]));\ -\ - 
DST(op,i*32+18, (w15 >> 12) | (w16 << 52) & 0x3fffffffffffff, parm);\ - DST(op,i*32+19, (w16 >> 2) & 0x3fffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*27+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+20, (w16 >> 56) | (w17 << 8) & 0x3fffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*27+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+21, (w17 >> 46) | (w18 << 18) & 0x3fffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*27+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+22, (w18 >> 36) | (w19 << 28) & 0x3fffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*27+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+23, (w19 >> 26) | (w20 << 38) & 0x3fffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*27+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+24, (w20 >> 16) | (w21 << 48) & 0x3fffffffffffff, parm);\ - DST(op,i*32+25, (w21 >> 6) & 0x3fffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*27+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+26, (w21 >> 60) | (w22 << 4) & 0x3fffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*27+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+27, (w22 >> 50) | (w23 << 14) & 0x3fffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*27+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+28, (w23 >> 40) | (w24 << 24) & 0x3fffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*27+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+29, (w24 >> 30) | (w25 << 34) & 0x3fffffffffffff, parm); register uint64_t w26 = *(uint64_t *)(ip+(i*27+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+30, (w25 >> 20) | (w26 << 44) & 0x3fffffffffffff, parm);\ - DST(op,i*32+31, (w26 >> 10) , parm);;\ -} - -#define BITUNPACK64_54(ip, op, parm) { \ - BITUNBLK64_54(ip, 0, op, parm); DSTI(op); ip += 54*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_55(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*55+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7fffffffffffff, parm); register uint64_t w1 = *(uint64_t 
*)(ip+(i*55+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 55) | (w1 << 9) & 0x7fffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*55+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 46) | (w2 << 18) & 0x7fffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*55+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w2 >> 37) | (w3 << 27) & 0x7fffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*55+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w3 >> 28) | (w4 << 36) & 0x7fffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*55+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w4 >> 19) | (w5 << 45) & 0x7fffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*55+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w5 >> 10) | (w6 << 54) & 0x7fffffffffffff, parm);\ - DST(op,i*64+ 7, (w6 >> 1) & 0x7fffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*55+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w6 >> 56) | (w7 << 8) & 0x7fffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*55+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w7 >> 47) | (w8 << 17) & 0x7fffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*55+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w8 >> 38) | (w9 << 26) & 0x7fffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*55+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w9 >> 29) | (w10 << 35) & 0x7fffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*55+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w10 >> 20) | (w11 << 44) & 0x7fffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*55+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w11 >> 11) | (w12 << 53) & 0x7fffffffffffff, parm);\ - DST(op,i*64+14, (w12 >> 2) & 0x7fffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*55+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w12 >> 57) | (w13 << 7) & 0x7fffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*55+14)*8/sizeof(ip[0]));\ -\ - 
DST(op,i*64+16, (w13 >> 48) | (w14 << 16) & 0x7fffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*55+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w14 >> 39) | (w15 << 25) & 0x7fffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*55+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w15 >> 30) | (w16 << 34) & 0x7fffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*55+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w16 >> 21) | (w17 << 43) & 0x7fffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*55+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w17 >> 12) | (w18 << 52) & 0x7fffffffffffff, parm);\ - DST(op,i*64+21, (w18 >> 3) & 0x7fffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*55+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w18 >> 58) | (w19 << 6) & 0x7fffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*55+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w19 >> 49) | (w20 << 15) & 0x7fffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*55+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w20 >> 40) | (w21 << 24) & 0x7fffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*55+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w21 >> 31) | (w22 << 33) & 0x7fffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*55+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w22 >> 22) | (w23 << 42) & 0x7fffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*55+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w23 >> 13) | (w24 << 51) & 0x7fffffffffffff, parm);\ - DST(op,i*64+28, (w24 >> 4) & 0x7fffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*55+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w24 >> 59) | (w25 << 5) & 0x7fffffffffffff, parm); register uint64_t w26 = *(uint64_t *)(ip+(i*55+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w25 >> 50) | (w26 << 14) & 0x7fffffffffffff, parm); register uint64_t w27 = *(uint32_t *)(ip+(i*55+27)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, 
(w26 >> 41) | (w27 << 23) & 0x7fffffffffffff, parm);;\ -} - -#define BITUNPACK64_55(ip, op, parm) { \ - BITUNBLK64_55(ip, 0, op, parm); DSTI(op); ip += 55*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_56(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*7+0)*8/sizeof(ip[0]));\ - DST(op,i*8+ 0, (w0 ) & 0xffffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*7+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 1, (w0 >> 56) | (w1 << 8) & 0xffffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*7+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 2, (w1 >> 48) | (w2 << 16) & 0xffffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*7+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 3, (w2 >> 40) | (w3 << 24) & 0xffffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*7+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 4, (w3 >> 32) | (w4 << 32) & 0xffffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*7+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 5, (w4 >> 24) | (w5 << 40) & 0xffffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*7+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*8+ 6, (w5 >> 16) | (w6 << 48) & 0xffffffffffffff, parm);\ - DST(op,i*8+ 7, (w6 >> 8) , parm);;\ -} - -#define BITUNPACK64_56(ip, op, parm) { \ - BITUNBLK64_56(ip, 0, op, parm);\ - BITUNBLK64_56(ip, 1, op, parm);\ - BITUNBLK64_56(ip, 2, op, parm);\ - BITUNBLK64_56(ip, 3, op, parm); DSTI(op); ip += 56*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_57(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*57+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1ffffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*57+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 57) | (w1 << 7) & 0x1ffffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*57+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 50) | (w2 << 14) & 0x1ffffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*57+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w2 >> 43) | (w3 << 21) & 
0x1ffffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*57+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w3 >> 36) | (w4 << 28) & 0x1ffffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*57+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w4 >> 29) | (w5 << 35) & 0x1ffffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*57+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w5 >> 22) | (w6 << 42) & 0x1ffffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*57+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w6 >> 15) | (w7 << 49) & 0x1ffffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*57+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w7 >> 8) | (w8 << 56) & 0x1ffffffffffffff, parm);\ - DST(op,i*64+ 9, (w8 >> 1) & 0x1ffffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*57+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w8 >> 58) | (w9 << 6) & 0x1ffffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*57+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w9 >> 51) | (w10 << 13) & 0x1ffffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*57+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w10 >> 44) | (w11 << 20) & 0x1ffffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*57+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w11 >> 37) | (w12 << 27) & 0x1ffffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*57+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w12 >> 30) | (w13 << 34) & 0x1ffffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*57+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w13 >> 23) | (w14 << 41) & 0x1ffffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*57+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w14 >> 16) | (w15 << 48) & 0x1ffffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*57+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w15 >> 9) | (w16 << 55) & 0x1ffffffffffffff, parm);\ - DST(op,i*64+18, (w16 >> 2) & 0x1ffffffffffffff, parm); 
register uint64_t w17 = *(uint64_t *)(ip+(i*57+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w16 >> 59) | (w17 << 5) & 0x1ffffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*57+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w17 >> 52) | (w18 << 12) & 0x1ffffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*57+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w18 >> 45) | (w19 << 19) & 0x1ffffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*57+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w19 >> 38) | (w20 << 26) & 0x1ffffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*57+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w20 >> 31) | (w21 << 33) & 0x1ffffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*57+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w21 >> 24) | (w22 << 40) & 0x1ffffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*57+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w22 >> 17) | (w23 << 47) & 0x1ffffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*57+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w23 >> 10) | (w24 << 54) & 0x1ffffffffffffff, parm);\ - DST(op,i*64+27, (w24 >> 3) & 0x1ffffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*57+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w24 >> 60) | (w25 << 4) & 0x1ffffffffffffff, parm); register uint64_t w26 = *(uint64_t *)(ip+(i*57+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w25 >> 53) | (w26 << 11) & 0x1ffffffffffffff, parm); register uint64_t w27 = *(uint64_t *)(ip+(i*57+27)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w26 >> 46) | (w27 << 18) & 0x1ffffffffffffff, parm); register uint64_t w28 = *(uint32_t *)(ip+(i*57+28)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w27 >> 39) | (w28 << 25) & 0x1ffffffffffffff, parm);;\ -} - -#define BITUNPACK64_57(ip, op, parm) { \ - BITUNBLK64_57(ip, 0, op, parm); DSTI(op); ip += 57*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_58(ip, i, op, parm) { register uint64_t w0 = *(uint64_t 
*)(ip+(i*29+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3ffffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*29+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 1, (w0 >> 58) | (w1 << 6) & 0x3ffffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*29+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 2, (w1 >> 52) | (w2 << 12) & 0x3ffffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*29+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 3, (w2 >> 46) | (w3 << 18) & 0x3ffffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*29+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 4, (w3 >> 40) | (w4 << 24) & 0x3ffffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*29+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 5, (w4 >> 34) | (w5 << 30) & 0x3ffffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*29+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 6, (w5 >> 28) | (w6 << 36) & 0x3ffffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*29+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 7, (w6 >> 22) | (w7 << 42) & 0x3ffffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*29+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 8, (w7 >> 16) | (w8 << 48) & 0x3ffffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*29+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 9, (w8 >> 10) | (w9 << 54) & 0x3ffffffffffffff, parm);\ - DST(op,i*32+10, (w9 >> 4) & 0x3ffffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*29+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+11, (w9 >> 62) | (w10 << 2) & 0x3ffffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*29+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+12, (w10 >> 56) | (w11 << 8) & 0x3ffffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*29+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+13, (w11 >> 50) | (w12 << 14) & 0x3ffffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*29+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+14, (w12 >> 44) | (w13 << 20) & 0x3ffffffffffffff, parm); register 
uint64_t w14 = *(uint64_t *)(ip+(i*29+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+15, (w13 >> 38) | (w14 << 26) & 0x3ffffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*29+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+16, (w14 >> 32) | (w15 << 32) & 0x3ffffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*29+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+17, (w15 >> 26) | (w16 << 38) & 0x3ffffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*29+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+18, (w16 >> 20) | (w17 << 44) & 0x3ffffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*29+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+19, (w17 >> 14) | (w18 << 50) & 0x3ffffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*29+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+20, (w18 >> 8) | (w19 << 56) & 0x3ffffffffffffff, parm);\ - DST(op,i*32+21, (w19 >> 2) & 0x3ffffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*29+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+22, (w19 >> 60) | (w20 << 4) & 0x3ffffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*29+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+23, (w20 >> 54) | (w21 << 10) & 0x3ffffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*29+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+24, (w21 >> 48) | (w22 << 16) & 0x3ffffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*29+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+25, (w22 >> 42) | (w23 << 22) & 0x3ffffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*29+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+26, (w23 >> 36) | (w24 << 28) & 0x3ffffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*29+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+27, (w24 >> 30) | (w25 << 34) & 0x3ffffffffffffff, parm); register uint64_t w26 = *(uint64_t *)(ip+(i*29+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+28, (w25 >> 24) | (w26 << 40) & 0x3ffffffffffffff, parm); register uint64_t w27 = *(uint64_t 
*)(ip+(i*29+27)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+29, (w26 >> 18) | (w27 << 46) & 0x3ffffffffffffff, parm); register uint64_t w28 = *(uint64_t *)(ip+(i*29+28)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+30, (w27 >> 12) | (w28 << 52) & 0x3ffffffffffffff, parm);\ - DST(op,i*32+31, (w28 >> 6) , parm);;\ -} - -#define BITUNPACK64_58(ip, op, parm) { \ - BITUNBLK64_58(ip, 0, op, parm); DSTI(op); ip += 58*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_59(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*59+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7ffffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*59+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 59) | (w1 << 5) & 0x7ffffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*59+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 54) | (w2 << 10) & 0x7ffffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*59+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w2 >> 49) | (w3 << 15) & 0x7ffffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*59+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w3 >> 44) | (w4 << 20) & 0x7ffffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*59+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w4 >> 39) | (w5 << 25) & 0x7ffffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*59+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w5 >> 34) | (w6 << 30) & 0x7ffffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*59+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w6 >> 29) | (w7 << 35) & 0x7ffffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*59+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w7 >> 24) | (w8 << 40) & 0x7ffffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*59+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w8 >> 19) | (w9 << 45) & 0x7ffffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*59+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w9 >> 14) | (w10 << 50) & 0x7ffffffffffffff, parm); 
register uint64_t w11 = *(uint64_t *)(ip+(i*59+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w10 >> 9) | (w11 << 55) & 0x7ffffffffffffff, parm);\ - DST(op,i*64+12, (w11 >> 4) & 0x7ffffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*59+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w11 >> 63) | (w12 << 1) & 0x7ffffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*59+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w12 >> 58) | (w13 << 6) & 0x7ffffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*59+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w13 >> 53) | (w14 << 11) & 0x7ffffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*59+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w14 >> 48) | (w15 << 16) & 0x7ffffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*59+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w15 >> 43) | (w16 << 21) & 0x7ffffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*59+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w16 >> 38) | (w17 << 26) & 0x7ffffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*59+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w17 >> 33) | (w18 << 31) & 0x7ffffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*59+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w18 >> 28) | (w19 << 36) & 0x7ffffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*59+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w19 >> 23) | (w20 << 41) & 0x7ffffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*59+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w20 >> 18) | (w21 << 46) & 0x7ffffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*59+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w21 >> 13) | (w22 << 51) & 0x7ffffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*59+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w22 >> 8) | (w23 << 56) & 0x7ffffffffffffff, parm);\ - DST(op,i*64+25, (w23 >> 3) & 0x7ffffffffffffff, parm); 
register uint64_t w24 = *(uint64_t *)(ip+(i*59+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w23 >> 62) | (w24 << 2) & 0x7ffffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*59+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w24 >> 57) | (w25 << 7) & 0x7ffffffffffffff, parm); register uint64_t w26 = *(uint64_t *)(ip+(i*59+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w25 >> 52) | (w26 << 12) & 0x7ffffffffffffff, parm); register uint64_t w27 = *(uint64_t *)(ip+(i*59+27)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w26 >> 47) | (w27 << 17) & 0x7ffffffffffffff, parm); register uint64_t w28 = *(uint64_t *)(ip+(i*59+28)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w27 >> 42) | (w28 << 22) & 0x7ffffffffffffff, parm); register uint64_t w29 = *(uint32_t *)(ip+(i*59+29)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w28 >> 37) | (w29 << 27) & 0x7ffffffffffffff, parm);;\ -} - -#define BITUNPACK64_59(ip, op, parm) { \ - BITUNBLK64_59(ip, 0, op, parm); DSTI(op); ip += 59*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_60(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*15+0)*8/sizeof(ip[0]));\ - DST(op,i*16+ 0, (w0 ) & 0xfffffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*15+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 1, (w0 >> 60) | (w1 << 4) & 0xfffffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*15+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 2, (w1 >> 56) | (w2 << 8) & 0xfffffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*15+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 3, (w2 >> 52) | (w3 << 12) & 0xfffffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*15+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 4, (w3 >> 48) | (w4 << 16) & 0xfffffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*15+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 5, (w4 >> 44) | (w5 << 20) & 0xfffffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*15+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 6, (w5 >> 40) | (w6 << 24) & 0xfffffffffffffff, parm); 
register uint64_t w7 = *(uint64_t *)(ip+(i*15+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 7, (w6 >> 36) | (w7 << 28) & 0xfffffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*15+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 8, (w7 >> 32) | (w8 << 32) & 0xfffffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*15+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+ 9, (w8 >> 28) | (w9 << 36) & 0xfffffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*15+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+10, (w9 >> 24) | (w10 << 40) & 0xfffffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*15+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+11, (w10 >> 20) | (w11 << 44) & 0xfffffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*15+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+12, (w11 >> 16) | (w12 << 48) & 0xfffffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*15+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+13, (w12 >> 12) | (w13 << 52) & 0xfffffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*15+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*16+14, (w13 >> 8) | (w14 << 56) & 0xfffffffffffffff, parm);\ - DST(op,i*16+15, (w14 >> 4) , parm);;\ -} - -#define BITUNPACK64_60(ip, op, parm) { \ - BITUNBLK64_60(ip, 0, op, parm);\ - BITUNBLK64_60(ip, 1, op, parm); DSTI(op); ip += 60*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_61(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*61+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x1fffffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*61+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 61) | (w1 << 3) & 0x1fffffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*61+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 58) | (w2 << 6) & 0x1fffffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*61+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w2 >> 55) | (w3 << 9) & 0x1fffffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*61+4)*8/sizeof(ip[0]));\ 
-\ - DST(op,i*64+ 4, (w3 >> 52) | (w4 << 12) & 0x1fffffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*61+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w4 >> 49) | (w5 << 15) & 0x1fffffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*61+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w5 >> 46) | (w6 << 18) & 0x1fffffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*61+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w6 >> 43) | (w7 << 21) & 0x1fffffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*61+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w7 >> 40) | (w8 << 24) & 0x1fffffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*61+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w8 >> 37) | (w9 << 27) & 0x1fffffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*61+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w9 >> 34) | (w10 << 30) & 0x1fffffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*61+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w10 >> 31) | (w11 << 33) & 0x1fffffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*61+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w11 >> 28) | (w12 << 36) & 0x1fffffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*61+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w12 >> 25) | (w13 << 39) & 0x1fffffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*61+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w13 >> 22) | (w14 << 42) & 0x1fffffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*61+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w14 >> 19) | (w15 << 45) & 0x1fffffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*61+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w15 >> 16) | (w16 << 48) & 0x1fffffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*61+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w16 >> 13) | (w17 << 51) & 0x1fffffffffffffff, parm); register uint64_t w18 = *(uint64_t 
*)(ip+(i*61+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w17 >> 10) | (w18 << 54) & 0x1fffffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*61+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w18 >> 7) | (w19 << 57) & 0x1fffffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*61+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w19 >> 4) | (w20 << 60) & 0x1fffffffffffffff, parm);\ - DST(op,i*64+21, (w20 >> 1) & 0x1fffffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*61+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+22, (w20 >> 62) | (w21 << 2) & 0x1fffffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*61+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w21 >> 59) | (w22 << 5) & 0x1fffffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*61+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w22 >> 56) | (w23 << 8) & 0x1fffffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*61+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w23 >> 53) | (w24 << 11) & 0x1fffffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*61+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w24 >> 50) | (w25 << 14) & 0x1fffffffffffffff, parm); register uint64_t w26 = *(uint64_t *)(ip+(i*61+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w25 >> 47) | (w26 << 17) & 0x1fffffffffffffff, parm); register uint64_t w27 = *(uint64_t *)(ip+(i*61+27)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w26 >> 44) | (w27 << 20) & 0x1fffffffffffffff, parm); register uint64_t w28 = *(uint64_t *)(ip+(i*61+28)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w27 >> 41) | (w28 << 23) & 0x1fffffffffffffff, parm); register uint64_t w29 = *(uint64_t *)(ip+(i*61+29)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w28 >> 38) | (w29 << 26) & 0x1fffffffffffffff, parm); register uint64_t w30 = *(uint32_t *)(ip+(i*61+30)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w29 >> 35) | (w30 << 29) & 0x1fffffffffffffff, parm);;\ -} - -#define BITUNPACK64_61(ip, op, parm) { \ - BITUNBLK64_61(ip, 0, op, parm); 
DSTI(op); ip += 61*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_62(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*31+0)*8/sizeof(ip[0]));\ - DST(op,i*32+ 0, (w0 ) & 0x3fffffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*31+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 1, (w0 >> 62) | (w1 << 2) & 0x3fffffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*31+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 2, (w1 >> 60) | (w2 << 4) & 0x3fffffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*31+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 3, (w2 >> 58) | (w3 << 6) & 0x3fffffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*31+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 4, (w3 >> 56) | (w4 << 8) & 0x3fffffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*31+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 5, (w4 >> 54) | (w5 << 10) & 0x3fffffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*31+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 6, (w5 >> 52) | (w6 << 12) & 0x3fffffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*31+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 7, (w6 >> 50) | (w7 << 14) & 0x3fffffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*31+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 8, (w7 >> 48) | (w8 << 16) & 0x3fffffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*31+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+ 9, (w8 >> 46) | (w9 << 18) & 0x3fffffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*31+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+10, (w9 >> 44) | (w10 << 20) & 0x3fffffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*31+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+11, (w10 >> 42) | (w11 << 22) & 0x3fffffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*31+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+12, (w11 >> 40) | (w12 << 24) & 0x3fffffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*31+13)*8/sizeof(ip[0]));\ -\ - 
DST(op,i*32+13, (w12 >> 38) | (w13 << 26) & 0x3fffffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*31+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+14, (w13 >> 36) | (w14 << 28) & 0x3fffffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*31+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+15, (w14 >> 34) | (w15 << 30) & 0x3fffffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*31+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+16, (w15 >> 32) | (w16 << 32) & 0x3fffffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*31+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+17, (w16 >> 30) | (w17 << 34) & 0x3fffffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*31+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+18, (w17 >> 28) | (w18 << 36) & 0x3fffffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*31+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+19, (w18 >> 26) | (w19 << 38) & 0x3fffffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*31+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+20, (w19 >> 24) | (w20 << 40) & 0x3fffffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*31+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+21, (w20 >> 22) | (w21 << 42) & 0x3fffffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*31+22)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+22, (w21 >> 20) | (w22 << 44) & 0x3fffffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*31+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+23, (w22 >> 18) | (w23 << 46) & 0x3fffffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*31+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+24, (w23 >> 16) | (w24 << 48) & 0x3fffffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*31+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+25, (w24 >> 14) | (w25 << 50) & 0x3fffffffffffffff, parm); register uint64_t w26 = *(uint64_t *)(ip+(i*31+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+26, (w25 >> 12) | (w26 << 52) & 0x3fffffffffffffff, parm); register uint64_t w27 = 
*(uint64_t *)(ip+(i*31+27)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+27, (w26 >> 10) | (w27 << 54) & 0x3fffffffffffffff, parm); register uint64_t w28 = *(uint64_t *)(ip+(i*31+28)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+28, (w27 >> 8) | (w28 << 56) & 0x3fffffffffffffff, parm); register uint64_t w29 = *(uint64_t *)(ip+(i*31+29)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+29, (w28 >> 6) | (w29 << 58) & 0x3fffffffffffffff, parm); register uint64_t w30 = *(uint64_t *)(ip+(i*31+30)*8/sizeof(ip[0]));\ -\ - DST(op,i*32+30, (w29 >> 4) | (w30 << 60) & 0x3fffffffffffffff, parm);\ - DST(op,i*32+31, (w30 >> 2) , parm);;\ -} - -#define BITUNPACK64_62(ip, op, parm) { \ - BITUNBLK64_62(ip, 0, op, parm); DSTI(op); ip += 62*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_63(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*63+0)*8/sizeof(ip[0]));\ - DST(op,i*64+ 0, (w0 ) & 0x7fffffffffffffff, parm); register uint64_t w1 = *(uint64_t *)(ip+(i*63+1)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 1, (w0 >> 63) | (w1 << 1) & 0x7fffffffffffffff, parm); register uint64_t w2 = *(uint64_t *)(ip+(i*63+2)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 2, (w1 >> 62) | (w2 << 2) & 0x7fffffffffffffff, parm); register uint64_t w3 = *(uint64_t *)(ip+(i*63+3)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 3, (w2 >> 61) | (w3 << 3) & 0x7fffffffffffffff, parm); register uint64_t w4 = *(uint64_t *)(ip+(i*63+4)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 4, (w3 >> 60) | (w4 << 4) & 0x7fffffffffffffff, parm); register uint64_t w5 = *(uint64_t *)(ip+(i*63+5)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 5, (w4 >> 59) | (w5 << 5) & 0x7fffffffffffffff, parm); register uint64_t w6 = *(uint64_t *)(ip+(i*63+6)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 6, (w5 >> 58) | (w6 << 6) & 0x7fffffffffffffff, parm); register uint64_t w7 = *(uint64_t *)(ip+(i*63+7)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 7, (w6 >> 57) | (w7 << 7) & 0x7fffffffffffffff, parm); register uint64_t w8 = *(uint64_t *)(ip+(i*63+8)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 8, (w7 >> 56) | (w8 << 8) & 
0x7fffffffffffffff, parm); register uint64_t w9 = *(uint64_t *)(ip+(i*63+9)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+ 9, (w8 >> 55) | (w9 << 9) & 0x7fffffffffffffff, parm); register uint64_t w10 = *(uint64_t *)(ip+(i*63+10)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+10, (w9 >> 54) | (w10 << 10) & 0x7fffffffffffffff, parm); register uint64_t w11 = *(uint64_t *)(ip+(i*63+11)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+11, (w10 >> 53) | (w11 << 11) & 0x7fffffffffffffff, parm); register uint64_t w12 = *(uint64_t *)(ip+(i*63+12)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+12, (w11 >> 52) | (w12 << 12) & 0x7fffffffffffffff, parm); register uint64_t w13 = *(uint64_t *)(ip+(i*63+13)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+13, (w12 >> 51) | (w13 << 13) & 0x7fffffffffffffff, parm); register uint64_t w14 = *(uint64_t *)(ip+(i*63+14)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+14, (w13 >> 50) | (w14 << 14) & 0x7fffffffffffffff, parm); register uint64_t w15 = *(uint64_t *)(ip+(i*63+15)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+15, (w14 >> 49) | (w15 << 15) & 0x7fffffffffffffff, parm); register uint64_t w16 = *(uint64_t *)(ip+(i*63+16)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+16, (w15 >> 48) | (w16 << 16) & 0x7fffffffffffffff, parm); register uint64_t w17 = *(uint64_t *)(ip+(i*63+17)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+17, (w16 >> 47) | (w17 << 17) & 0x7fffffffffffffff, parm); register uint64_t w18 = *(uint64_t *)(ip+(i*63+18)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+18, (w17 >> 46) | (w18 << 18) & 0x7fffffffffffffff, parm); register uint64_t w19 = *(uint64_t *)(ip+(i*63+19)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+19, (w18 >> 45) | (w19 << 19) & 0x7fffffffffffffff, parm); register uint64_t w20 = *(uint64_t *)(ip+(i*63+20)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+20, (w19 >> 44) | (w20 << 20) & 0x7fffffffffffffff, parm); register uint64_t w21 = *(uint64_t *)(ip+(i*63+21)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+21, (w20 >> 43) | (w21 << 21) & 0x7fffffffffffffff, parm); register uint64_t w22 = *(uint64_t *)(ip+(i*63+22)*8/sizeof(ip[0]));\ -\ - 
DST(op,i*64+22, (w21 >> 42) | (w22 << 22) & 0x7fffffffffffffff, parm); register uint64_t w23 = *(uint64_t *)(ip+(i*63+23)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+23, (w22 >> 41) | (w23 << 23) & 0x7fffffffffffffff, parm); register uint64_t w24 = *(uint64_t *)(ip+(i*63+24)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+24, (w23 >> 40) | (w24 << 24) & 0x7fffffffffffffff, parm); register uint64_t w25 = *(uint64_t *)(ip+(i*63+25)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+25, (w24 >> 39) | (w25 << 25) & 0x7fffffffffffffff, parm); register uint64_t w26 = *(uint64_t *)(ip+(i*63+26)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+26, (w25 >> 38) | (w26 << 26) & 0x7fffffffffffffff, parm); register uint64_t w27 = *(uint64_t *)(ip+(i*63+27)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+27, (w26 >> 37) | (w27 << 27) & 0x7fffffffffffffff, parm); register uint64_t w28 = *(uint64_t *)(ip+(i*63+28)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+28, (w27 >> 36) | (w28 << 28) & 0x7fffffffffffffff, parm); register uint64_t w29 = *(uint64_t *)(ip+(i*63+29)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+29, (w28 >> 35) | (w29 << 29) & 0x7fffffffffffffff, parm); register uint64_t w30 = *(uint64_t *)(ip+(i*63+30)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+30, (w29 >> 34) | (w30 << 30) & 0x7fffffffffffffff, parm); register uint64_t w31 = *(uint32_t *)(ip+(i*63+31)*8/sizeof(ip[0]));\ -\ - DST(op,i*64+31, (w30 >> 33) | (w31 << 31) & 0x7fffffffffffffff, parm);;\ -} - -#define BITUNPACK64_63(ip, op, parm) { \ - BITUNBLK64_63(ip, 0, op, parm); DSTI(op); ip += 63*4/sizeof(ip[0]);\ -} - -#define BITUNBLK64_64(ip, i, op, parm) { register uint64_t w0 = *(uint64_t *)(ip+(i*1+0)*8/sizeof(ip[0]));\ - DST(op,i*1+ 0, (w0 ) , parm);;\ -} - -#define BITUNPACK64_64(ip, op, parm) { \ - BITUNBLK64_64(ip, 0, op, parm);\ - BITUNBLK64_64(ip, 1, op, parm);\ - BITUNBLK64_64(ip, 2, op, parm);\ - BITUNBLK64_64(ip, 3, op, parm);\ - BITUNBLK64_64(ip, 4, op, parm);\ - BITUNBLK64_64(ip, 5, op, parm);\ - BITUNBLK64_64(ip, 6, op, parm);\ - BITUNBLK64_64(ip, 7, op, parm);\ - BITUNBLK64_64(ip, 
8, op, parm);\ - BITUNBLK64_64(ip, 9, op, parm);\ - BITUNBLK64_64(ip, 10, op, parm);\ - BITUNBLK64_64(ip, 11, op, parm);\ - BITUNBLK64_64(ip, 12, op, parm);\ - BITUNBLK64_64(ip, 13, op, parm);\ - BITUNBLK64_64(ip, 14, op, parm);\ - BITUNBLK64_64(ip, 15, op, parm);\ - BITUNBLK64_64(ip, 16, op, parm);\ - BITUNBLK64_64(ip, 17, op, parm);\ - BITUNBLK64_64(ip, 18, op, parm);\ - BITUNBLK64_64(ip, 19, op, parm);\ - BITUNBLK64_64(ip, 20, op, parm);\ - BITUNBLK64_64(ip, 21, op, parm);\ - BITUNBLK64_64(ip, 22, op, parm);\ - BITUNBLK64_64(ip, 23, op, parm);\ - BITUNBLK64_64(ip, 24, op, parm);\ - BITUNBLK64_64(ip, 25, op, parm);\ - BITUNBLK64_64(ip, 26, op, parm);\ - BITUNBLK64_64(ip, 27, op, parm);\ - BITUNBLK64_64(ip, 28, op, parm);\ - BITUNBLK64_64(ip, 29, op, parm);\ - BITUNBLK64_64(ip, 30, op, parm);\ - BITUNBLK64_64(ip, 31, op, parm); DSTI(op); ip += 64*4/sizeof(ip[0]);\ +// bitpack include +#define BITBLK32_1(ip, i, op, parm) { ; register uint32_t w;;\ + IPPB(ip, i*32+ 0, parm); w = (uint32_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); w |= (uint32_t)SRC(ip, i*32+ 1) << 1;\ + IPPB(ip, i*32+ 2, parm); w |= (uint32_t)SRC(ip, i*32+ 2) << 2;\ + IPPB(ip, i*32+ 3, parm); w |= (uint32_t)SRC(ip, i*32+ 3) << 3;\ + IPPB(ip, i*32+ 4, parm); w |= (uint32_t)SRC(ip, i*32+ 4) << 4;\ + IPPB(ip, i*32+ 5, parm); w |= (uint32_t)SRC(ip, i*32+ 5) << 5;\ + IPPB(ip, i*32+ 6, parm); w |= (uint32_t)SRC(ip, i*32+ 6) << 6;\ + IPPB(ip, i*32+ 7, parm); w |= (uint32_t)SRC(ip, i*32+ 7) << 7;\ + IPPB(ip, i*32+ 8, parm); w |= (uint32_t)SRC(ip, i*32+ 8) << 8;\ + IPPB(ip, i*32+ 9, parm); w |= (uint32_t)SRC(ip, i*32+ 9) << 9;\ + IPPB(ip, i*32+10, parm); w |= (uint32_t)SRC(ip, i*32+10) << 10;\ + IPPB(ip, i*32+11, parm); w |= (uint32_t)SRC(ip, i*32+11) << 11;\ + IPPB(ip, i*32+12, parm); w |= (uint32_t)SRC(ip, i*32+12) << 12;\ + IPPB(ip, i*32+13, parm); w |= (uint32_t)SRC(ip, i*32+13) << 13;\ + IPPB(ip, i*32+14, parm); w |= (uint32_t)SRC(ip, i*32+14) << 14;\ + IPPB(ip, i*32+15, parm); w |= 
(uint32_t)SRC(ip, i*32+15) << 15;\ + IPPB(ip, i*32+16, parm); w |= (uint32_t)SRC(ip, i*32+16) << 16;\ + IPPB(ip, i*32+17, parm); w |= (uint32_t)SRC(ip, i*32+17) << 17;\ + IPPB(ip, i*32+18, parm); w |= (uint32_t)SRC(ip, i*32+18) << 18;\ + IPPB(ip, i*32+19, parm); w |= (uint32_t)SRC(ip, i*32+19) << 19;\ + IPPB(ip, i*32+20, parm); w |= (uint32_t)SRC(ip, i*32+20) << 20;\ + IPPB(ip, i*32+21, parm); w |= (uint32_t)SRC(ip, i*32+21) << 21;\ + IPPB(ip, i*32+22, parm); w |= (uint32_t)SRC(ip, i*32+22) << 22;\ + IPPB(ip, i*32+23, parm); w |= (uint32_t)SRC(ip, i*32+23) << 23;\ + IPPB(ip, i*32+24, parm); w |= (uint32_t)SRC(ip, i*32+24) << 24;\ + IPPB(ip, i*32+25, parm); w |= (uint32_t)SRC(ip, i*32+25) << 25;\ + IPPB(ip, i*32+26, parm); w |= (uint32_t)SRC(ip, i*32+26) << 26;\ + IPPB(ip, i*32+27, parm); w |= (uint32_t)SRC(ip, i*32+27) << 27;\ + IPPB(ip, i*32+28, parm); w |= (uint32_t)SRC(ip, i*32+28) << 28;\ + IPPB(ip, i*32+29, parm); w |= (uint32_t)SRC(ip, i*32+29) << 29;\ + IPPB(ip, i*32+30, parm); w |= (uint32_t)SRC(ip, i*32+30) << 30;\ + IPPB(ip, i*32+31, parm); w |= (uint32_t)SRC(ip, i*32+31) << 31;*((uint32_t *)op+i*1+ 0) = w;;\ +} + +#define BITPACK64_1(ip, op, parm) { \ + BITBLK32_1(ip, 0, op, parm); SRCI(ip); op += 1*4/sizeof(op[0]);\ +} + +#define BITBLK64_2(ip, i, op, parm) { ; register uint64_t w;;\ + IPPB(ip, i*32+ 0, parm); w = (uint64_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); w |= (uint64_t)SRC(ip, i*32+ 1) << 2;\ + IPPB(ip, i*32+ 2, parm); w |= (uint64_t)SRC(ip, i*32+ 2) << 4;\ + IPPB(ip, i*32+ 3, parm); w |= (uint64_t)SRC(ip, i*32+ 3) << 6;\ + IPPB(ip, i*32+ 4, parm); w |= (uint64_t)SRC(ip, i*32+ 4) << 8;\ + IPPB(ip, i*32+ 5, parm); w |= (uint64_t)SRC(ip, i*32+ 5) << 10;\ + IPPB(ip, i*32+ 6, parm); w |= (uint64_t)SRC(ip, i*32+ 6) << 12;\ + IPPB(ip, i*32+ 7, parm); w |= (uint64_t)SRC(ip, i*32+ 7) << 14;\ + IPPB(ip, i*32+ 8, parm); w |= (uint64_t)SRC(ip, i*32+ 8) << 16;\ + IPPB(ip, i*32+ 9, parm); w |= (uint64_t)SRC(ip, i*32+ 9) << 18;\ + IPPB(ip, i*32+10, 
parm); w |= (uint64_t)SRC(ip, i*32+10) << 20;\ + IPPB(ip, i*32+11, parm); w |= (uint64_t)SRC(ip, i*32+11) << 22;\ + IPPB(ip, i*32+12, parm); w |= (uint64_t)SRC(ip, i*32+12) << 24;\ + IPPB(ip, i*32+13, parm); w |= (uint64_t)SRC(ip, i*32+13) << 26;\ + IPPB(ip, i*32+14, parm); w |= (uint64_t)SRC(ip, i*32+14) << 28;\ + IPPB(ip, i*32+15, parm); w |= (uint64_t)SRC(ip, i*32+15) << 30;\ + IPPB(ip, i*32+16, parm); w |= (uint64_t)SRC(ip, i*32+16) << 32;\ + IPPB(ip, i*32+17, parm); w |= (uint64_t)SRC(ip, i*32+17) << 34;\ + IPPB(ip, i*32+18, parm); w |= (uint64_t)SRC(ip, i*32+18) << 36;\ + IPPB(ip, i*32+19, parm); w |= (uint64_t)SRC(ip, i*32+19) << 38;\ + IPPB(ip, i*32+20, parm); w |= (uint64_t)SRC(ip, i*32+20) << 40;\ + IPPB(ip, i*32+21, parm); w |= (uint64_t)SRC(ip, i*32+21) << 42;\ + IPPB(ip, i*32+22, parm); w |= (uint64_t)SRC(ip, i*32+22) << 44;\ + IPPB(ip, i*32+23, parm); w |= (uint64_t)SRC(ip, i*32+23) << 46;\ + IPPB(ip, i*32+24, parm); w |= (uint64_t)SRC(ip, i*32+24) << 48;\ + IPPB(ip, i*32+25, parm); w |= (uint64_t)SRC(ip, i*32+25) << 50;\ + IPPB(ip, i*32+26, parm); w |= (uint64_t)SRC(ip, i*32+26) << 52;\ + IPPB(ip, i*32+27, parm); w |= (uint64_t)SRC(ip, i*32+27) << 54;\ + IPPB(ip, i*32+28, parm); w |= (uint64_t)SRC(ip, i*32+28) << 56;\ + IPPB(ip, i*32+29, parm); w |= (uint64_t)SRC(ip, i*32+29) << 58;\ + IPPB(ip, i*32+30, parm); w |= (uint64_t)SRC(ip, i*32+30) << 60;\ + IPPB(ip, i*32+31, parm); w |= (uint64_t)SRC(ip, i*32+31) << 62;*((uint64_t *)op+i*1+ 0) = w;;\ +} + +#define BITPACK64_2(ip, op, parm) { \ + BITBLK64_2(ip, 0, op, parm); SRCI(ip); op += 2*4/sizeof(op[0]);\ +} + +#define BITBLK64_3(ip, i, op, parm) { ; register uint64_t w;;\ + IPPB(ip, i*64+ 0, parm); w = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); w |= (uint64_t)SRC(ip, i*64+ 1) << 3;\ + IPPB(ip, i*64+ 2, parm); w |= (uint64_t)SRC(ip, i*64+ 2) << 6;\ + IPPB(ip, i*64+ 3, parm); w |= (uint64_t)SRC(ip, i*64+ 3) << 9;\ + IPPB(ip, i*64+ 4, parm); w |= (uint64_t)SRC(ip, i*64+ 4) << 12;\ + 
IPPB(ip, i*64+ 5, parm); w |= (uint64_t)SRC(ip, i*64+ 5) << 15;\ + IPPB(ip, i*64+ 6, parm); w |= (uint64_t)SRC(ip, i*64+ 6) << 18;\ + IPPB(ip, i*64+ 7, parm); w |= (uint64_t)SRC(ip, i*64+ 7) << 21;\ + IPPB(ip, i*64+ 8, parm); w |= (uint64_t)SRC(ip, i*64+ 8) << 24;\ + IPPB(ip, i*64+ 9, parm); w |= (uint64_t)SRC(ip, i*64+ 9) << 27;\ + IPPB(ip, i*64+10, parm); w |= (uint64_t)SRC(ip, i*64+10) << 30;\ + IPPB(ip, i*64+11, parm); w |= (uint64_t)SRC(ip, i*64+11) << 33;\ + IPPB(ip, i*64+12, parm); w |= (uint64_t)SRC(ip, i*64+12) << 36;\ + IPPB(ip, i*64+13, parm); w |= (uint64_t)SRC(ip, i*64+13) << 39;\ + IPPB(ip, i*64+14, parm); w |= (uint64_t)SRC(ip, i*64+14) << 42;\ + IPPB(ip, i*64+15, parm); w |= (uint64_t)SRC(ip, i*64+15) << 45;\ + IPPB(ip, i*64+16, parm); w |= (uint64_t)SRC(ip, i*64+16) << 48;\ + IPPB(ip, i*64+17, parm); w |= (uint64_t)SRC(ip, i*64+17) << 51;\ + IPPB(ip, i*64+18, parm); w |= (uint64_t)SRC(ip, i*64+18) << 54;\ + IPPB(ip, i*64+19, parm); w |= (uint64_t)SRC(ip, i*64+19) << 57;\ + IPPB(ip, i*64+20, parm); w |= (uint64_t)SRC(ip, i*64+20) << 60 | (uint64_t)SRC1(ip, i*64+21) << 63;*((uint64_t *)op+i*3+ 0) = w;\ + IPPB(ip, i*64+21, parm); w = (uint64_t)SRC(ip, i*64+21) >> 1;\ + IPPB(ip, i*64+22, parm); w |= (uint64_t)SRC(ip, i*64+22) << 2;\ + IPPB(ip, i*64+23, parm); w |= (uint64_t)SRC(ip, i*64+23) << 5;\ + IPPB(ip, i*64+24, parm); w |= (uint64_t)SRC(ip, i*64+24) << 8;\ + IPPB(ip, i*64+25, parm); w |= (uint64_t)SRC(ip, i*64+25) << 11;\ + IPPB(ip, i*64+26, parm); w |= (uint64_t)SRC(ip, i*64+26) << 14;\ + IPPB(ip, i*64+27, parm); w |= (uint64_t)SRC(ip, i*64+27) << 17;\ + IPPB(ip, i*64+28, parm); w |= (uint64_t)SRC(ip, i*64+28) << 20;\ + IPPB(ip, i*64+29, parm); w |= (uint64_t)SRC(ip, i*64+29) << 23;\ + IPPB(ip, i*64+30, parm); w |= (uint64_t)SRC(ip, i*64+30) << 26;\ + IPPB(ip, i*64+31, parm); w |= (uint64_t)SRC(ip, i*64+31) << 29;*((uint64_t *)op+i*3+ 1) = w;;\ +} + +#define BITPACK64_3(ip, op, parm) { \ + BITBLK64_3(ip, 0, op, parm); SRCI(ip); op += 
3*4/sizeof(op[0]);\ +} + +#define BITBLK64_4(ip, i, op, parm) { ; register uint64_t w;;\ + IPPB(ip, i*16+ 0, parm); w = (uint64_t)SRC(ip, i*16+ 0) ;\ + IPPB(ip, i*16+ 1, parm); w |= (uint64_t)SRC(ip, i*16+ 1) << 4;\ + IPPB(ip, i*16+ 2, parm); w |= (uint64_t)SRC(ip, i*16+ 2) << 8;\ + IPPB(ip, i*16+ 3, parm); w |= (uint64_t)SRC(ip, i*16+ 3) << 12;\ + IPPB(ip, i*16+ 4, parm); w |= (uint64_t)SRC(ip, i*16+ 4) << 16;\ + IPPB(ip, i*16+ 5, parm); w |= (uint64_t)SRC(ip, i*16+ 5) << 20;\ + IPPB(ip, i*16+ 6, parm); w |= (uint64_t)SRC(ip, i*16+ 6) << 24;\ + IPPB(ip, i*16+ 7, parm); w |= (uint64_t)SRC(ip, i*16+ 7) << 28;\ + IPPB(ip, i*16+ 8, parm); w |= (uint64_t)SRC(ip, i*16+ 8) << 32;\ + IPPB(ip, i*16+ 9, parm); w |= (uint64_t)SRC(ip, i*16+ 9) << 36;\ + IPPB(ip, i*16+10, parm); w |= (uint64_t)SRC(ip, i*16+10) << 40;\ + IPPB(ip, i*16+11, parm); w |= (uint64_t)SRC(ip, i*16+11) << 44;\ + IPPB(ip, i*16+12, parm); w |= (uint64_t)SRC(ip, i*16+12) << 48;\ + IPPB(ip, i*16+13, parm); w |= (uint64_t)SRC(ip, i*16+13) << 52;\ + IPPB(ip, i*16+14, parm); w |= (uint64_t)SRC(ip, i*16+14) << 56;\ + IPPB(ip, i*16+15, parm); w |= (uint64_t)SRC(ip, i*16+15) << 60;*((uint64_t *)op+i*1+ 0) = w;;\ +} + +#define BITPACK64_4(ip, op, parm) { \ + BITBLK64_4(ip, 0, op, parm);\ + BITBLK64_4(ip, 1, op, parm); SRCI(ip); op += 4*4/sizeof(op[0]);\ +} + +#define BITBLK64_5(ip, i, op, parm) { ; register uint64_t w;;\ + IPPB(ip, i*64+ 0, parm); w = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); w |= (uint64_t)SRC(ip, i*64+ 1) << 5;\ + IPPB(ip, i*64+ 2, parm); w |= (uint64_t)SRC(ip, i*64+ 2) << 10;\ + IPPB(ip, i*64+ 3, parm); w |= (uint64_t)SRC(ip, i*64+ 3) << 15;\ + IPPB(ip, i*64+ 4, parm); w |= (uint64_t)SRC(ip, i*64+ 4) << 20;\ + IPPB(ip, i*64+ 5, parm); w |= (uint64_t)SRC(ip, i*64+ 5) << 25;\ + IPPB(ip, i*64+ 6, parm); w |= (uint64_t)SRC(ip, i*64+ 6) << 30;\ + IPPB(ip, i*64+ 7, parm); w |= (uint64_t)SRC(ip, i*64+ 7) << 35;\ + IPPB(ip, i*64+ 8, parm); w |= (uint64_t)SRC(ip, i*64+ 8) << 40;\ + 
IPPB(ip, i*64+ 9, parm); w |= (uint64_t)SRC(ip, i*64+ 9) << 45;\ + IPPB(ip, i*64+10, parm); w |= (uint64_t)SRC(ip, i*64+10) << 50;\ + IPPB(ip, i*64+11, parm); w |= (uint64_t)SRC(ip, i*64+11) << 55 | (uint64_t)SRC1(ip, i*64+12) << 60;*((uint64_t *)op+i*5+ 0) = w;\ + IPPB(ip, i*64+12, parm); w = (uint64_t)SRC(ip, i*64+12) >> 4;\ + IPPB(ip, i*64+13, parm); w |= (uint64_t)SRC(ip, i*64+13) << 1;\ + IPPB(ip, i*64+14, parm); w |= (uint64_t)SRC(ip, i*64+14) << 6;\ + IPPB(ip, i*64+15, parm); w |= (uint64_t)SRC(ip, i*64+15) << 11;\ + IPPB(ip, i*64+16, parm); w |= (uint64_t)SRC(ip, i*64+16) << 16;\ + IPPB(ip, i*64+17, parm); w |= (uint64_t)SRC(ip, i*64+17) << 21;\ + IPPB(ip, i*64+18, parm); w |= (uint64_t)SRC(ip, i*64+18) << 26;\ + IPPB(ip, i*64+19, parm); w |= (uint64_t)SRC(ip, i*64+19) << 31;\ + IPPB(ip, i*64+20, parm); w |= (uint64_t)SRC(ip, i*64+20) << 36;\ + IPPB(ip, i*64+21, parm); w |= (uint64_t)SRC(ip, i*64+21) << 41;\ + IPPB(ip, i*64+22, parm); w |= (uint64_t)SRC(ip, i*64+22) << 46;\ + IPPB(ip, i*64+23, parm); w |= (uint64_t)SRC(ip, i*64+23) << 51;\ + IPPB(ip, i*64+24, parm); w |= (uint64_t)SRC(ip, i*64+24) << 56 | (uint64_t)SRC1(ip, i*64+25) << 61;*((uint64_t *)op+i*5+ 1) = w;\ + IPPB(ip, i*64+25, parm); w = (uint64_t)SRC(ip, i*64+25) >> 3;\ + IPPB(ip, i*64+26, parm); w |= (uint64_t)SRC(ip, i*64+26) << 2;\ + IPPB(ip, i*64+27, parm); w |= (uint64_t)SRC(ip, i*64+27) << 7;\ + IPPB(ip, i*64+28, parm); w |= (uint64_t)SRC(ip, i*64+28) << 12;\ + IPPB(ip, i*64+29, parm); w |= (uint64_t)SRC(ip, i*64+29) << 17;\ + IPPB(ip, i*64+30, parm); w |= (uint64_t)SRC(ip, i*64+30) << 22;\ + IPPB(ip, i*64+31, parm); w |= (uint64_t)SRC(ip, i*64+31) << 27;*((uint64_t *)op+i*5+ 2) = w;;\ +} + +#define BITPACK64_5(ip, op, parm) { \ + BITBLK64_5(ip, 0, op, parm); SRCI(ip); op += 5*4/sizeof(op[0]);\ +} + +#define BITBLK64_6(ip, i, op, parm) { ; register uint64_t w;;\ + IPPB(ip, i*32+ 0, parm); w = (uint64_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); w |= (uint64_t)SRC(ip, i*32+ 1) << 6;\ + 
IPPB(ip, i*32+ 2, parm); w |= (uint64_t)SRC(ip, i*32+ 2) << 12;\ + IPPB(ip, i*32+ 3, parm); w |= (uint64_t)SRC(ip, i*32+ 3) << 18;\ + IPPB(ip, i*32+ 4, parm); w |= (uint64_t)SRC(ip, i*32+ 4) << 24;\ + IPPB(ip, i*32+ 5, parm); w |= (uint64_t)SRC(ip, i*32+ 5) << 30;\ + IPPB(ip, i*32+ 6, parm); w |= (uint64_t)SRC(ip, i*32+ 6) << 36;\ + IPPB(ip, i*32+ 7, parm); w |= (uint64_t)SRC(ip, i*32+ 7) << 42;\ + IPPB(ip, i*32+ 8, parm); w |= (uint64_t)SRC(ip, i*32+ 8) << 48;\ + IPPB(ip, i*32+ 9, parm); w |= (uint64_t)SRC(ip, i*32+ 9) << 54 | (uint64_t)SRC1(ip, i*32+10) << 60;*((uint64_t *)op+i*3+ 0) = w;\ + IPPB(ip, i*32+10, parm); w = (uint64_t)SRC(ip, i*32+10) >> 4;\ + IPPB(ip, i*32+11, parm); w |= (uint64_t)SRC(ip, i*32+11) << 2;\ + IPPB(ip, i*32+12, parm); w |= (uint64_t)SRC(ip, i*32+12) << 8;\ + IPPB(ip, i*32+13, parm); w |= (uint64_t)SRC(ip, i*32+13) << 14;\ + IPPB(ip, i*32+14, parm); w |= (uint64_t)SRC(ip, i*32+14) << 20;\ + IPPB(ip, i*32+15, parm); w |= (uint64_t)SRC(ip, i*32+15) << 26;\ + IPPB(ip, i*32+16, parm); w |= (uint64_t)SRC(ip, i*32+16) << 32;\ + IPPB(ip, i*32+17, parm); w |= (uint64_t)SRC(ip, i*32+17) << 38;\ + IPPB(ip, i*32+18, parm); w |= (uint64_t)SRC(ip, i*32+18) << 44;\ + IPPB(ip, i*32+19, parm); w |= (uint64_t)SRC(ip, i*32+19) << 50;\ + IPPB(ip, i*32+20, parm); w |= (uint64_t)SRC(ip, i*32+20) << 56 | (uint64_t)SRC1(ip, i*32+21) << 62;*((uint64_t *)op+i*3+ 1) = w;\ + IPPB(ip, i*32+21, parm); w = (uint64_t)SRC(ip, i*32+21) >> 2;\ + IPPB(ip, i*32+22, parm); w |= (uint64_t)SRC(ip, i*32+22) << 4;\ + IPPB(ip, i*32+23, parm); w |= (uint64_t)SRC(ip, i*32+23) << 10;\ + IPPB(ip, i*32+24, parm); w |= (uint64_t)SRC(ip, i*32+24) << 16;\ + IPPB(ip, i*32+25, parm); w |= (uint64_t)SRC(ip, i*32+25) << 22;\ + IPPB(ip, i*32+26, parm); w |= (uint64_t)SRC(ip, i*32+26) << 28;\ + IPPB(ip, i*32+27, parm); w |= (uint64_t)SRC(ip, i*32+27) << 34;\ + IPPB(ip, i*32+28, parm); w |= (uint64_t)SRC(ip, i*32+28) << 40;\ + IPPB(ip, i*32+29, parm); w |= (uint64_t)SRC(ip, i*32+29) << 46;\ + 
IPPB(ip, i*32+30, parm); w |= (uint64_t)SRC(ip, i*32+30) << 52;\ + IPPB(ip, i*32+31, parm); w |= (uint64_t)SRC(ip, i*32+31) << 58;*((uint64_t *)op+i*3+ 2) = w;;\ +} + +#define BITPACK64_6(ip, op, parm) { \ + BITBLK64_6(ip, 0, op, parm); SRCI(ip); op += 6*4/sizeof(op[0]);\ +} + +#define BITBLK64_7(ip, i, op, parm) { ; register uint64_t w;;\ + IPPB(ip, i*64+ 0, parm); w = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); w |= (uint64_t)SRC(ip, i*64+ 1) << 7;\ + IPPB(ip, i*64+ 2, parm); w |= (uint64_t)SRC(ip, i*64+ 2) << 14;\ + IPPB(ip, i*64+ 3, parm); w |= (uint64_t)SRC(ip, i*64+ 3) << 21;\ + IPPB(ip, i*64+ 4, parm); w |= (uint64_t)SRC(ip, i*64+ 4) << 28;\ + IPPB(ip, i*64+ 5, parm); w |= (uint64_t)SRC(ip, i*64+ 5) << 35;\ + IPPB(ip, i*64+ 6, parm); w |= (uint64_t)SRC(ip, i*64+ 6) << 42;\ + IPPB(ip, i*64+ 7, parm); w |= (uint64_t)SRC(ip, i*64+ 7) << 49;\ + IPPB(ip, i*64+ 8, parm); w |= (uint64_t)SRC(ip, i*64+ 8) << 56 | (uint64_t)SRC1(ip, i*64+9) << 63;*((uint64_t *)op+i*7+ 0) = w;\ + IPPB(ip, i*64+ 9, parm); w = (uint64_t)SRC(ip, i*64+ 9) >> 1;\ + IPPB(ip, i*64+10, parm); w |= (uint64_t)SRC(ip, i*64+10) << 6;\ + IPPB(ip, i*64+11, parm); w |= (uint64_t)SRC(ip, i*64+11) << 13;\ + IPPB(ip, i*64+12, parm); w |= (uint64_t)SRC(ip, i*64+12) << 20;\ + IPPB(ip, i*64+13, parm); w |= (uint64_t)SRC(ip, i*64+13) << 27;\ + IPPB(ip, i*64+14, parm); w |= (uint64_t)SRC(ip, i*64+14) << 34;\ + IPPB(ip, i*64+15, parm); w |= (uint64_t)SRC(ip, i*64+15) << 41;\ + IPPB(ip, i*64+16, parm); w |= (uint64_t)SRC(ip, i*64+16) << 48;\ + IPPB(ip, i*64+17, parm); w |= (uint64_t)SRC(ip, i*64+17) << 55 | (uint64_t)SRC1(ip, i*64+18) << 62;*((uint64_t *)op+i*7+ 1) = w;\ + IPPB(ip, i*64+18, parm); w = (uint64_t)SRC(ip, i*64+18) >> 2;\ + IPPB(ip, i*64+19, parm); w |= (uint64_t)SRC(ip, i*64+19) << 5;\ + IPPB(ip, i*64+20, parm); w |= (uint64_t)SRC(ip, i*64+20) << 12;\ + IPPB(ip, i*64+21, parm); w |= (uint64_t)SRC(ip, i*64+21) << 19;\ + IPPB(ip, i*64+22, parm); w |= (uint64_t)SRC(ip, i*64+22) << 26;\ 
+ IPPB(ip, i*64+23, parm); w |= (uint64_t)SRC(ip, i*64+23) << 33;\ + IPPB(ip, i*64+24, parm); w |= (uint64_t)SRC(ip, i*64+24) << 40;\ + IPPB(ip, i*64+25, parm); w |= (uint64_t)SRC(ip, i*64+25) << 47;\ + IPPB(ip, i*64+26, parm); w |= (uint64_t)SRC(ip, i*64+26) << 54 | (uint64_t)SRC1(ip, i*64+27) << 61;*((uint64_t *)op+i*7+ 2) = w;\ + IPPB(ip, i*64+27, parm); w = (uint64_t)SRC(ip, i*64+27) >> 3;\ + IPPB(ip, i*64+28, parm); w |= (uint64_t)SRC(ip, i*64+28) << 4;\ + IPPB(ip, i*64+29, parm); w |= (uint64_t)SRC(ip, i*64+29) << 11;\ + IPPB(ip, i*64+30, parm); w |= (uint64_t)SRC(ip, i*64+30) << 18;\ + IPPB(ip, i*64+31, parm); w |= (uint64_t)SRC(ip, i*64+31) << 25;*((uint64_t *)op+i*7+ 3) = w;;\ +} + +#define BITPACK64_7(ip, op, parm) { \ + BITBLK64_7(ip, 0, op, parm); SRCI(ip); op += 7*4/sizeof(op[0]);\ +} + +#define BITBLK64_8(ip, i, op, parm) { ;\ + IPPB(ip, i*8+ 0, parm); *((uint64_t *)op+i*1+ 0) = (uint64_t)SRC(ip, i*8+ 0) ;\ + IPPB(ip, i*8+ 1, parm); *((uint64_t *)op+i*1+ 0) |= (uint64_t)SRC(ip, i*8+ 1) << 8;\ + IPPB(ip, i*8+ 2, parm); *((uint64_t *)op+i*1+ 0) |= (uint64_t)SRC(ip, i*8+ 2) << 16;\ + IPPB(ip, i*8+ 3, parm); *((uint64_t *)op+i*1+ 0) |= (uint64_t)SRC(ip, i*8+ 3) << 24;\ + IPPB(ip, i*8+ 4, parm); *((uint64_t *)op+i*1+ 0) |= (uint64_t)SRC(ip, i*8+ 4) << 32;\ + IPPB(ip, i*8+ 5, parm); *((uint64_t *)op+i*1+ 0) |= (uint64_t)SRC(ip, i*8+ 5) << 40;\ + IPPB(ip, i*8+ 6, parm); *((uint64_t *)op+i*1+ 0) |= (uint64_t)SRC(ip, i*8+ 6) << 48;\ + IPPB(ip, i*8+ 7, parm); *((uint64_t *)op+i*1+ 0) |= (uint64_t)SRC(ip, i*8+ 7) << 56;\ +} + +#define BITPACK64_8(ip, op, parm) { \ + BITBLK64_8(ip, 0, op, parm);\ + BITBLK64_8(ip, 1, op, parm);\ + BITBLK64_8(ip, 2, op, parm);\ + BITBLK64_8(ip, 3, op, parm); SRCI(ip); op += 8*4/sizeof(op[0]);\ +} + +#define BITBLK64_9(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*9+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*9+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 9;\ + IPPB(ip, i*64+ 2, 
parm); *((uint64_t *)op+i*9+ 0) |= (uint64_t)SRC(ip, i*64+ 2) << 18;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*9+ 0) |= (uint64_t)SRC(ip, i*64+ 3) << 27;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*9+ 0) |= (uint64_t)SRC(ip, i*64+ 4) << 36;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*9+ 0) |= (uint64_t)SRC(ip, i*64+ 5) << 45;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*9+ 0) |= (uint64_t)SRC(ip, i*64+ 6) << 54 | (uint64_t)SRC1(ip, i*64+7) << 63;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*9+ 1) = (uint64_t)SRC(ip, i*64+ 7) >> 1;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*64+ 8) << 8;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*64+ 9) << 17;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*64+10) << 26;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*64+11) << 35;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*64+12) << 44;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*64+13) << 53 | (uint64_t)SRC1(ip, i*64+14) << 62;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*9+ 2) = (uint64_t)SRC(ip, i*64+14) >> 2;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*64+15) << 7;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*64+16) << 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*64+17) << 25;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*64+18) << 34;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*64+19) << 43;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*64+20) << 52 | (uint64_t)SRC1(ip, i*64+21) << 61;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*9+ 3) = (uint64_t)SRC(ip, i*64+21) >> 3;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*64+22) << 6;\ + IPPB(ip, i*64+23, parm); 
*((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*64+23) << 15;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*64+24) << 24;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*64+25) << 33;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*64+26) << 42;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*64+27) << 51 | (uint64_t)SRC1(ip, i*64+28) << 60;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*9+ 4) = (uint64_t)SRC(ip, i*64+28) >> 4;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*9+ 4) |= (uint64_t)SRC(ip, i*64+29) << 5;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*9+ 4) |= (uint64_t)SRC(ip, i*64+30) << 14;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*9+ 4) |= (uint64_t)SRC(ip, i*64+31) << 23;\ +} + +#define BITPACK64_9(ip, op, parm) { \ + BITBLK64_9(ip, 0, op, parm); SRCI(ip); op += 9*4/sizeof(op[0]);\ +} + +#define BITBLK64_10(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*5+ 0) = (uint64_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*5+ 0) |= (uint64_t)SRC(ip, i*32+ 1) << 10;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*5+ 0) |= (uint64_t)SRC(ip, i*32+ 2) << 20;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*5+ 0) |= (uint64_t)SRC(ip, i*32+ 3) << 30;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*5+ 0) |= (uint64_t)SRC(ip, i*32+ 4) << 40;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*5+ 0) |= (uint64_t)SRC(ip, i*32+ 5) << 50 | (uint64_t)SRC1(ip, i*32+6) << 60;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*5+ 1) = (uint64_t)SRC(ip, i*32+ 6) >> 4;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*5+ 1) |= (uint64_t)SRC(ip, i*32+ 7) << 6;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*5+ 1) |= (uint64_t)SRC(ip, i*32+ 8) << 16;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*5+ 1) |= (uint64_t)SRC(ip, i*32+ 9) << 26;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*5+ 1) |= (uint64_t)SRC(ip, i*32+10) << 36;\ + 
IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*5+ 1) |= (uint64_t)SRC(ip, i*32+11) << 46 | (uint64_t)SRC1(ip, i*32+12) << 56;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*5+ 2) = (uint64_t)SRC(ip, i*32+12) >> 8;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*5+ 2) |= (uint64_t)SRC(ip, i*32+13) << 2;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*5+ 2) |= (uint64_t)SRC(ip, i*32+14) << 12;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*5+ 2) |= (uint64_t)SRC(ip, i*32+15) << 22;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*5+ 2) |= (uint64_t)SRC(ip, i*32+16) << 32;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*5+ 2) |= (uint64_t)SRC(ip, i*32+17) << 42;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*5+ 2) |= (uint64_t)SRC(ip, i*32+18) << 52 | (uint64_t)SRC1(ip, i*32+19) << 62;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*5+ 3) = (uint64_t)SRC(ip, i*32+19) >> 2;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*5+ 3) |= (uint64_t)SRC(ip, i*32+20) << 8;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*5+ 3) |= (uint64_t)SRC(ip, i*32+21) << 18;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*5+ 3) |= (uint64_t)SRC(ip, i*32+22) << 28;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*5+ 3) |= (uint64_t)SRC(ip, i*32+23) << 38;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*5+ 3) |= (uint64_t)SRC(ip, i*32+24) << 48 | (uint64_t)SRC1(ip, i*32+25) << 58;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*5+ 4) = (uint64_t)SRC(ip, i*32+25) >> 6;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*32+26) << 4;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*32+27) << 14;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*32+28) << 24;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*32+29) << 34;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*32+30) << 44;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*32+31) << 54;\ +} + 
+#define BITPACK64_10(ip, op, parm) { \ + BITBLK64_10(ip, 0, op, parm); SRCI(ip); op += 10*4/sizeof(op[0]);\ +} + +#define BITBLK64_11(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*11+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*11+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 11;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*11+ 0) |= (uint64_t)SRC(ip, i*64+ 2) << 22;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*11+ 0) |= (uint64_t)SRC(ip, i*64+ 3) << 33;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*11+ 0) |= (uint64_t)SRC(ip, i*64+ 4) << 44 | (uint64_t)SRC1(ip, i*64+5) << 55;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*11+ 1) = (uint64_t)SRC(ip, i*64+ 5) >> 9;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*11+ 1) |= (uint64_t)SRC(ip, i*64+ 6) << 2;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*11+ 1) |= (uint64_t)SRC(ip, i*64+ 7) << 13;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*11+ 1) |= (uint64_t)SRC(ip, i*64+ 8) << 24;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*11+ 1) |= (uint64_t)SRC(ip, i*64+ 9) << 35;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*11+ 1) |= (uint64_t)SRC(ip, i*64+10) << 46 | (uint64_t)SRC1(ip, i*64+11) << 57;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*11+ 2) = (uint64_t)SRC(ip, i*64+11) >> 7;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*11+ 2) |= (uint64_t)SRC(ip, i*64+12) << 4;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*11+ 2) |= (uint64_t)SRC(ip, i*64+13) << 15;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*11+ 2) |= (uint64_t)SRC(ip, i*64+14) << 26;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*11+ 2) |= (uint64_t)SRC(ip, i*64+15) << 37;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*11+ 2) |= (uint64_t)SRC(ip, i*64+16) << 48 | (uint64_t)SRC1(ip, i*64+17) << 59;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*11+ 3) = (uint64_t)SRC(ip, i*64+17) >> 5;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*11+ 3) |= (uint64_t)SRC(ip, i*64+18) << 6;\ + IPPB(ip, i*64+19, 
parm); *((uint64_t *)op+i*11+ 3) |= (uint64_t)SRC(ip, i*64+19) << 17;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*11+ 3) |= (uint64_t)SRC(ip, i*64+20) << 28;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*11+ 3) |= (uint64_t)SRC(ip, i*64+21) << 39;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*11+ 3) |= (uint64_t)SRC(ip, i*64+22) << 50 | (uint64_t)SRC1(ip, i*64+23) << 61;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*11+ 4) = (uint64_t)SRC(ip, i*64+23) >> 3;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*11+ 4) |= (uint64_t)SRC(ip, i*64+24) << 8;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*11+ 4) |= (uint64_t)SRC(ip, i*64+25) << 19;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*11+ 4) |= (uint64_t)SRC(ip, i*64+26) << 30;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*11+ 4) |= (uint64_t)SRC(ip, i*64+27) << 41;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*11+ 4) |= (uint64_t)SRC(ip, i*64+28) << 52 | (uint64_t)SRC1(ip, i*64+29) << 63;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*11+ 5) = (uint64_t)SRC(ip, i*64+29) >> 1;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*11+ 5) |= (uint64_t)SRC(ip, i*64+30) << 10;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*11+ 5) |= (uint64_t)SRC(ip, i*64+31) << 21;\ +} + +#define BITPACK64_11(ip, op, parm) { \ + BITBLK64_11(ip, 0, op, parm); SRCI(ip); op += 11*4/sizeof(op[0]);\ +} + +#define BITBLK64_12(ip, i, op, parm) { ;\ + IPPB(ip, i*16+ 0, parm); *((uint64_t *)op+i*3+ 0) = (uint64_t)SRC(ip, i*16+ 0) ;\ + IPPB(ip, i*16+ 1, parm); *((uint64_t *)op+i*3+ 0) |= (uint64_t)SRC(ip, i*16+ 1) << 12;\ + IPPB(ip, i*16+ 2, parm); *((uint64_t *)op+i*3+ 0) |= (uint64_t)SRC(ip, i*16+ 2) << 24;\ + IPPB(ip, i*16+ 3, parm); *((uint64_t *)op+i*3+ 0) |= (uint64_t)SRC(ip, i*16+ 3) << 36;\ + IPPB(ip, i*16+ 4, parm); *((uint64_t *)op+i*3+ 0) |= (uint64_t)SRC(ip, i*16+ 4) << 48 | (uint64_t)SRC1(ip, i*16+5) << 60;\ + IPPB(ip, i*16+ 5, parm); *((uint64_t *)op+i*3+ 1) = (uint64_t)SRC(ip, i*16+ 5) >> 4;\ + IPPB(ip, i*16+ 6, parm); *((uint64_t 
*)op+i*3+ 1) |= (uint64_t)SRC(ip, i*16+ 6) << 8;\ + IPPB(ip, i*16+ 7, parm); *((uint64_t *)op+i*3+ 1) |= (uint64_t)SRC(ip, i*16+ 7) << 20;\ + IPPB(ip, i*16+ 8, parm); *((uint64_t *)op+i*3+ 1) |= (uint64_t)SRC(ip, i*16+ 8) << 32;\ + IPPB(ip, i*16+ 9, parm); *((uint64_t *)op+i*3+ 1) |= (uint64_t)SRC(ip, i*16+ 9) << 44 | (uint64_t)SRC1(ip, i*16+10) << 56;\ + IPPB(ip, i*16+10, parm); *((uint64_t *)op+i*3+ 2) = (uint64_t)SRC(ip, i*16+10) >> 8;\ + IPPB(ip, i*16+11, parm); *((uint64_t *)op+i*3+ 2) |= (uint64_t)SRC(ip, i*16+11) << 4;\ + IPPB(ip, i*16+12, parm); *((uint64_t *)op+i*3+ 2) |= (uint64_t)SRC(ip, i*16+12) << 16;\ + IPPB(ip, i*16+13, parm); *((uint64_t *)op+i*3+ 2) |= (uint64_t)SRC(ip, i*16+13) << 28;\ + IPPB(ip, i*16+14, parm); *((uint64_t *)op+i*3+ 2) |= (uint64_t)SRC(ip, i*16+14) << 40;\ + IPPB(ip, i*16+15, parm); *((uint64_t *)op+i*3+ 2) |= (uint64_t)SRC(ip, i*16+15) << 52;\ +} + +#define BITPACK64_12(ip, op, parm) { \ + BITBLK64_12(ip, 0, op, parm);\ + BITBLK64_12(ip, 1, op, parm); SRCI(ip); op += 12*4/sizeof(op[0]);\ +} + +#define BITBLK64_13(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*13+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*13+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 13;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*13+ 0) |= (uint64_t)SRC(ip, i*64+ 2) << 26;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*13+ 0) |= (uint64_t)SRC(ip, i*64+ 3) << 39 | (uint64_t)SRC1(ip, i*64+4) << 52;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*13+ 1) = (uint64_t)SRC(ip, i*64+ 4) >> 12;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*13+ 1) |= (uint64_t)SRC(ip, i*64+ 5) << 1;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*13+ 1) |= (uint64_t)SRC(ip, i*64+ 6) << 14;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*13+ 1) |= (uint64_t)SRC(ip, i*64+ 7) << 27;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*13+ 1) |= (uint64_t)SRC(ip, i*64+ 8) << 40 | (uint64_t)SRC1(ip, i*64+9) << 53;\ + IPPB(ip, i*64+ 9, parm); 
*((uint64_t *)op+i*13+ 2) = (uint64_t)SRC(ip, i*64+ 9) >> 11;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*13+ 2) |= (uint64_t)SRC(ip, i*64+10) << 2;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*13+ 2) |= (uint64_t)SRC(ip, i*64+11) << 15;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*13+ 2) |= (uint64_t)SRC(ip, i*64+12) << 28;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*13+ 2) |= (uint64_t)SRC(ip, i*64+13) << 41 | (uint64_t)SRC1(ip, i*64+14) << 54;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*13+ 3) = (uint64_t)SRC(ip, i*64+14) >> 10;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*13+ 3) |= (uint64_t)SRC(ip, i*64+15) << 3;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*13+ 3) |= (uint64_t)SRC(ip, i*64+16) << 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*13+ 3) |= (uint64_t)SRC(ip, i*64+17) << 29;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*13+ 3) |= (uint64_t)SRC(ip, i*64+18) << 42 | (uint64_t)SRC1(ip, i*64+19) << 55;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*13+ 4) = (uint64_t)SRC(ip, i*64+19) >> 9;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*13+ 4) |= (uint64_t)SRC(ip, i*64+20) << 4;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*13+ 4) |= (uint64_t)SRC(ip, i*64+21) << 17;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*13+ 4) |= (uint64_t)SRC(ip, i*64+22) << 30;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*13+ 4) |= (uint64_t)SRC(ip, i*64+23) << 43 | (uint64_t)SRC1(ip, i*64+24) << 56;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*13+ 5) = (uint64_t)SRC(ip, i*64+24) >> 8;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*13+ 5) |= (uint64_t)SRC(ip, i*64+25) << 5;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*13+ 5) |= (uint64_t)SRC(ip, i*64+26) << 18;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*13+ 5) |= (uint64_t)SRC(ip, i*64+27) << 31;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*13+ 5) |= (uint64_t)SRC(ip, i*64+28) << 44 | (uint64_t)SRC1(ip, i*64+29) << 57;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*13+ 6) = (uint64_t)SRC(ip, 
i*64+29) >> 7;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*13+ 6) |= (uint64_t)SRC(ip, i*64+30) << 6;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*13+ 6) |= (uint64_t)SRC(ip, i*64+31) << 19;\ +} + +#define BITPACK64_13(ip, op, parm) { \ + BITBLK64_13(ip, 0, op, parm); SRCI(ip); op += 13*4/sizeof(op[0]);\ +} + +#define BITBLK64_14(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*7+ 0) = (uint64_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*7+ 0) |= (uint64_t)SRC(ip, i*32+ 1) << 14;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*7+ 0) |= (uint64_t)SRC(ip, i*32+ 2) << 28;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*7+ 0) |= (uint64_t)SRC(ip, i*32+ 3) << 42 | (uint64_t)SRC1(ip, i*32+4) << 56;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*7+ 1) = (uint64_t)SRC(ip, i*32+ 4) >> 8;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*7+ 1) |= (uint64_t)SRC(ip, i*32+ 5) << 6;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*7+ 1) |= (uint64_t)SRC(ip, i*32+ 6) << 20;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*7+ 1) |= (uint64_t)SRC(ip, i*32+ 7) << 34;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*7+ 1) |= (uint64_t)SRC(ip, i*32+ 8) << 48 | (uint64_t)SRC1(ip, i*32+9) << 62;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*7+ 2) = (uint64_t)SRC(ip, i*32+ 9) >> 2;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*7+ 2) |= (uint64_t)SRC(ip, i*32+10) << 12;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*7+ 2) |= (uint64_t)SRC(ip, i*32+11) << 26;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*7+ 2) |= (uint64_t)SRC(ip, i*32+12) << 40 | (uint64_t)SRC1(ip, i*32+13) << 54;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*7+ 3) = (uint64_t)SRC(ip, i*32+13) >> 10;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*7+ 3) |= (uint64_t)SRC(ip, i*32+14) << 4;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*7+ 3) |= (uint64_t)SRC(ip, i*32+15) << 18;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*7+ 3) |= (uint64_t)SRC(ip, i*32+16) << 32;\ + IPPB(ip, 
i*32+17, parm); *((uint64_t *)op+i*7+ 3) |= (uint64_t)SRC(ip, i*32+17) << 46 | (uint64_t)SRC1(ip, i*32+18) << 60;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*7+ 4) = (uint64_t)SRC(ip, i*32+18) >> 4;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*7+ 4) |= (uint64_t)SRC(ip, i*32+19) << 10;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*7+ 4) |= (uint64_t)SRC(ip, i*32+20) << 24;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*7+ 4) |= (uint64_t)SRC(ip, i*32+21) << 38 | (uint64_t)SRC1(ip, i*32+22) << 52;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*7+ 5) = (uint64_t)SRC(ip, i*32+22) >> 12;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*7+ 5) |= (uint64_t)SRC(ip, i*32+23) << 2;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*7+ 5) |= (uint64_t)SRC(ip, i*32+24) << 16;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*7+ 5) |= (uint64_t)SRC(ip, i*32+25) << 30;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*7+ 5) |= (uint64_t)SRC(ip, i*32+26) << 44 | (uint64_t)SRC1(ip, i*32+27) << 58;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*7+ 6) = (uint64_t)SRC(ip, i*32+27) >> 6;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*7+ 6) |= (uint64_t)SRC(ip, i*32+28) << 8;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*7+ 6) |= (uint64_t)SRC(ip, i*32+29) << 22;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*7+ 6) |= (uint64_t)SRC(ip, i*32+30) << 36;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*7+ 6) |= (uint64_t)SRC(ip, i*32+31) << 50;\ +} + +#define BITPACK64_14(ip, op, parm) { \ + BITBLK64_14(ip, 0, op, parm); SRCI(ip); op += 14*4/sizeof(op[0]);\ +} + +#define BITBLK64_15(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*15+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*15+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 15;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*15+ 0) |= (uint64_t)SRC(ip, i*64+ 2) << 30;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*15+ 0) |= (uint64_t)SRC(ip, i*64+ 3) << 45 | (uint64_t)SRC1(ip, i*64+4) << 60;\ + 
IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*15+ 1) = (uint64_t)SRC(ip, i*64+ 4) >> 4;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*15+ 1) |= (uint64_t)SRC(ip, i*64+ 5) << 11;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*15+ 1) |= (uint64_t)SRC(ip, i*64+ 6) << 26;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*15+ 1) |= (uint64_t)SRC(ip, i*64+ 7) << 41 | (uint64_t)SRC1(ip, i*64+8) << 56;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*15+ 2) = (uint64_t)SRC(ip, i*64+ 8) >> 8;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*15+ 2) |= (uint64_t)SRC(ip, i*64+ 9) << 7;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*15+ 2) |= (uint64_t)SRC(ip, i*64+10) << 22;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*15+ 2) |= (uint64_t)SRC(ip, i*64+11) << 37 | (uint64_t)SRC1(ip, i*64+12) << 52;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*15+ 3) = (uint64_t)SRC(ip, i*64+12) >> 12;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*15+ 3) |= (uint64_t)SRC(ip, i*64+13) << 3;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*15+ 3) |= (uint64_t)SRC(ip, i*64+14) << 18;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*15+ 3) |= (uint64_t)SRC(ip, i*64+15) << 33;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*15+ 3) |= (uint64_t)SRC(ip, i*64+16) << 48 | (uint64_t)SRC1(ip, i*64+17) << 63;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*15+ 4) = (uint64_t)SRC(ip, i*64+17) >> 1;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*15+ 4) |= (uint64_t)SRC(ip, i*64+18) << 14;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*15+ 4) |= (uint64_t)SRC(ip, i*64+19) << 29;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*15+ 4) |= (uint64_t)SRC(ip, i*64+20) << 44 | (uint64_t)SRC1(ip, i*64+21) << 59;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*15+ 5) = (uint64_t)SRC(ip, i*64+21) >> 5;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*15+ 5) |= (uint64_t)SRC(ip, i*64+22) << 10;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*15+ 5) |= (uint64_t)SRC(ip, i*64+23) << 25;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*15+ 
5) |= (uint64_t)SRC(ip, i*64+24) << 40 | (uint64_t)SRC1(ip, i*64+25) << 55;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*15+ 6) = (uint64_t)SRC(ip, i*64+25) >> 9;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*15+ 6) |= (uint64_t)SRC(ip, i*64+26) << 6;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*15+ 6) |= (uint64_t)SRC(ip, i*64+27) << 21;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*15+ 6) |= (uint64_t)SRC(ip, i*64+28) << 36 | (uint64_t)SRC1(ip, i*64+29) << 51;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*15+ 7) = (uint64_t)SRC(ip, i*64+29) >> 13;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*15+ 7) |= (uint64_t)SRC(ip, i*64+30) << 2;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*15+ 7) |= (uint64_t)SRC(ip, i*64+31) << 17;\ +} + +#define BITPACK64_15(ip, op, parm) { \ + BITBLK64_15(ip, 0, op, parm); SRCI(ip); op += 15*4/sizeof(op[0]);\ +} + +#define BITBLK64_16(ip, i, op, parm) { \ + IPPB(ip, i*4+ 0, parm); *(uint16_t *)(op+i*8+ 0) = SRC(ip, i*4+ 0);\ + IPPB(ip, i*4+ 1, parm); *(uint16_t *)(op+i*8+ 2) = SRC(ip, i*4+ 1);\ + IPPB(ip, i*4+ 2, parm); *(uint16_t *)(op+i*8+ 4) = SRC(ip, i*4+ 2);\ + IPPB(ip, i*4+ 3, parm); *(uint16_t *)(op+i*8+ 6) = SRC(ip, i*4+ 3);;\ +} + +#define BITPACK64_16(ip, op, parm) { \ + BITBLK64_16(ip, 0, op, parm);\ + BITBLK64_16(ip, 1, op, parm);\ + BITBLK64_16(ip, 2, op, parm);\ + BITBLK64_16(ip, 3, op, parm);\ + BITBLK64_16(ip, 4, op, parm);\ + BITBLK64_16(ip, 5, op, parm);\ + BITBLK64_16(ip, 6, op, parm);\ + BITBLK64_16(ip, 7, op, parm); SRCI(ip); op += 16*4/sizeof(op[0]);\ +} + +#define BITBLK64_17(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*17+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*17+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 17;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*17+ 0) |= (uint64_t)SRC(ip, i*64+ 2) << 34 | (uint64_t)SRC1(ip, i*64+3) << 51;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*17+ 1) = (uint64_t)SRC(ip, i*64+ 3) >> 13;\ + IPPB(ip, i*64+ 4, 
parm); *((uint64_t *)op+i*17+ 1) |= (uint64_t)SRC(ip, i*64+ 4) << 4;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*17+ 1) |= (uint64_t)SRC(ip, i*64+ 5) << 21;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*17+ 1) |= (uint64_t)SRC(ip, i*64+ 6) << 38 | (uint64_t)SRC1(ip, i*64+7) << 55;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*17+ 2) = (uint64_t)SRC(ip, i*64+ 7) >> 9;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*17+ 2) |= (uint64_t)SRC(ip, i*64+ 8) << 8;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*17+ 2) |= (uint64_t)SRC(ip, i*64+ 9) << 25;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*17+ 2) |= (uint64_t)SRC(ip, i*64+10) << 42 | (uint64_t)SRC1(ip, i*64+11) << 59;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*17+ 3) = (uint64_t)SRC(ip, i*64+11) >> 5;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*17+ 3) |= (uint64_t)SRC(ip, i*64+12) << 12;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*17+ 3) |= (uint64_t)SRC(ip, i*64+13) << 29;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*17+ 3) |= (uint64_t)SRC(ip, i*64+14) << 46 | (uint64_t)SRC1(ip, i*64+15) << 63;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*17+ 4) = (uint64_t)SRC(ip, i*64+15) >> 1;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*17+ 4) |= (uint64_t)SRC(ip, i*64+16) << 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*17+ 4) |= (uint64_t)SRC(ip, i*64+17) << 33 | (uint64_t)SRC1(ip, i*64+18) << 50;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*17+ 5) = (uint64_t)SRC(ip, i*64+18) >> 14;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*17+ 5) |= (uint64_t)SRC(ip, i*64+19) << 3;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*17+ 5) |= (uint64_t)SRC(ip, i*64+20) << 20;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*17+ 5) |= (uint64_t)SRC(ip, i*64+21) << 37 | (uint64_t)SRC1(ip, i*64+22) << 54;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*17+ 6) = (uint64_t)SRC(ip, i*64+22) >> 10;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*17+ 6) |= (uint64_t)SRC(ip, i*64+23) << 7;\ + IPPB(ip, i*64+24, parm); 
*((uint64_t *)op+i*17+ 6) |= (uint64_t)SRC(ip, i*64+24) << 24;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*17+ 6) |= (uint64_t)SRC(ip, i*64+25) << 41 | (uint64_t)SRC1(ip, i*64+26) << 58;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*17+ 7) = (uint64_t)SRC(ip, i*64+26) >> 6;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*17+ 7) |= (uint64_t)SRC(ip, i*64+27) << 11;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*17+ 7) |= (uint64_t)SRC(ip, i*64+28) << 28;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*17+ 7) |= (uint64_t)SRC(ip, i*64+29) << 45 | (uint64_t)SRC1(ip, i*64+30) << 62;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*17+ 8) = (uint64_t)SRC(ip, i*64+30) >> 2;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*17+ 8) |= (uint64_t)SRC(ip, i*64+31) << 15;\ +} + +#define BITPACK64_17(ip, op, parm) { \ + BITBLK64_17(ip, 0, op, parm); SRCI(ip); op += 17*4/sizeof(op[0]);\ +} + +#define BITBLK64_18(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*9+ 0) = (uint64_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*9+ 0) |= (uint64_t)SRC(ip, i*32+ 1) << 18;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*9+ 0) |= (uint64_t)SRC(ip, i*32+ 2) << 36 | (uint64_t)SRC1(ip, i*32+3) << 54;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*9+ 1) = (uint64_t)SRC(ip, i*32+ 3) >> 10;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*32+ 4) << 8;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*32+ 5) << 26;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*32+ 6) << 44 | (uint64_t)SRC1(ip, i*32+7) << 62;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*9+ 2) = (uint64_t)SRC(ip, i*32+ 7) >> 2;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*32+ 8) << 16;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*32+ 9) << 34 | (uint64_t)SRC1(ip, i*32+10) << 52;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*9+ 3) = (uint64_t)SRC(ip, 
i*32+10) >> 12;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*32+11) << 6;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*32+12) << 24;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*32+13) << 42 | (uint64_t)SRC1(ip, i*32+14) << 60;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*9+ 4) = (uint64_t)SRC(ip, i*32+14) >> 4;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*9+ 4) |= (uint64_t)SRC(ip, i*32+15) << 14;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*9+ 4) |= (uint64_t)SRC(ip, i*32+16) << 32 | (uint64_t)SRC1(ip, i*32+17) << 50;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*9+ 5) = (uint64_t)SRC(ip, i*32+17) >> 14;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*9+ 5) |= (uint64_t)SRC(ip, i*32+18) << 4;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*9+ 5) |= (uint64_t)SRC(ip, i*32+19) << 22;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*9+ 5) |= (uint64_t)SRC(ip, i*32+20) << 40 | (uint64_t)SRC1(ip, i*32+21) << 58;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*9+ 6) = (uint64_t)SRC(ip, i*32+21) >> 6;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*9+ 6) |= (uint64_t)SRC(ip, i*32+22) << 12;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*9+ 6) |= (uint64_t)SRC(ip, i*32+23) << 30 | (uint64_t)SRC1(ip, i*32+24) << 48;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*9+ 7) = (uint64_t)SRC(ip, i*32+24) >> 16;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*9+ 7) |= (uint64_t)SRC(ip, i*32+25) << 2;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*9+ 7) |= (uint64_t)SRC(ip, i*32+26) << 20;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*9+ 7) |= (uint64_t)SRC(ip, i*32+27) << 38 | (uint64_t)SRC1(ip, i*32+28) << 56;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*9+ 8) = (uint64_t)SRC(ip, i*32+28) >> 8;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*9+ 8) |= (uint64_t)SRC(ip, i*32+29) << 10;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*9+ 8) |= (uint64_t)SRC(ip, i*32+30) << 28;\ + IPPB(ip, 
i*32+31, parm); *((uint64_t *)op+i*9+ 8) |= (uint64_t)SRC(ip, i*32+31) << 46;\ +} + +#define BITPACK64_18(ip, op, parm) { \ + BITBLK64_18(ip, 0, op, parm); SRCI(ip); op += 18*4/sizeof(op[0]);\ +} + +#define BITBLK64_19(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*19+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*19+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 19;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*19+ 0) |= (uint64_t)SRC(ip, i*64+ 2) << 38 | (uint64_t)SRC1(ip, i*64+3) << 57;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*19+ 1) = (uint64_t)SRC(ip, i*64+ 3) >> 7;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*19+ 1) |= (uint64_t)SRC(ip, i*64+ 4) << 12;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*19+ 1) |= (uint64_t)SRC(ip, i*64+ 5) << 31 | (uint64_t)SRC1(ip, i*64+6) << 50;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*19+ 2) = (uint64_t)SRC(ip, i*64+ 6) >> 14;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*19+ 2) |= (uint64_t)SRC(ip, i*64+ 7) << 5;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*19+ 2) |= (uint64_t)SRC(ip, i*64+ 8) << 24;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*19+ 2) |= (uint64_t)SRC(ip, i*64+ 9) << 43 | (uint64_t)SRC1(ip, i*64+10) << 62;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*19+ 3) = (uint64_t)SRC(ip, i*64+10) >> 2;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*19+ 3) |= (uint64_t)SRC(ip, i*64+11) << 17;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*19+ 3) |= (uint64_t)SRC(ip, i*64+12) << 36 | (uint64_t)SRC1(ip, i*64+13) << 55;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*19+ 4) = (uint64_t)SRC(ip, i*64+13) >> 9;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*19+ 4) |= (uint64_t)SRC(ip, i*64+14) << 10;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*19+ 4) |= (uint64_t)SRC(ip, i*64+15) << 29 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*19+ 5) = (uint64_t)SRC(ip, i*64+16) >> 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t 
*)op+i*19+ 5) |= (uint64_t)SRC(ip, i*64+17) << 3;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*19+ 5) |= (uint64_t)SRC(ip, i*64+18) << 22;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*19+ 5) |= (uint64_t)SRC(ip, i*64+19) << 41 | (uint64_t)SRC1(ip, i*64+20) << 60;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*19+ 6) = (uint64_t)SRC(ip, i*64+20) >> 4;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*19+ 6) |= (uint64_t)SRC(ip, i*64+21) << 15;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*19+ 6) |= (uint64_t)SRC(ip, i*64+22) << 34 | (uint64_t)SRC1(ip, i*64+23) << 53;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*19+ 7) = (uint64_t)SRC(ip, i*64+23) >> 11;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*19+ 7) |= (uint64_t)SRC(ip, i*64+24) << 8;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*19+ 7) |= (uint64_t)SRC(ip, i*64+25) << 27 | (uint64_t)SRC1(ip, i*64+26) << 46;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*19+ 8) = (uint64_t)SRC(ip, i*64+26) >> 18;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*19+ 8) |= (uint64_t)SRC(ip, i*64+27) << 1;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*19+ 8) |= (uint64_t)SRC(ip, i*64+28) << 20;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*19+ 8) |= (uint64_t)SRC(ip, i*64+29) << 39 | (uint64_t)SRC1(ip, i*64+30) << 58;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*19+ 9) = (uint64_t)SRC(ip, i*64+30) >> 6;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*19+ 9) |= (uint64_t)SRC(ip, i*64+31) << 13;\ +} + +#define BITPACK64_19(ip, op, parm) { \ + BITBLK64_19(ip, 0, op, parm); SRCI(ip); op += 19*4/sizeof(op[0]);\ +} + +#define BITBLK64_20(ip, i, op, parm) { ;\ + IPPB(ip, i*16+ 0, parm); *((uint64_t *)op+i*5+ 0) = (uint64_t)SRC(ip, i*16+ 0) ;\ + IPPB(ip, i*16+ 1, parm); *((uint64_t *)op+i*5+ 0) |= (uint64_t)SRC(ip, i*16+ 1) << 20;\ + IPPB(ip, i*16+ 2, parm); *((uint64_t *)op+i*5+ 0) |= (uint64_t)SRC(ip, i*16+ 2) << 40 | (uint64_t)SRC1(ip, i*16+3) << 60;\ + IPPB(ip, i*16+ 3, parm); *((uint64_t *)op+i*5+ 1) = (uint64_t)SRC(ip, i*16+ 
3) >> 4;\ + IPPB(ip, i*16+ 4, parm); *((uint64_t *)op+i*5+ 1) |= (uint64_t)SRC(ip, i*16+ 4) << 16;\ + IPPB(ip, i*16+ 5, parm); *((uint64_t *)op+i*5+ 1) |= (uint64_t)SRC(ip, i*16+ 5) << 36 | (uint64_t)SRC1(ip, i*16+6) << 56;\ + IPPB(ip, i*16+ 6, parm); *((uint64_t *)op+i*5+ 2) = (uint64_t)SRC(ip, i*16+ 6) >> 8;\ + IPPB(ip, i*16+ 7, parm); *((uint64_t *)op+i*5+ 2) |= (uint64_t)SRC(ip, i*16+ 7) << 12;\ + IPPB(ip, i*16+ 8, parm); *((uint64_t *)op+i*5+ 2) |= (uint64_t)SRC(ip, i*16+ 8) << 32 | (uint64_t)SRC1(ip, i*16+9) << 52;\ + IPPB(ip, i*16+ 9, parm); *((uint64_t *)op+i*5+ 3) = (uint64_t)SRC(ip, i*16+ 9) >> 12;\ + IPPB(ip, i*16+10, parm); *((uint64_t *)op+i*5+ 3) |= (uint64_t)SRC(ip, i*16+10) << 8;\ + IPPB(ip, i*16+11, parm); *((uint64_t *)op+i*5+ 3) |= (uint64_t)SRC(ip, i*16+11) << 28 | (uint64_t)SRC1(ip, i*16+12) << 48;\ + IPPB(ip, i*16+12, parm); *((uint64_t *)op+i*5+ 4) = (uint64_t)SRC(ip, i*16+12) >> 16;\ + IPPB(ip, i*16+13, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*16+13) << 4;\ + IPPB(ip, i*16+14, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*16+14) << 24;\ + IPPB(ip, i*16+15, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*16+15) << 44;\ +} + +#define BITPACK64_20(ip, op, parm) { \ + BITBLK64_20(ip, 0, op, parm);\ + BITBLK64_20(ip, 1, op, parm); SRCI(ip); op += 20*4/sizeof(op[0]);\ +} + +#define BITBLK64_21(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*21+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*21+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 21;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*21+ 0) |= (uint64_t)SRC(ip, i*64+ 2) << 42 | (uint64_t)SRC1(ip, i*64+3) << 63;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*21+ 1) = (uint64_t)SRC(ip, i*64+ 3) >> 1;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*21+ 1) |= (uint64_t)SRC(ip, i*64+ 4) << 20;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*21+ 1) |= (uint64_t)SRC(ip, i*64+ 5) << 41 | (uint64_t)SRC1(ip, i*64+6) << 62;\ + 
IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*21+ 2) = (uint64_t)SRC(ip, i*64+ 6) >> 2;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*21+ 2) |= (uint64_t)SRC(ip, i*64+ 7) << 19;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*21+ 2) |= (uint64_t)SRC(ip, i*64+ 8) << 40 | (uint64_t)SRC1(ip, i*64+9) << 61;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*21+ 3) = (uint64_t)SRC(ip, i*64+ 9) >> 3;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*21+ 3) |= (uint64_t)SRC(ip, i*64+10) << 18;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*21+ 3) |= (uint64_t)SRC(ip, i*64+11) << 39 | (uint64_t)SRC1(ip, i*64+12) << 60;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*21+ 4) = (uint64_t)SRC(ip, i*64+12) >> 4;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*21+ 4) |= (uint64_t)SRC(ip, i*64+13) << 17;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*21+ 4) |= (uint64_t)SRC(ip, i*64+14) << 38 | (uint64_t)SRC1(ip, i*64+15) << 59;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*21+ 5) = (uint64_t)SRC(ip, i*64+15) >> 5;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*21+ 5) |= (uint64_t)SRC(ip, i*64+16) << 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*21+ 5) |= (uint64_t)SRC(ip, i*64+17) << 37 | (uint64_t)SRC1(ip, i*64+18) << 58;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*21+ 6) = (uint64_t)SRC(ip, i*64+18) >> 6;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*21+ 6) |= (uint64_t)SRC(ip, i*64+19) << 15;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*21+ 6) |= (uint64_t)SRC(ip, i*64+20) << 36 | (uint64_t)SRC1(ip, i*64+21) << 57;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*21+ 7) = (uint64_t)SRC(ip, i*64+21) >> 7;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*21+ 7) |= (uint64_t)SRC(ip, i*64+22) << 14;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*21+ 7) |= (uint64_t)SRC(ip, i*64+23) << 35 | (uint64_t)SRC1(ip, i*64+24) << 56;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*21+ 8) = (uint64_t)SRC(ip, i*64+24) >> 8;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*21+ 8) |= 
(uint64_t)SRC(ip, i*64+25) << 13;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*21+ 8) |= (uint64_t)SRC(ip, i*64+26) << 34 | (uint64_t)SRC1(ip, i*64+27) << 55;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*21+ 9) = (uint64_t)SRC(ip, i*64+27) >> 9;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*21+ 9) |= (uint64_t)SRC(ip, i*64+28) << 12;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*21+ 9) |= (uint64_t)SRC(ip, i*64+29) << 33 | (uint64_t)SRC1(ip, i*64+30) << 54;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*21+10) = (uint64_t)SRC(ip, i*64+30) >> 10;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*21+10) |= (uint64_t)SRC(ip, i*64+31) << 11;\ +} + +#define BITPACK64_21(ip, op, parm) { \ + BITBLK64_21(ip, 0, op, parm); SRCI(ip); op += 21*4/sizeof(op[0]);\ +} + +#define BITBLK64_22(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*11+ 0) = (uint64_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*11+ 0) |= (uint64_t)SRC(ip, i*32+ 1) << 22 | (uint64_t)SRC1(ip, i*32+2) << 44;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*11+ 1) = (uint64_t)SRC(ip, i*32+ 2) >> 20;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*11+ 1) |= (uint64_t)SRC(ip, i*32+ 3) << 2;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*11+ 1) |= (uint64_t)SRC(ip, i*32+ 4) << 24 | (uint64_t)SRC1(ip, i*32+5) << 46;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*11+ 2) = (uint64_t)SRC(ip, i*32+ 5) >> 18;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*11+ 2) |= (uint64_t)SRC(ip, i*32+ 6) << 4;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*11+ 2) |= (uint64_t)SRC(ip, i*32+ 7) << 26 | (uint64_t)SRC1(ip, i*32+8) << 48;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*11+ 3) = (uint64_t)SRC(ip, i*32+ 8) >> 16;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*11+ 3) |= (uint64_t)SRC(ip, i*32+ 9) << 6;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*11+ 3) |= (uint64_t)SRC(ip, i*32+10) << 28 | (uint64_t)SRC1(ip, i*32+11) << 50;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*11+ 4) = 
(uint64_t)SRC(ip, i*32+11) >> 14;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*11+ 4) |= (uint64_t)SRC(ip, i*32+12) << 8;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*11+ 4) |= (uint64_t)SRC(ip, i*32+13) << 30 | (uint64_t)SRC1(ip, i*32+14) << 52;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*11+ 5) = (uint64_t)SRC(ip, i*32+14) >> 12;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*11+ 5) |= (uint64_t)SRC(ip, i*32+15) << 10;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*11+ 5) |= (uint64_t)SRC(ip, i*32+16) << 32 | (uint64_t)SRC1(ip, i*32+17) << 54;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*11+ 6) = (uint64_t)SRC(ip, i*32+17) >> 10;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*11+ 6) |= (uint64_t)SRC(ip, i*32+18) << 12;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*11+ 6) |= (uint64_t)SRC(ip, i*32+19) << 34 | (uint64_t)SRC1(ip, i*32+20) << 56;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*11+ 7) = (uint64_t)SRC(ip, i*32+20) >> 8;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*11+ 7) |= (uint64_t)SRC(ip, i*32+21) << 14;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*11+ 7) |= (uint64_t)SRC(ip, i*32+22) << 36 | (uint64_t)SRC1(ip, i*32+23) << 58;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*11+ 8) = (uint64_t)SRC(ip, i*32+23) >> 6;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*11+ 8) |= (uint64_t)SRC(ip, i*32+24) << 16;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*11+ 8) |= (uint64_t)SRC(ip, i*32+25) << 38 | (uint64_t)SRC1(ip, i*32+26) << 60;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*11+ 9) = (uint64_t)SRC(ip, i*32+26) >> 4;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*11+ 9) |= (uint64_t)SRC(ip, i*32+27) << 18;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*11+ 9) |= (uint64_t)SRC(ip, i*32+28) << 40 | (uint64_t)SRC1(ip, i*32+29) << 62;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*11+10) = (uint64_t)SRC(ip, i*32+29) >> 2;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*11+10) |= (uint64_t)SRC(ip, i*32+30) << 20;\ + IPPB(ip, i*32+31, parm); 
*((uint64_t *)op+i*11+10) |= (uint64_t)SRC(ip, i*32+31) << 42;\ +} + +#define BITPACK64_22(ip, op, parm) { \ + BITBLK64_22(ip, 0, op, parm); SRCI(ip); op += 22*4/sizeof(op[0]);\ +} + +#define BITBLK64_23(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*23+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*23+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 23 | (uint64_t)SRC1(ip, i*64+2) << 46;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*23+ 1) = (uint64_t)SRC(ip, i*64+ 2) >> 18;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*23+ 1) |= (uint64_t)SRC(ip, i*64+ 3) << 5;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*23+ 1) |= (uint64_t)SRC(ip, i*64+ 4) << 28 | (uint64_t)SRC1(ip, i*64+5) << 51;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*23+ 2) = (uint64_t)SRC(ip, i*64+ 5) >> 13;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*23+ 2) |= (uint64_t)SRC(ip, i*64+ 6) << 10;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*23+ 2) |= (uint64_t)SRC(ip, i*64+ 7) << 33 | (uint64_t)SRC1(ip, i*64+8) << 56;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*23+ 3) = (uint64_t)SRC(ip, i*64+ 8) >> 8;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*23+ 3) |= (uint64_t)SRC(ip, i*64+ 9) << 15;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*23+ 3) |= (uint64_t)SRC(ip, i*64+10) << 38 | (uint64_t)SRC1(ip, i*64+11) << 61;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*23+ 4) = (uint64_t)SRC(ip, i*64+11) >> 3;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*23+ 4) |= (uint64_t)SRC(ip, i*64+12) << 20 | (uint64_t)SRC1(ip, i*64+13) << 43;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*23+ 5) = (uint64_t)SRC(ip, i*64+13) >> 21;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*23+ 5) |= (uint64_t)SRC(ip, i*64+14) << 2;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*23+ 5) |= (uint64_t)SRC(ip, i*64+15) << 25 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*23+ 6) = (uint64_t)SRC(ip, i*64+16) >> 16;\ + IPPB(ip, i*64+17, parm); 
*((uint64_t *)op+i*23+ 6) |= (uint64_t)SRC(ip, i*64+17) << 7;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*23+ 6) |= (uint64_t)SRC(ip, i*64+18) << 30 | (uint64_t)SRC1(ip, i*64+19) << 53;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*23+ 7) = (uint64_t)SRC(ip, i*64+19) >> 11;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*23+ 7) |= (uint64_t)SRC(ip, i*64+20) << 12;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*23+ 7) |= (uint64_t)SRC(ip, i*64+21) << 35 | (uint64_t)SRC1(ip, i*64+22) << 58;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*23+ 8) = (uint64_t)SRC(ip, i*64+22) >> 6;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*23+ 8) |= (uint64_t)SRC(ip, i*64+23) << 17;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*23+ 8) |= (uint64_t)SRC(ip, i*64+24) << 40 | (uint64_t)SRC1(ip, i*64+25) << 63;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*23+ 9) = (uint64_t)SRC(ip, i*64+25) >> 1;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*23+ 9) |= (uint64_t)SRC(ip, i*64+26) << 22 | (uint64_t)SRC1(ip, i*64+27) << 45;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*23+10) = (uint64_t)SRC(ip, i*64+27) >> 19;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*23+10) |= (uint64_t)SRC(ip, i*64+28) << 4;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*23+10) |= (uint64_t)SRC(ip, i*64+29) << 27 | (uint64_t)SRC1(ip, i*64+30) << 50;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*23+11) = (uint64_t)SRC(ip, i*64+30) >> 14;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*23+11) |= (uint64_t)SRC(ip, i*64+31) << 9;\ +} + +#define BITPACK64_23(ip, op, parm) { \ + BITBLK64_23(ip, 0, op, parm); SRCI(ip); op += 23*4/sizeof(op[0]);\ +} + +#define BITBLK64_24(ip, i, op, parm) { ;\ + IPPB(ip, i*8+ 0, parm); *((uint64_t *)op+i*3+ 0) = (uint64_t)SRC(ip, i*8+ 0) ;\ + IPPB(ip, i*8+ 1, parm); *((uint64_t *)op+i*3+ 0) |= (uint64_t)SRC(ip, i*8+ 1) << 24 | (uint64_t)SRC1(ip, i*8+2) << 48;\ + IPPB(ip, i*8+ 2, parm); *((uint64_t *)op+i*3+ 1) = (uint64_t)SRC(ip, i*8+ 2) >> 16;\ + IPPB(ip, i*8+ 3, parm); *((uint64_t 
*)op+i*3+ 1) |= (uint64_t)SRC(ip, i*8+ 3) << 8;\ + IPPB(ip, i*8+ 4, parm); *((uint64_t *)op+i*3+ 1) |= (uint64_t)SRC(ip, i*8+ 4) << 32 | (uint64_t)SRC1(ip, i*8+5) << 56;\ + IPPB(ip, i*8+ 5, parm); *((uint64_t *)op+i*3+ 2) = (uint64_t)SRC(ip, i*8+ 5) >> 8;\ + IPPB(ip, i*8+ 6, parm); *((uint64_t *)op+i*3+ 2) |= (uint64_t)SRC(ip, i*8+ 6) << 16;\ + IPPB(ip, i*8+ 7, parm); *((uint64_t *)op+i*3+ 2) |= (uint64_t)SRC(ip, i*8+ 7) << 40;\ +} + +#define BITPACK64_24(ip, op, parm) { \ + BITBLK64_24(ip, 0, op, parm);\ + BITBLK64_24(ip, 1, op, parm);\ + BITBLK64_24(ip, 2, op, parm);\ + BITBLK64_24(ip, 3, op, parm); SRCI(ip); op += 24*4/sizeof(op[0]);\ +} + +#define BITBLK64_25(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*25+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*25+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 25 | (uint64_t)SRC1(ip, i*64+2) << 50;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*25+ 1) = (uint64_t)SRC(ip, i*64+ 2) >> 14;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*25+ 1) |= (uint64_t)SRC(ip, i*64+ 3) << 11;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*25+ 1) |= (uint64_t)SRC(ip, i*64+ 4) << 36 | (uint64_t)SRC1(ip, i*64+5) << 61;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*25+ 2) = (uint64_t)SRC(ip, i*64+ 5) >> 3;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*25+ 2) |= (uint64_t)SRC(ip, i*64+ 6) << 22 | (uint64_t)SRC1(ip, i*64+7) << 47;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*25+ 3) = (uint64_t)SRC(ip, i*64+ 7) >> 17;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*25+ 3) |= (uint64_t)SRC(ip, i*64+ 8) << 8;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*25+ 3) |= (uint64_t)SRC(ip, i*64+ 9) << 33 | (uint64_t)SRC1(ip, i*64+10) << 58;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*25+ 4) = (uint64_t)SRC(ip, i*64+10) >> 6;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*25+ 4) |= (uint64_t)SRC(ip, i*64+11) << 19 | (uint64_t)SRC1(ip, i*64+12) << 44;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*25+ 
5) = (uint64_t)SRC(ip, i*64+12) >> 20;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*25+ 5) |= (uint64_t)SRC(ip, i*64+13) << 5;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*25+ 5) |= (uint64_t)SRC(ip, i*64+14) << 30 | (uint64_t)SRC1(ip, i*64+15) << 55;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*25+ 6) = (uint64_t)SRC(ip, i*64+15) >> 9;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*25+ 6) |= (uint64_t)SRC(ip, i*64+16) << 16 | (uint64_t)SRC1(ip, i*64+17) << 41;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*25+ 7) = (uint64_t)SRC(ip, i*64+17) >> 23;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*25+ 7) |= (uint64_t)SRC(ip, i*64+18) << 2;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*25+ 7) |= (uint64_t)SRC(ip, i*64+19) << 27 | (uint64_t)SRC1(ip, i*64+20) << 52;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*25+ 8) = (uint64_t)SRC(ip, i*64+20) >> 12;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*25+ 8) |= (uint64_t)SRC(ip, i*64+21) << 13;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*25+ 8) |= (uint64_t)SRC(ip, i*64+22) << 38 | (uint64_t)SRC1(ip, i*64+23) << 63;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*25+ 9) = (uint64_t)SRC(ip, i*64+23) >> 1;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*25+ 9) |= (uint64_t)SRC(ip, i*64+24) << 24 | (uint64_t)SRC1(ip, i*64+25) << 49;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*25+10) = (uint64_t)SRC(ip, i*64+25) >> 15;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*25+10) |= (uint64_t)SRC(ip, i*64+26) << 10;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*25+10) |= (uint64_t)SRC(ip, i*64+27) << 35 | (uint64_t)SRC1(ip, i*64+28) << 60;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*25+11) = (uint64_t)SRC(ip, i*64+28) >> 4;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*25+11) |= (uint64_t)SRC(ip, i*64+29) << 21 | (uint64_t)SRC1(ip, i*64+30) << 46;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*25+12) = (uint64_t)SRC(ip, i*64+30) >> 18;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*25+12) |= (uint64_t)SRC(ip, 
i*64+31) << 7;\ +} + +#define BITPACK64_25(ip, op, parm) { \ + BITBLK64_25(ip, 0, op, parm); SRCI(ip); op += 25*4/sizeof(op[0]);\ +} + +#define BITBLK64_26(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*13+ 0) = (uint64_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*13+ 0) |= (uint64_t)SRC(ip, i*32+ 1) << 26 | (uint64_t)SRC1(ip, i*32+2) << 52;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*13+ 1) = (uint64_t)SRC(ip, i*32+ 2) >> 12;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*13+ 1) |= (uint64_t)SRC(ip, i*32+ 3) << 14 | (uint64_t)SRC1(ip, i*32+4) << 40;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*13+ 2) = (uint64_t)SRC(ip, i*32+ 4) >> 24;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*13+ 2) |= (uint64_t)SRC(ip, i*32+ 5) << 2;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*13+ 2) |= (uint64_t)SRC(ip, i*32+ 6) << 28 | (uint64_t)SRC1(ip, i*32+7) << 54;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*13+ 3) = (uint64_t)SRC(ip, i*32+ 7) >> 10;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*13+ 3) |= (uint64_t)SRC(ip, i*32+ 8) << 16 | (uint64_t)SRC1(ip, i*32+9) << 42;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*13+ 4) = (uint64_t)SRC(ip, i*32+ 9) >> 22;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*13+ 4) |= (uint64_t)SRC(ip, i*32+10) << 4;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*13+ 4) |= (uint64_t)SRC(ip, i*32+11) << 30 | (uint64_t)SRC1(ip, i*32+12) << 56;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*13+ 5) = (uint64_t)SRC(ip, i*32+12) >> 8;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*13+ 5) |= (uint64_t)SRC(ip, i*32+13) << 18 | (uint64_t)SRC1(ip, i*32+14) << 44;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*13+ 6) = (uint64_t)SRC(ip, i*32+14) >> 20;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*13+ 6) |= (uint64_t)SRC(ip, i*32+15) << 6;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*13+ 6) |= (uint64_t)SRC(ip, i*32+16) << 32 | (uint64_t)SRC1(ip, i*32+17) << 58;\ + IPPB(ip, i*32+17, parm); *((uint64_t 
*)op+i*13+ 7) = (uint64_t)SRC(ip, i*32+17) >> 6;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*13+ 7) |= (uint64_t)SRC(ip, i*32+18) << 20 | (uint64_t)SRC1(ip, i*32+19) << 46;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*13+ 8) = (uint64_t)SRC(ip, i*32+19) >> 18;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*13+ 8) |= (uint64_t)SRC(ip, i*32+20) << 8;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*13+ 8) |= (uint64_t)SRC(ip, i*32+21) << 34 | (uint64_t)SRC1(ip, i*32+22) << 60;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*13+ 9) = (uint64_t)SRC(ip, i*32+22) >> 4;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*13+ 9) |= (uint64_t)SRC(ip, i*32+23) << 22 | (uint64_t)SRC1(ip, i*32+24) << 48;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*13+10) = (uint64_t)SRC(ip, i*32+24) >> 16;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*13+10) |= (uint64_t)SRC(ip, i*32+25) << 10;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*13+10) |= (uint64_t)SRC(ip, i*32+26) << 36 | (uint64_t)SRC1(ip, i*32+27) << 62;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*13+11) = (uint64_t)SRC(ip, i*32+27) >> 2;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*13+11) |= (uint64_t)SRC(ip, i*32+28) << 24 | (uint64_t)SRC1(ip, i*32+29) << 50;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*13+12) = (uint64_t)SRC(ip, i*32+29) >> 14;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*13+12) |= (uint64_t)SRC(ip, i*32+30) << 12;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*13+12) |= (uint64_t)SRC(ip, i*32+31) << 38;\ +} + +#define BITPACK64_26(ip, op, parm) { \ + BITBLK64_26(ip, 0, op, parm); SRCI(ip); op += 26*4/sizeof(op[0]);\ +} + +#define BITBLK64_27(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*27+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*27+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 27 | (uint64_t)SRC1(ip, i*64+2) << 54;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*27+ 1) = (uint64_t)SRC(ip, i*64+ 2) >> 10;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t 
*)op+i*27+ 1) |= (uint64_t)SRC(ip, i*64+ 3) << 17 | (uint64_t)SRC1(ip, i*64+4) << 44;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*27+ 2) = (uint64_t)SRC(ip, i*64+ 4) >> 20;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*27+ 2) |= (uint64_t)SRC(ip, i*64+ 5) << 7;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*27+ 2) |= (uint64_t)SRC(ip, i*64+ 6) << 34 | (uint64_t)SRC1(ip, i*64+7) << 61;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*27+ 3) = (uint64_t)SRC(ip, i*64+ 7) >> 3;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*27+ 3) |= (uint64_t)SRC(ip, i*64+ 8) << 24 | (uint64_t)SRC1(ip, i*64+9) << 51;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*27+ 4) = (uint64_t)SRC(ip, i*64+ 9) >> 13;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*27+ 4) |= (uint64_t)SRC(ip, i*64+10) << 14 | (uint64_t)SRC1(ip, i*64+11) << 41;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*27+ 5) = (uint64_t)SRC(ip, i*64+11) >> 23;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*27+ 5) |= (uint64_t)SRC(ip, i*64+12) << 4;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*27+ 5) |= (uint64_t)SRC(ip, i*64+13) << 31 | (uint64_t)SRC1(ip, i*64+14) << 58;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*27+ 6) = (uint64_t)SRC(ip, i*64+14) >> 6;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*27+ 6) |= (uint64_t)SRC(ip, i*64+15) << 21 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*27+ 7) = (uint64_t)SRC(ip, i*64+16) >> 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*27+ 7) |= (uint64_t)SRC(ip, i*64+17) << 11 | (uint64_t)SRC1(ip, i*64+18) << 38;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*27+ 8) = (uint64_t)SRC(ip, i*64+18) >> 26;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*27+ 8) |= (uint64_t)SRC(ip, i*64+19) << 1;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*27+ 8) |= (uint64_t)SRC(ip, i*64+20) << 28 | (uint64_t)SRC1(ip, i*64+21) << 55;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*27+ 9) = (uint64_t)SRC(ip, i*64+21) >> 9;\ + IPPB(ip, i*64+22, parm); *((uint64_t 
*)op+i*27+ 9) |= (uint64_t)SRC(ip, i*64+22) << 18 | (uint64_t)SRC1(ip, i*64+23) << 45;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*27+10) = (uint64_t)SRC(ip, i*64+23) >> 19;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*27+10) |= (uint64_t)SRC(ip, i*64+24) << 8;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*27+10) |= (uint64_t)SRC(ip, i*64+25) << 35 | (uint64_t)SRC1(ip, i*64+26) << 62;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*27+11) = (uint64_t)SRC(ip, i*64+26) >> 2;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*27+11) |= (uint64_t)SRC(ip, i*64+27) << 25 | (uint64_t)SRC1(ip, i*64+28) << 52;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*27+12) = (uint64_t)SRC(ip, i*64+28) >> 12;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*27+12) |= (uint64_t)SRC(ip, i*64+29) << 15 | (uint64_t)SRC1(ip, i*64+30) << 42;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*27+13) = (uint64_t)SRC(ip, i*64+30) >> 22;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*27+13) |= (uint64_t)SRC(ip, i*64+31) << 5;\ +} + +#define BITPACK64_27(ip, op, parm) { \ + BITBLK64_27(ip, 0, op, parm); SRCI(ip); op += 27*4/sizeof(op[0]);\ +} + +#define BITBLK64_28(ip, i, op, parm) { ;\ + IPPB(ip, i*16+ 0, parm); *((uint64_t *)op+i*7+ 0) = (uint64_t)SRC(ip, i*16+ 0) ;\ + IPPB(ip, i*16+ 1, parm); *((uint64_t *)op+i*7+ 0) |= (uint64_t)SRC(ip, i*16+ 1) << 28 | (uint64_t)SRC1(ip, i*16+2) << 56;\ + IPPB(ip, i*16+ 2, parm); *((uint64_t *)op+i*7+ 1) = (uint64_t)SRC(ip, i*16+ 2) >> 8;\ + IPPB(ip, i*16+ 3, parm); *((uint64_t *)op+i*7+ 1) |= (uint64_t)SRC(ip, i*16+ 3) << 20 | (uint64_t)SRC1(ip, i*16+4) << 48;\ + IPPB(ip, i*16+ 4, parm); *((uint64_t *)op+i*7+ 2) = (uint64_t)SRC(ip, i*16+ 4) >> 16;\ + IPPB(ip, i*16+ 5, parm); *((uint64_t *)op+i*7+ 2) |= (uint64_t)SRC(ip, i*16+ 5) << 12 | (uint64_t)SRC1(ip, i*16+6) << 40;\ + IPPB(ip, i*16+ 6, parm); *((uint64_t *)op+i*7+ 3) = (uint64_t)SRC(ip, i*16+ 6) >> 24;\ + IPPB(ip, i*16+ 7, parm); *((uint64_t *)op+i*7+ 3) |= (uint64_t)SRC(ip, i*16+ 7) << 4;\ + IPPB(ip, 
i*16+ 8, parm); *((uint64_t *)op+i*7+ 3) |= (uint64_t)SRC(ip, i*16+ 8) << 32 | (uint64_t)SRC1(ip, i*16+9) << 60;\ + IPPB(ip, i*16+ 9, parm); *((uint64_t *)op+i*7+ 4) = (uint64_t)SRC(ip, i*16+ 9) >> 4;\ + IPPB(ip, i*16+10, parm); *((uint64_t *)op+i*7+ 4) |= (uint64_t)SRC(ip, i*16+10) << 24 | (uint64_t)SRC1(ip, i*16+11) << 52;\ + IPPB(ip, i*16+11, parm); *((uint64_t *)op+i*7+ 5) = (uint64_t)SRC(ip, i*16+11) >> 12;\ + IPPB(ip, i*16+12, parm); *((uint64_t *)op+i*7+ 5) |= (uint64_t)SRC(ip, i*16+12) << 16 | (uint64_t)SRC1(ip, i*16+13) << 44;\ + IPPB(ip, i*16+13, parm); *((uint64_t *)op+i*7+ 6) = (uint64_t)SRC(ip, i*16+13) >> 20;\ + IPPB(ip, i*16+14, parm); *((uint64_t *)op+i*7+ 6) |= (uint64_t)SRC(ip, i*16+14) << 8;\ + IPPB(ip, i*16+15, parm); *((uint64_t *)op+i*7+ 6) |= (uint64_t)SRC(ip, i*16+15) << 36;\ +} + +#define BITPACK64_28(ip, op, parm) { \ + BITBLK64_28(ip, 0, op, parm);\ + BITBLK64_28(ip, 1, op, parm); SRCI(ip); op += 28*4/sizeof(op[0]);\ +} + +#define BITBLK64_29(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*29+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*29+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 29 | (uint64_t)SRC1(ip, i*64+2) << 58;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*29+ 1) = (uint64_t)SRC(ip, i*64+ 2) >> 6;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*29+ 1) |= (uint64_t)SRC(ip, i*64+ 3) << 23 | (uint64_t)SRC1(ip, i*64+4) << 52;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*29+ 2) = (uint64_t)SRC(ip, i*64+ 4) >> 12;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*29+ 2) |= (uint64_t)SRC(ip, i*64+ 5) << 17 | (uint64_t)SRC1(ip, i*64+6) << 46;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*29+ 3) = (uint64_t)SRC(ip, i*64+ 6) >> 18;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*29+ 3) |= (uint64_t)SRC(ip, i*64+ 7) << 11 | (uint64_t)SRC1(ip, i*64+8) << 40;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*29+ 4) = (uint64_t)SRC(ip, i*64+ 8) >> 24;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t 
*)op+i*29+ 4) |= (uint64_t)SRC(ip, i*64+ 9) << 5;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*29+ 4) |= (uint64_t)SRC(ip, i*64+10) << 34 | (uint64_t)SRC1(ip, i*64+11) << 63;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*29+ 5) = (uint64_t)SRC(ip, i*64+11) >> 1;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*29+ 5) |= (uint64_t)SRC(ip, i*64+12) << 28 | (uint64_t)SRC1(ip, i*64+13) << 57;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*29+ 6) = (uint64_t)SRC(ip, i*64+13) >> 7;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*29+ 6) |= (uint64_t)SRC(ip, i*64+14) << 22 | (uint64_t)SRC1(ip, i*64+15) << 51;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*29+ 7) = (uint64_t)SRC(ip, i*64+15) >> 13;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*29+ 7) |= (uint64_t)SRC(ip, i*64+16) << 16 | (uint64_t)SRC1(ip, i*64+17) << 45;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*29+ 8) = (uint64_t)SRC(ip, i*64+17) >> 19;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*29+ 8) |= (uint64_t)SRC(ip, i*64+18) << 10 | (uint64_t)SRC1(ip, i*64+19) << 39;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*29+ 9) = (uint64_t)SRC(ip, i*64+19) >> 25;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*29+ 9) |= (uint64_t)SRC(ip, i*64+20) << 4;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*29+ 9) |= (uint64_t)SRC(ip, i*64+21) << 33 | (uint64_t)SRC1(ip, i*64+22) << 62;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*29+10) = (uint64_t)SRC(ip, i*64+22) >> 2;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*29+10) |= (uint64_t)SRC(ip, i*64+23) << 27 | (uint64_t)SRC1(ip, i*64+24) << 56;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*29+11) = (uint64_t)SRC(ip, i*64+24) >> 8;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*29+11) |= (uint64_t)SRC(ip, i*64+25) << 21 | (uint64_t)SRC1(ip, i*64+26) << 50;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*29+12) = (uint64_t)SRC(ip, i*64+26) >> 14;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*29+12) |= (uint64_t)SRC(ip, i*64+27) << 15 | (uint64_t)SRC1(ip, i*64+28) << 44;\ 
+ IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*29+13) = (uint64_t)SRC(ip, i*64+28) >> 20;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*29+13) |= (uint64_t)SRC(ip, i*64+29) << 9 | (uint64_t)SRC1(ip, i*64+30) << 38;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*29+14) = (uint64_t)SRC(ip, i*64+30) >> 26;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*29+14) |= (uint64_t)SRC(ip, i*64+31) << 3;\ +} + +#define BITPACK64_29(ip, op, parm) { \ + BITBLK64_29(ip, 0, op, parm); SRCI(ip); op += 29*4/sizeof(op[0]);\ +} + +#define BITBLK64_30(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*15+ 0) = (uint64_t)SRC(ip, i*32+ 0) ;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*15+ 0) |= (uint64_t)SRC(ip, i*32+ 1) << 30 | (uint64_t)SRC1(ip, i*32+2) << 60;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*15+ 1) = (uint64_t)SRC(ip, i*32+ 2) >> 4;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*15+ 1) |= (uint64_t)SRC(ip, i*32+ 3) << 26 | (uint64_t)SRC1(ip, i*32+4) << 56;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*15+ 2) = (uint64_t)SRC(ip, i*32+ 4) >> 8;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*15+ 2) |= (uint64_t)SRC(ip, i*32+ 5) << 22 | (uint64_t)SRC1(ip, i*32+6) << 52;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*15+ 3) = (uint64_t)SRC(ip, i*32+ 6) >> 12;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*15+ 3) |= (uint64_t)SRC(ip, i*32+ 7) << 18 | (uint64_t)SRC1(ip, i*32+8) << 48;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*15+ 4) = (uint64_t)SRC(ip, i*32+ 8) >> 16;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*15+ 4) |= (uint64_t)SRC(ip, i*32+ 9) << 14 | (uint64_t)SRC1(ip, i*32+10) << 44;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*15+ 5) = (uint64_t)SRC(ip, i*32+10) >> 20;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*15+ 5) |= (uint64_t)SRC(ip, i*32+11) << 10 | (uint64_t)SRC1(ip, i*32+12) << 40;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*15+ 6) = (uint64_t)SRC(ip, i*32+12) >> 24;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*15+ 6) |= 
(uint64_t)SRC(ip, i*32+13) << 6 | (uint64_t)SRC1(ip, i*32+14) << 36;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*15+ 7) = (uint64_t)SRC(ip, i*32+14) >> 28;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*15+ 7) |= (uint64_t)SRC(ip, i*32+15) << 2;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*15+ 7) |= (uint64_t)SRC(ip, i*32+16) << 32 | (uint64_t)SRC1(ip, i*32+17) << 62;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*15+ 8) = (uint64_t)SRC(ip, i*32+17) >> 2;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*15+ 8) |= (uint64_t)SRC(ip, i*32+18) << 28 | (uint64_t)SRC1(ip, i*32+19) << 58;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*15+ 9) = (uint64_t)SRC(ip, i*32+19) >> 6;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*15+ 9) |= (uint64_t)SRC(ip, i*32+20) << 24 | (uint64_t)SRC1(ip, i*32+21) << 54;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*15+10) = (uint64_t)SRC(ip, i*32+21) >> 10;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*15+10) |= (uint64_t)SRC(ip, i*32+22) << 20 | (uint64_t)SRC1(ip, i*32+23) << 50;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*15+11) = (uint64_t)SRC(ip, i*32+23) >> 14;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*15+11) |= (uint64_t)SRC(ip, i*32+24) << 16 | (uint64_t)SRC1(ip, i*32+25) << 46;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*15+12) = (uint64_t)SRC(ip, i*32+25) >> 18;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*15+12) |= (uint64_t)SRC(ip, i*32+26) << 12 | (uint64_t)SRC1(ip, i*32+27) << 42;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*15+13) = (uint64_t)SRC(ip, i*32+27) >> 22;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*15+13) |= (uint64_t)SRC(ip, i*32+28) << 8 | (uint64_t)SRC1(ip, i*32+29) << 38;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*15+14) = (uint64_t)SRC(ip, i*32+29) >> 26;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*15+14) |= (uint64_t)SRC(ip, i*32+30) << 4;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*15+14) |= (uint64_t)SRC(ip, i*32+31) << 34;\ +} + +#define BITPACK64_30(ip, op, parm) { \ + 
BITBLK64_30(ip, 0, op, parm); SRCI(ip); op += 30*4/sizeof(op[0]);\ +} + +#define BITBLK64_31(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*31+ 0) = (uint64_t)SRC(ip, i*64+ 0) ;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*31+ 0) |= (uint64_t)SRC(ip, i*64+ 1) << 31 | (uint64_t)SRC1(ip, i*64+2) << 62;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*31+ 1) = (uint64_t)SRC(ip, i*64+ 2) >> 2;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*31+ 1) |= (uint64_t)SRC(ip, i*64+ 3) << 29 | (uint64_t)SRC1(ip, i*64+4) << 60;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*31+ 2) = (uint64_t)SRC(ip, i*64+ 4) >> 4;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*31+ 2) |= (uint64_t)SRC(ip, i*64+ 5) << 27 | (uint64_t)SRC1(ip, i*64+6) << 58;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*31+ 3) = (uint64_t)SRC(ip, i*64+ 6) >> 6;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*31+ 3) |= (uint64_t)SRC(ip, i*64+ 7) << 25 | (uint64_t)SRC1(ip, i*64+8) << 56;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*31+ 4) = (uint64_t)SRC(ip, i*64+ 8) >> 8;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*31+ 4) |= (uint64_t)SRC(ip, i*64+ 9) << 23 | (uint64_t)SRC1(ip, i*64+10) << 54;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*31+ 5) = (uint64_t)SRC(ip, i*64+10) >> 10;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*31+ 5) |= (uint64_t)SRC(ip, i*64+11) << 21 | (uint64_t)SRC1(ip, i*64+12) << 52;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*31+ 6) = (uint64_t)SRC(ip, i*64+12) >> 12;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*31+ 6) |= (uint64_t)SRC(ip, i*64+13) << 19 | (uint64_t)SRC1(ip, i*64+14) << 50;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*31+ 7) = (uint64_t)SRC(ip, i*64+14) >> 14;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*31+ 7) |= (uint64_t)SRC(ip, i*64+15) << 17 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*31+ 8) = (uint64_t)SRC(ip, i*64+16) >> 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*31+ 8) |= 
(uint64_t)SRC(ip, i*64+17) << 15 | (uint64_t)SRC1(ip, i*64+18) << 46;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*31+ 9) = (uint64_t)SRC(ip, i*64+18) >> 18;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*31+ 9) |= (uint64_t)SRC(ip, i*64+19) << 13 | (uint64_t)SRC1(ip, i*64+20) << 44;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*31+10) = (uint64_t)SRC(ip, i*64+20) >> 20;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*31+10) |= (uint64_t)SRC(ip, i*64+21) << 11 | (uint64_t)SRC1(ip, i*64+22) << 42;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*31+11) = (uint64_t)SRC(ip, i*64+22) >> 22;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*31+11) |= (uint64_t)SRC(ip, i*64+23) << 9 | (uint64_t)SRC1(ip, i*64+24) << 40;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*31+12) = (uint64_t)SRC(ip, i*64+24) >> 24;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*31+12) |= (uint64_t)SRC(ip, i*64+25) << 7 | (uint64_t)SRC1(ip, i*64+26) << 38;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*31+13) = (uint64_t)SRC(ip, i*64+26) >> 26;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*31+13) |= (uint64_t)SRC(ip, i*64+27) << 5 | (uint64_t)SRC1(ip, i*64+28) << 36;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*31+14) = (uint64_t)SRC(ip, i*64+28) >> 28;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*31+14) |= (uint64_t)SRC(ip, i*64+29) << 3 | (uint64_t)SRC1(ip, i*64+30) << 34;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*31+15) = (uint64_t)SRC(ip, i*64+30) >> 30;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*31+15) |= (uint64_t)SRC(ip, i*64+31) << 1;\ +} + +#define BITPACK64_31(ip, op, parm) { \ + BITBLK64_31(ip, 0, op, parm); SRCI(ip); op += 31*4/sizeof(op[0]);\ +} + +#define BITBLK64_32(ip, i, op, parm) { \ + IPPB(ip, i*2+ 0, parm); *(uint32_t *)(op+i*8+ 0) = SRC(ip, i*2+ 0);\ + IPPB(ip, i*2+ 1, parm); *(uint32_t *)(op+i*8+ 4) = SRC(ip, i*2+ 1);;\ +} + +#define BITPACK64_32(ip, op, parm) { \ + BITBLK64_32(ip, 0, op, parm);\ + BITBLK64_32(ip, 1, op, parm);\ + BITBLK64_32(ip, 2, op, parm);\ + 
BITBLK64_32(ip, 3, op, parm);\ + BITBLK64_32(ip, 4, op, parm);\ + BITBLK64_32(ip, 5, op, parm);\ + BITBLK64_32(ip, 6, op, parm);\ + BITBLK64_32(ip, 7, op, parm);\ + BITBLK64_32(ip, 8, op, parm);\ + BITBLK64_32(ip, 9, op, parm);\ + BITBLK64_32(ip, 10, op, parm);\ + BITBLK64_32(ip, 11, op, parm);\ + BITBLK64_32(ip, 12, op, parm);\ + BITBLK64_32(ip, 13, op, parm);\ + BITBLK64_32(ip, 14, op, parm);\ + BITBLK64_32(ip, 15, op, parm); SRCI(ip); op += 32*4/sizeof(op[0]);\ +} + +#define BITBLK64_33(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*33+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 33;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*33+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 31;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*33+ 1) |= (uint64_t)SRC(ip, i*64+ 2) << 2 | (uint64_t)SRC1(ip, i*64+3) << 35;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*33+ 2) = (uint64_t)SRC(ip, i*64+ 3) >> 29;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*33+ 2) |= (uint64_t)SRC(ip, i*64+ 4) << 4 | (uint64_t)SRC1(ip, i*64+5) << 37;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*33+ 3) = (uint64_t)SRC(ip, i*64+ 5) >> 27;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*33+ 3) |= (uint64_t)SRC(ip, i*64+ 6) << 6 | (uint64_t)SRC1(ip, i*64+7) << 39;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*33+ 4) = (uint64_t)SRC(ip, i*64+ 7) >> 25;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*33+ 4) |= (uint64_t)SRC(ip, i*64+ 8) << 8 | (uint64_t)SRC1(ip, i*64+9) << 41;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*33+ 5) = (uint64_t)SRC(ip, i*64+ 9) >> 23;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*33+ 5) |= (uint64_t)SRC(ip, i*64+10) << 10 | (uint64_t)SRC1(ip, i*64+11) << 43;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*33+ 6) = (uint64_t)SRC(ip, i*64+11) >> 21;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*33+ 6) |= (uint64_t)SRC(ip, i*64+12) << 12 | (uint64_t)SRC1(ip, i*64+13) << 45;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*33+ 7) = 
(uint64_t)SRC(ip, i*64+13) >> 19;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*33+ 7) |= (uint64_t)SRC(ip, i*64+14) << 14 | (uint64_t)SRC1(ip, i*64+15) << 47;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*33+ 8) = (uint64_t)SRC(ip, i*64+15) >> 17;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*33+ 8) |= (uint64_t)SRC(ip, i*64+16) << 16 | (uint64_t)SRC1(ip, i*64+17) << 49;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*33+ 9) = (uint64_t)SRC(ip, i*64+17) >> 15;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*33+ 9) |= (uint64_t)SRC(ip, i*64+18) << 18 | (uint64_t)SRC1(ip, i*64+19) << 51;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*33+10) = (uint64_t)SRC(ip, i*64+19) >> 13;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*33+10) |= (uint64_t)SRC(ip, i*64+20) << 20 | (uint64_t)SRC1(ip, i*64+21) << 53;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*33+11) = (uint64_t)SRC(ip, i*64+21) >> 11;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*33+11) |= (uint64_t)SRC(ip, i*64+22) << 22 | (uint64_t)SRC1(ip, i*64+23) << 55;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*33+12) = (uint64_t)SRC(ip, i*64+23) >> 9;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*33+12) |= (uint64_t)SRC(ip, i*64+24) << 24 | (uint64_t)SRC1(ip, i*64+25) << 57;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*33+13) = (uint64_t)SRC(ip, i*64+25) >> 7;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*33+13) |= (uint64_t)SRC(ip, i*64+26) << 26 | (uint64_t)SRC1(ip, i*64+27) << 59;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*33+14) = (uint64_t)SRC(ip, i*64+27) >> 5;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*33+14) |= (uint64_t)SRC(ip, i*64+28) << 28 | (uint64_t)SRC1(ip, i*64+29) << 61;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*33+15) = (uint64_t)SRC(ip, i*64+29) >> 3;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*33+15) |= (uint64_t)SRC(ip, i*64+30) << 30 | (uint64_t)SRC1(ip, i*64+31) << 63;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*33+16) = (uint64_t)SRC(ip, i*64+31) >> 1;\ +} + +#define 
BITPACK64_33(ip, op, parm) { \ + BITBLK64_33(ip, 0, op, parm); SRCI(ip); op += 33*4/sizeof(op[0]);\ +} + +#define BITBLK64_34(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*17+ 0) = (uint64_t)SRC(ip, i*32+ 0) | (uint64_t)SRC1(ip, i*32+1) << 34;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*17+ 1) = (uint64_t)SRC(ip, i*32+ 1) >> 30;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*17+ 1) |= (uint64_t)SRC(ip, i*32+ 2) << 4 | (uint64_t)SRC1(ip, i*32+3) << 38;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*17+ 2) = (uint64_t)SRC(ip, i*32+ 3) >> 26;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*17+ 2) |= (uint64_t)SRC(ip, i*32+ 4) << 8 | (uint64_t)SRC1(ip, i*32+5) << 42;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*17+ 3) = (uint64_t)SRC(ip, i*32+ 5) >> 22;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*17+ 3) |= (uint64_t)SRC(ip, i*32+ 6) << 12 | (uint64_t)SRC1(ip, i*32+7) << 46;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*17+ 4) = (uint64_t)SRC(ip, i*32+ 7) >> 18;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*17+ 4) |= (uint64_t)SRC(ip, i*32+ 8) << 16 | (uint64_t)SRC1(ip, i*32+9) << 50;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*17+ 5) = (uint64_t)SRC(ip, i*32+ 9) >> 14;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*17+ 5) |= (uint64_t)SRC(ip, i*32+10) << 20 | (uint64_t)SRC1(ip, i*32+11) << 54;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*17+ 6) = (uint64_t)SRC(ip, i*32+11) >> 10;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*17+ 6) |= (uint64_t)SRC(ip, i*32+12) << 24 | (uint64_t)SRC1(ip, i*32+13) << 58;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*17+ 7) = (uint64_t)SRC(ip, i*32+13) >> 6;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*17+ 7) |= (uint64_t)SRC(ip, i*32+14) << 28 | (uint64_t)SRC1(ip, i*32+15) << 62;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*17+ 8) = (uint64_t)SRC(ip, i*32+15) >> 2 | (uint64_t)SRC1(ip, i*32+16) << 32;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*17+ 9) = (uint64_t)SRC(ip, i*32+16) >> 32;\ + 
IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*17+ 9) |= (uint64_t)SRC(ip, i*32+17) << 2 | (uint64_t)SRC1(ip, i*32+18) << 36;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*17+10) = (uint64_t)SRC(ip, i*32+18) >> 28;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*17+10) |= (uint64_t)SRC(ip, i*32+19) << 6 | (uint64_t)SRC1(ip, i*32+20) << 40;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*17+11) = (uint64_t)SRC(ip, i*32+20) >> 24;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*17+11) |= (uint64_t)SRC(ip, i*32+21) << 10 | (uint64_t)SRC1(ip, i*32+22) << 44;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*17+12) = (uint64_t)SRC(ip, i*32+22) >> 20;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*17+12) |= (uint64_t)SRC(ip, i*32+23) << 14 | (uint64_t)SRC1(ip, i*32+24) << 48;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*17+13) = (uint64_t)SRC(ip, i*32+24) >> 16;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*17+13) |= (uint64_t)SRC(ip, i*32+25) << 18 | (uint64_t)SRC1(ip, i*32+26) << 52;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*17+14) = (uint64_t)SRC(ip, i*32+26) >> 12;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*17+14) |= (uint64_t)SRC(ip, i*32+27) << 22 | (uint64_t)SRC1(ip, i*32+28) << 56;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*17+15) = (uint64_t)SRC(ip, i*32+28) >> 8;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*17+15) |= (uint64_t)SRC(ip, i*32+29) << 26 | (uint64_t)SRC1(ip, i*32+30) << 60;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*17+16) = (uint64_t)SRC(ip, i*32+30) >> 4;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*17+16) |= (uint64_t)SRC(ip, i*32+31) << 30;\ +} + +#define BITPACK64_34(ip, op, parm) { \ + BITBLK64_34(ip, 0, op, parm); SRCI(ip); op += 34*4/sizeof(op[0]);\ +} + +#define BITBLK64_35(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*35+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 35;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*35+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 29;\ + IPPB(ip, i*64+ 2, 
parm); *((uint64_t *)op+i*35+ 1) |= (uint64_t)SRC(ip, i*64+ 2) << 6 | (uint64_t)SRC1(ip, i*64+3) << 41;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*35+ 2) = (uint64_t)SRC(ip, i*64+ 3) >> 23;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*35+ 2) |= (uint64_t)SRC(ip, i*64+ 4) << 12 | (uint64_t)SRC1(ip, i*64+5) << 47;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*35+ 3) = (uint64_t)SRC(ip, i*64+ 5) >> 17;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*35+ 3) |= (uint64_t)SRC(ip, i*64+ 6) << 18 | (uint64_t)SRC1(ip, i*64+7) << 53;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*35+ 4) = (uint64_t)SRC(ip, i*64+ 7) >> 11;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*35+ 4) |= (uint64_t)SRC(ip, i*64+ 8) << 24 | (uint64_t)SRC1(ip, i*64+9) << 59;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*35+ 5) = (uint64_t)SRC(ip, i*64+ 9) >> 5 | (uint64_t)SRC1(ip, i*64+10) << 30;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*35+ 6) = (uint64_t)SRC(ip, i*64+10) >> 34;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*35+ 6) |= (uint64_t)SRC(ip, i*64+11) << 1 | (uint64_t)SRC1(ip, i*64+12) << 36;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*35+ 7) = (uint64_t)SRC(ip, i*64+12) >> 28;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*35+ 7) |= (uint64_t)SRC(ip, i*64+13) << 7 | (uint64_t)SRC1(ip, i*64+14) << 42;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*35+ 8) = (uint64_t)SRC(ip, i*64+14) >> 22;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*35+ 8) |= (uint64_t)SRC(ip, i*64+15) << 13 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*35+ 9) = (uint64_t)SRC(ip, i*64+16) >> 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*35+ 9) |= (uint64_t)SRC(ip, i*64+17) << 19 | (uint64_t)SRC1(ip, i*64+18) << 54;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*35+10) = (uint64_t)SRC(ip, i*64+18) >> 10;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*35+10) |= (uint64_t)SRC(ip, i*64+19) << 25 | (uint64_t)SRC1(ip, i*64+20) << 60;\ + IPPB(ip, i*64+20, parm); *((uint64_t 
*)op+i*35+11) = (uint64_t)SRC(ip, i*64+20) >> 4 | (uint64_t)SRC1(ip, i*64+21) << 31;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*35+12) = (uint64_t)SRC(ip, i*64+21) >> 33;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*35+12) |= (uint64_t)SRC(ip, i*64+22) << 2 | (uint64_t)SRC1(ip, i*64+23) << 37;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*35+13) = (uint64_t)SRC(ip, i*64+23) >> 27;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*35+13) |= (uint64_t)SRC(ip, i*64+24) << 8 | (uint64_t)SRC1(ip, i*64+25) << 43;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*35+14) = (uint64_t)SRC(ip, i*64+25) >> 21;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*35+14) |= (uint64_t)SRC(ip, i*64+26) << 14 | (uint64_t)SRC1(ip, i*64+27) << 49;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*35+15) = (uint64_t)SRC(ip, i*64+27) >> 15;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*35+15) |= (uint64_t)SRC(ip, i*64+28) << 20 | (uint64_t)SRC1(ip, i*64+29) << 55;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*35+16) = (uint64_t)SRC(ip, i*64+29) >> 9;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*35+16) |= (uint64_t)SRC(ip, i*64+30) << 26 | (uint64_t)SRC1(ip, i*64+31) << 61;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*35+17) = (uint64_t)SRC(ip, i*64+31) >> 3;\ +} + +#define BITPACK64_35(ip, op, parm) { \ + BITBLK64_35(ip, 0, op, parm); SRCI(ip); op += 35*4/sizeof(op[0]);\ +} + +#define BITBLK64_36(ip, i, op, parm) { ;\ + IPPB(ip, i*16+ 0, parm); *((uint64_t *)op+i*9+ 0) = (uint64_t)SRC(ip, i*16+ 0) | (uint64_t)SRC1(ip, i*16+1) << 36;\ + IPPB(ip, i*16+ 1, parm); *((uint64_t *)op+i*9+ 1) = (uint64_t)SRC(ip, i*16+ 1) >> 28;\ + IPPB(ip, i*16+ 2, parm); *((uint64_t *)op+i*9+ 1) |= (uint64_t)SRC(ip, i*16+ 2) << 8 | (uint64_t)SRC1(ip, i*16+3) << 44;\ + IPPB(ip, i*16+ 3, parm); *((uint64_t *)op+i*9+ 2) = (uint64_t)SRC(ip, i*16+ 3) >> 20;\ + IPPB(ip, i*16+ 4, parm); *((uint64_t *)op+i*9+ 2) |= (uint64_t)SRC(ip, i*16+ 4) << 16 | (uint64_t)SRC1(ip, i*16+5) << 52;\ + IPPB(ip, i*16+ 5, parm); 
*((uint64_t *)op+i*9+ 3) = (uint64_t)SRC(ip, i*16+ 5) >> 12;\ + IPPB(ip, i*16+ 6, parm); *((uint64_t *)op+i*9+ 3) |= (uint64_t)SRC(ip, i*16+ 6) << 24 | (uint64_t)SRC1(ip, i*16+7) << 60;\ + IPPB(ip, i*16+ 7, parm); *((uint64_t *)op+i*9+ 4) = (uint64_t)SRC(ip, i*16+ 7) >> 4 | (uint64_t)SRC1(ip, i*16+8) << 32;\ + IPPB(ip, i*16+ 8, parm); *((uint64_t *)op+i*9+ 5) = (uint64_t)SRC(ip, i*16+ 8) >> 32;\ + IPPB(ip, i*16+ 9, parm); *((uint64_t *)op+i*9+ 5) |= (uint64_t)SRC(ip, i*16+ 9) << 4 | (uint64_t)SRC1(ip, i*16+10) << 40;\ + IPPB(ip, i*16+10, parm); *((uint64_t *)op+i*9+ 6) = (uint64_t)SRC(ip, i*16+10) >> 24;\ + IPPB(ip, i*16+11, parm); *((uint64_t *)op+i*9+ 6) |= (uint64_t)SRC(ip, i*16+11) << 12 | (uint64_t)SRC1(ip, i*16+12) << 48;\ + IPPB(ip, i*16+12, parm); *((uint64_t *)op+i*9+ 7) = (uint64_t)SRC(ip, i*16+12) >> 16;\ + IPPB(ip, i*16+13, parm); *((uint64_t *)op+i*9+ 7) |= (uint64_t)SRC(ip, i*16+13) << 20 | (uint64_t)SRC1(ip, i*16+14) << 56;\ + IPPB(ip, i*16+14, parm); *((uint64_t *)op+i*9+ 8) = (uint64_t)SRC(ip, i*16+14) >> 8;\ + IPPB(ip, i*16+15, parm); *((uint64_t *)op+i*9+ 8) |= (uint64_t)SRC(ip, i*16+15) << 28;\ +} + +#define BITPACK64_36(ip, op, parm) { \ + BITBLK64_36(ip, 0, op, parm);\ + BITBLK64_36(ip, 1, op, parm); SRCI(ip); op += 36*4/sizeof(op[0]);\ +} + +#define BITBLK64_37(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*37+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 37;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*37+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 27;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*37+ 1) |= (uint64_t)SRC(ip, i*64+ 2) << 10 | (uint64_t)SRC1(ip, i*64+3) << 47;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*37+ 2) = (uint64_t)SRC(ip, i*64+ 3) >> 17;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*37+ 2) |= (uint64_t)SRC(ip, i*64+ 4) << 20 | (uint64_t)SRC1(ip, i*64+5) << 57;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*37+ 3) = (uint64_t)SRC(ip, i*64+ 5) >> 7 | (uint64_t)SRC1(ip, i*64+6) << 
30;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*37+ 4) = (uint64_t)SRC(ip, i*64+ 6) >> 34;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*37+ 4) |= (uint64_t)SRC(ip, i*64+ 7) << 3 | (uint64_t)SRC1(ip, i*64+8) << 40;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*37+ 5) = (uint64_t)SRC(ip, i*64+ 8) >> 24;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*37+ 5) |= (uint64_t)SRC(ip, i*64+ 9) << 13 | (uint64_t)SRC1(ip, i*64+10) << 50;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*37+ 6) = (uint64_t)SRC(ip, i*64+10) >> 14;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*37+ 6) |= (uint64_t)SRC(ip, i*64+11) << 23 | (uint64_t)SRC1(ip, i*64+12) << 60;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*37+ 7) = (uint64_t)SRC(ip, i*64+12) >> 4 | (uint64_t)SRC1(ip, i*64+13) << 33;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*37+ 8) = (uint64_t)SRC(ip, i*64+13) >> 31;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*37+ 8) |= (uint64_t)SRC(ip, i*64+14) << 6 | (uint64_t)SRC1(ip, i*64+15) << 43;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*37+ 9) = (uint64_t)SRC(ip, i*64+15) >> 21;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*37+ 9) |= (uint64_t)SRC(ip, i*64+16) << 16 | (uint64_t)SRC1(ip, i*64+17) << 53;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*37+10) = (uint64_t)SRC(ip, i*64+17) >> 11;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*37+10) |= (uint64_t)SRC(ip, i*64+18) << 26 | (uint64_t)SRC1(ip, i*64+19) << 63;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*37+11) = (uint64_t)SRC(ip, i*64+19) >> 1 | (uint64_t)SRC1(ip, i*64+20) << 36;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*37+12) = (uint64_t)SRC(ip, i*64+20) >> 28;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*37+12) |= (uint64_t)SRC(ip, i*64+21) << 9 | (uint64_t)SRC1(ip, i*64+22) << 46;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*37+13) = (uint64_t)SRC(ip, i*64+22) >> 18;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*37+13) |= (uint64_t)SRC(ip, i*64+23) << 19 | (uint64_t)SRC1(ip, i*64+24) << 56;\ + IPPB(ip, 
i*64+24, parm); *((uint64_t *)op+i*37+14) = (uint64_t)SRC(ip, i*64+24) >> 8 | (uint64_t)SRC1(ip, i*64+25) << 29;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*37+15) = (uint64_t)SRC(ip, i*64+25) >> 35;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*37+15) |= (uint64_t)SRC(ip, i*64+26) << 2 | (uint64_t)SRC1(ip, i*64+27) << 39;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*37+16) = (uint64_t)SRC(ip, i*64+27) >> 25;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*37+16) |= (uint64_t)SRC(ip, i*64+28) << 12 | (uint64_t)SRC1(ip, i*64+29) << 49;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*37+17) = (uint64_t)SRC(ip, i*64+29) >> 15;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*37+17) |= (uint64_t)SRC(ip, i*64+30) << 22 | (uint64_t)SRC1(ip, i*64+31) << 59;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*37+18) = (uint64_t)SRC(ip, i*64+31) >> 5;\ +} + +#define BITPACK64_37(ip, op, parm) { \ + BITBLK64_37(ip, 0, op, parm); SRCI(ip); op += 37*4/sizeof(op[0]);\ +} + +#define BITBLK64_38(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*19+ 0) = (uint64_t)SRC(ip, i*32+ 0) | (uint64_t)SRC1(ip, i*32+1) << 38;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*19+ 1) = (uint64_t)SRC(ip, i*32+ 1) >> 26;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*19+ 1) |= (uint64_t)SRC(ip, i*32+ 2) << 12 | (uint64_t)SRC1(ip, i*32+3) << 50;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*19+ 2) = (uint64_t)SRC(ip, i*32+ 3) >> 14;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*19+ 2) |= (uint64_t)SRC(ip, i*32+ 4) << 24 | (uint64_t)SRC1(ip, i*32+5) << 62;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*19+ 3) = (uint64_t)SRC(ip, i*32+ 5) >> 2 | (uint64_t)SRC1(ip, i*32+6) << 36;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*19+ 4) = (uint64_t)SRC(ip, i*32+ 6) >> 28;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*19+ 4) |= (uint64_t)SRC(ip, i*32+ 7) << 10 | (uint64_t)SRC1(ip, i*32+8) << 48;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*19+ 5) = (uint64_t)SRC(ip, i*32+ 8) >> 16;\ + 
IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*19+ 5) |= (uint64_t)SRC(ip, i*32+ 9) << 22 | (uint64_t)SRC1(ip, i*32+10) << 60;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*19+ 6) = (uint64_t)SRC(ip, i*32+10) >> 4 | (uint64_t)SRC1(ip, i*32+11) << 34;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*19+ 7) = (uint64_t)SRC(ip, i*32+11) >> 30;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*19+ 7) |= (uint64_t)SRC(ip, i*32+12) << 8 | (uint64_t)SRC1(ip, i*32+13) << 46;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*19+ 8) = (uint64_t)SRC(ip, i*32+13) >> 18;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*19+ 8) |= (uint64_t)SRC(ip, i*32+14) << 20 | (uint64_t)SRC1(ip, i*32+15) << 58;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*19+ 9) = (uint64_t)SRC(ip, i*32+15) >> 6 | (uint64_t)SRC1(ip, i*32+16) << 32;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*19+10) = (uint64_t)SRC(ip, i*32+16) >> 32;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*19+10) |= (uint64_t)SRC(ip, i*32+17) << 6 | (uint64_t)SRC1(ip, i*32+18) << 44;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*19+11) = (uint64_t)SRC(ip, i*32+18) >> 20;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*19+11) |= (uint64_t)SRC(ip, i*32+19) << 18 | (uint64_t)SRC1(ip, i*32+20) << 56;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*19+12) = (uint64_t)SRC(ip, i*32+20) >> 8 | (uint64_t)SRC1(ip, i*32+21) << 30;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*19+13) = (uint64_t)SRC(ip, i*32+21) >> 34;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*19+13) |= (uint64_t)SRC(ip, i*32+22) << 4 | (uint64_t)SRC1(ip, i*32+23) << 42;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*19+14) = (uint64_t)SRC(ip, i*32+23) >> 22;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*19+14) |= (uint64_t)SRC(ip, i*32+24) << 16 | (uint64_t)SRC1(ip, i*32+25) << 54;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*19+15) = (uint64_t)SRC(ip, i*32+25) >> 10 | (uint64_t)SRC1(ip, i*32+26) << 28;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*19+16) = (uint64_t)SRC(ip, 
i*32+26) >> 36;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*19+16) |= (uint64_t)SRC(ip, i*32+27) << 2 | (uint64_t)SRC1(ip, i*32+28) << 40;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*19+17) = (uint64_t)SRC(ip, i*32+28) >> 24;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*19+17) |= (uint64_t)SRC(ip, i*32+29) << 14 | (uint64_t)SRC1(ip, i*32+30) << 52;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*19+18) = (uint64_t)SRC(ip, i*32+30) >> 12;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*19+18) |= (uint64_t)SRC(ip, i*32+31) << 26;\ +} + +#define BITPACK64_38(ip, op, parm) { \ + BITBLK64_38(ip, 0, op, parm); SRCI(ip); op += 38*4/sizeof(op[0]);\ +} + +#define BITBLK64_39(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*39+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 39;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*39+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 25;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*39+ 1) |= (uint64_t)SRC(ip, i*64+ 2) << 14 | (uint64_t)SRC1(ip, i*64+3) << 53;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*39+ 2) = (uint64_t)SRC(ip, i*64+ 3) >> 11 | (uint64_t)SRC1(ip, i*64+4) << 28;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*39+ 3) = (uint64_t)SRC(ip, i*64+ 4) >> 36;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*39+ 3) |= (uint64_t)SRC(ip, i*64+ 5) << 3 | (uint64_t)SRC1(ip, i*64+6) << 42;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*39+ 4) = (uint64_t)SRC(ip, i*64+ 6) >> 22;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*39+ 4) |= (uint64_t)SRC(ip, i*64+ 7) << 17 | (uint64_t)SRC1(ip, i*64+8) << 56;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*39+ 5) = (uint64_t)SRC(ip, i*64+ 8) >> 8 | (uint64_t)SRC1(ip, i*64+9) << 31;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*39+ 6) = (uint64_t)SRC(ip, i*64+ 9) >> 33;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*39+ 6) |= (uint64_t)SRC(ip, i*64+10) << 6 | (uint64_t)SRC1(ip, i*64+11) << 45;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*39+ 7) = 
(uint64_t)SRC(ip, i*64+11) >> 19;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*39+ 7) |= (uint64_t)SRC(ip, i*64+12) << 20 | (uint64_t)SRC1(ip, i*64+13) << 59;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*39+ 8) = (uint64_t)SRC(ip, i*64+13) >> 5 | (uint64_t)SRC1(ip, i*64+14) << 34;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*39+ 9) = (uint64_t)SRC(ip, i*64+14) >> 30;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*39+ 9) |= (uint64_t)SRC(ip, i*64+15) << 9 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*39+10) = (uint64_t)SRC(ip, i*64+16) >> 16;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*39+10) |= (uint64_t)SRC(ip, i*64+17) << 23 | (uint64_t)SRC1(ip, i*64+18) << 62;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*39+11) = (uint64_t)SRC(ip, i*64+18) >> 2 | (uint64_t)SRC1(ip, i*64+19) << 37;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*39+12) = (uint64_t)SRC(ip, i*64+19) >> 27;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*39+12) |= (uint64_t)SRC(ip, i*64+20) << 12 | (uint64_t)SRC1(ip, i*64+21) << 51;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*39+13) = (uint64_t)SRC(ip, i*64+21) >> 13 | (uint64_t)SRC1(ip, i*64+22) << 26;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*39+14) = (uint64_t)SRC(ip, i*64+22) >> 38;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*39+14) |= (uint64_t)SRC(ip, i*64+23) << 1 | (uint64_t)SRC1(ip, i*64+24) << 40;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*39+15) = (uint64_t)SRC(ip, i*64+24) >> 24;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*39+15) |= (uint64_t)SRC(ip, i*64+25) << 15 | (uint64_t)SRC1(ip, i*64+26) << 54;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*39+16) = (uint64_t)SRC(ip, i*64+26) >> 10 | (uint64_t)SRC1(ip, i*64+27) << 29;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*39+17) = (uint64_t)SRC(ip, i*64+27) >> 35;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*39+17) |= (uint64_t)SRC(ip, i*64+28) << 4 | (uint64_t)SRC1(ip, i*64+29) << 43;\ + IPPB(ip, i*64+29, parm); 
*((uint64_t *)op+i*39+18) = (uint64_t)SRC(ip, i*64+29) >> 21;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*39+18) |= (uint64_t)SRC(ip, i*64+30) << 18 | (uint64_t)SRC1(ip, i*64+31) << 57;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*39+19) = (uint64_t)SRC(ip, i*64+31) >> 7;\ +} + +#define BITPACK64_39(ip, op, parm) { \ + BITBLK64_39(ip, 0, op, parm); SRCI(ip); op += 39*4/sizeof(op[0]);\ +} + +#define BITBLK64_40(ip, i, op, parm) { ;\ + IPPB(ip, i*8+ 0, parm); *((uint64_t *)op+i*5+ 0) = (uint64_t)SRC(ip, i*8+ 0) | (uint64_t)SRC1(ip, i*8+1) << 40;\ + IPPB(ip, i*8+ 1, parm); *((uint64_t *)op+i*5+ 1) = (uint64_t)SRC(ip, i*8+ 1) >> 24;\ + IPPB(ip, i*8+ 2, parm); *((uint64_t *)op+i*5+ 1) |= (uint64_t)SRC(ip, i*8+ 2) << 16 | (uint64_t)SRC1(ip, i*8+3) << 56;\ + IPPB(ip, i*8+ 3, parm); *((uint64_t *)op+i*5+ 2) = (uint64_t)SRC(ip, i*8+ 3) >> 8 | (uint64_t)SRC1(ip, i*8+4) << 32;\ + IPPB(ip, i*8+ 4, parm); *((uint64_t *)op+i*5+ 3) = (uint64_t)SRC(ip, i*8+ 4) >> 32;\ + IPPB(ip, i*8+ 5, parm); *((uint64_t *)op+i*5+ 3) |= (uint64_t)SRC(ip, i*8+ 5) << 8 | (uint64_t)SRC1(ip, i*8+6) << 48;\ + IPPB(ip, i*8+ 6, parm); *((uint64_t *)op+i*5+ 4) = (uint64_t)SRC(ip, i*8+ 6) >> 16;\ + IPPB(ip, i*8+ 7, parm); *((uint64_t *)op+i*5+ 4) |= (uint64_t)SRC(ip, i*8+ 7) << 24;\ +} + +#define BITPACK64_40(ip, op, parm) { \ + BITBLK64_40(ip, 0, op, parm);\ + BITBLK64_40(ip, 1, op, parm);\ + BITBLK64_40(ip, 2, op, parm);\ + BITBLK64_40(ip, 3, op, parm); SRCI(ip); op += 40*4/sizeof(op[0]);\ +} + +#define BITBLK64_41(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*41+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 41;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*41+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 23;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*41+ 1) |= (uint64_t)SRC(ip, i*64+ 2) << 18 | (uint64_t)SRC1(ip, i*64+3) << 59;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*41+ 2) = (uint64_t)SRC(ip, i*64+ 3) >> 5 | (uint64_t)SRC1(ip, i*64+4) << 36;\ + IPPB(ip, 
i*64+ 4, parm); *((uint64_t *)op+i*41+ 3) = (uint64_t)SRC(ip, i*64+ 4) >> 28;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*41+ 3) |= (uint64_t)SRC(ip, i*64+ 5) << 13 | (uint64_t)SRC1(ip, i*64+6) << 54;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*41+ 4) = (uint64_t)SRC(ip, i*64+ 6) >> 10 | (uint64_t)SRC1(ip, i*64+7) << 31;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*41+ 5) = (uint64_t)SRC(ip, i*64+ 7) >> 33;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*41+ 5) |= (uint64_t)SRC(ip, i*64+ 8) << 8 | (uint64_t)SRC1(ip, i*64+9) << 49;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*41+ 6) = (uint64_t)SRC(ip, i*64+ 9) >> 15 | (uint64_t)SRC1(ip, i*64+10) << 26;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*41+ 7) = (uint64_t)SRC(ip, i*64+10) >> 38;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*41+ 7) |= (uint64_t)SRC(ip, i*64+11) << 3 | (uint64_t)SRC1(ip, i*64+12) << 44;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*41+ 8) = (uint64_t)SRC(ip, i*64+12) >> 20;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*41+ 8) |= (uint64_t)SRC(ip, i*64+13) << 21 | (uint64_t)SRC1(ip, i*64+14) << 62;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*41+ 9) = (uint64_t)SRC(ip, i*64+14) >> 2 | (uint64_t)SRC1(ip, i*64+15) << 39;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*41+10) = (uint64_t)SRC(ip, i*64+15) >> 25;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*41+10) |= (uint64_t)SRC(ip, i*64+16) << 16 | (uint64_t)SRC1(ip, i*64+17) << 57;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*41+11) = (uint64_t)SRC(ip, i*64+17) >> 7 | (uint64_t)SRC1(ip, i*64+18) << 34;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*41+12) = (uint64_t)SRC(ip, i*64+18) >> 30;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*41+12) |= (uint64_t)SRC(ip, i*64+19) << 11 | (uint64_t)SRC1(ip, i*64+20) << 52;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*41+13) = (uint64_t)SRC(ip, i*64+20) >> 12 | (uint64_t)SRC1(ip, i*64+21) << 29;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*41+14) = (uint64_t)SRC(ip, i*64+21) >> 
35;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*41+14) |= (uint64_t)SRC(ip, i*64+22) << 6 | (uint64_t)SRC1(ip, i*64+23) << 47;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*41+15) = (uint64_t)SRC(ip, i*64+23) >> 17 | (uint64_t)SRC1(ip, i*64+24) << 24;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*41+16) = (uint64_t)SRC(ip, i*64+24) >> 40;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*41+16) |= (uint64_t)SRC(ip, i*64+25) << 1 | (uint64_t)SRC1(ip, i*64+26) << 42;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*41+17) = (uint64_t)SRC(ip, i*64+26) >> 22;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*41+17) |= (uint64_t)SRC(ip, i*64+27) << 19 | (uint64_t)SRC1(ip, i*64+28) << 60;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*41+18) = (uint64_t)SRC(ip, i*64+28) >> 4 | (uint64_t)SRC1(ip, i*64+29) << 37;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*41+19) = (uint64_t)SRC(ip, i*64+29) >> 27;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*41+19) |= (uint64_t)SRC(ip, i*64+30) << 14 | (uint64_t)SRC1(ip, i*64+31) << 55;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*41+20) = (uint64_t)SRC(ip, i*64+31) >> 9;\ +} + +#define BITPACK64_41(ip, op, parm) { \ + BITBLK64_41(ip, 0, op, parm); SRCI(ip); op += 41*4/sizeof(op[0]);\ +} + +#define BITBLK64_42(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*21+ 0) = (uint64_t)SRC(ip, i*32+ 0) | (uint64_t)SRC1(ip, i*32+1) << 42;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*21+ 1) = (uint64_t)SRC(ip, i*32+ 1) >> 22;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*21+ 1) |= (uint64_t)SRC(ip, i*32+ 2) << 20 | (uint64_t)SRC1(ip, i*32+3) << 62;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*21+ 2) = (uint64_t)SRC(ip, i*32+ 3) >> 2 | (uint64_t)SRC1(ip, i*32+4) << 40;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*21+ 3) = (uint64_t)SRC(ip, i*32+ 4) >> 24;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*21+ 3) |= (uint64_t)SRC(ip, i*32+ 5) << 18 | (uint64_t)SRC1(ip, i*32+6) << 60;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t 
*)op+i*21+ 4) = (uint64_t)SRC(ip, i*32+ 6) >> 4 | (uint64_t)SRC1(ip, i*32+7) << 38;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*21+ 5) = (uint64_t)SRC(ip, i*32+ 7) >> 26;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*21+ 5) |= (uint64_t)SRC(ip, i*32+ 8) << 16 | (uint64_t)SRC1(ip, i*32+9) << 58;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*21+ 6) = (uint64_t)SRC(ip, i*32+ 9) >> 6 | (uint64_t)SRC1(ip, i*32+10) << 36;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*21+ 7) = (uint64_t)SRC(ip, i*32+10) >> 28;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*21+ 7) |= (uint64_t)SRC(ip, i*32+11) << 14 | (uint64_t)SRC1(ip, i*32+12) << 56;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*21+ 8) = (uint64_t)SRC(ip, i*32+12) >> 8 | (uint64_t)SRC1(ip, i*32+13) << 34;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*21+ 9) = (uint64_t)SRC(ip, i*32+13) >> 30;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*21+ 9) |= (uint64_t)SRC(ip, i*32+14) << 12 | (uint64_t)SRC1(ip, i*32+15) << 54;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*21+10) = (uint64_t)SRC(ip, i*32+15) >> 10 | (uint64_t)SRC1(ip, i*32+16) << 32;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*21+11) = (uint64_t)SRC(ip, i*32+16) >> 32;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*21+11) |= (uint64_t)SRC(ip, i*32+17) << 10 | (uint64_t)SRC1(ip, i*32+18) << 52;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*21+12) = (uint64_t)SRC(ip, i*32+18) >> 12 | (uint64_t)SRC1(ip, i*32+19) << 30;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*21+13) = (uint64_t)SRC(ip, i*32+19) >> 34;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*21+13) |= (uint64_t)SRC(ip, i*32+20) << 8 | (uint64_t)SRC1(ip, i*32+21) << 50;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*21+14) = (uint64_t)SRC(ip, i*32+21) >> 14 | (uint64_t)SRC1(ip, i*32+22) << 28;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*21+15) = (uint64_t)SRC(ip, i*32+22) >> 36;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*21+15) |= (uint64_t)SRC(ip, i*32+23) << 6 | (uint64_t)SRC1(ip, 
i*32+24) << 48;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*21+16) = (uint64_t)SRC(ip, i*32+24) >> 16 | (uint64_t)SRC1(ip, i*32+25) << 26;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*21+17) = (uint64_t)SRC(ip, i*32+25) >> 38;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*21+17) |= (uint64_t)SRC(ip, i*32+26) << 4 | (uint64_t)SRC1(ip, i*32+27) << 46;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*21+18) = (uint64_t)SRC(ip, i*32+27) >> 18 | (uint64_t)SRC1(ip, i*32+28) << 24;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*21+19) = (uint64_t)SRC(ip, i*32+28) >> 40;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*21+19) |= (uint64_t)SRC(ip, i*32+29) << 2 | (uint64_t)SRC1(ip, i*32+30) << 44;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*21+20) = (uint64_t)SRC(ip, i*32+30) >> 20;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*21+20) |= (uint64_t)SRC(ip, i*32+31) << 22;\ +} + +#define BITPACK64_42(ip, op, parm) { \ + BITBLK64_42(ip, 0, op, parm); SRCI(ip); op += 42*4/sizeof(op[0]);\ +} + +#define BITBLK64_43(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*43+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 43;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*43+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 21 | (uint64_t)SRC1(ip, i*64+2) << 22;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*43+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 42;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*43+ 2) |= (uint64_t)SRC(ip, i*64+ 3) << 1 | (uint64_t)SRC1(ip, i*64+4) << 44;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*43+ 3) = (uint64_t)SRC(ip, i*64+ 4) >> 20 | (uint64_t)SRC1(ip, i*64+5) << 23;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*43+ 4) = (uint64_t)SRC(ip, i*64+ 5) >> 41;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*43+ 4) |= (uint64_t)SRC(ip, i*64+ 6) << 2 | (uint64_t)SRC1(ip, i*64+7) << 45;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*43+ 5) = (uint64_t)SRC(ip, i*64+ 7) >> 19 | (uint64_t)SRC1(ip, i*64+8) << 24;\ + IPPB(ip, i*64+ 8, parm); 
*((uint64_t *)op+i*43+ 6) = (uint64_t)SRC(ip, i*64+ 8) >> 40;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*43+ 6) |= (uint64_t)SRC(ip, i*64+ 9) << 3 | (uint64_t)SRC1(ip, i*64+10) << 46;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*43+ 7) = (uint64_t)SRC(ip, i*64+10) >> 18 | (uint64_t)SRC1(ip, i*64+11) << 25;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*43+ 8) = (uint64_t)SRC(ip, i*64+11) >> 39;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*43+ 8) |= (uint64_t)SRC(ip, i*64+12) << 4 | (uint64_t)SRC1(ip, i*64+13) << 47;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*43+ 9) = (uint64_t)SRC(ip, i*64+13) >> 17 | (uint64_t)SRC1(ip, i*64+14) << 26;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*43+10) = (uint64_t)SRC(ip, i*64+14) >> 38;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*43+10) |= (uint64_t)SRC(ip, i*64+15) << 5 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*43+11) = (uint64_t)SRC(ip, i*64+16) >> 16 | (uint64_t)SRC1(ip, i*64+17) << 27;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*43+12) = (uint64_t)SRC(ip, i*64+17) >> 37;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*43+12) |= (uint64_t)SRC(ip, i*64+18) << 6 | (uint64_t)SRC1(ip, i*64+19) << 49;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*43+13) = (uint64_t)SRC(ip, i*64+19) >> 15 | (uint64_t)SRC1(ip, i*64+20) << 28;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*43+14) = (uint64_t)SRC(ip, i*64+20) >> 36;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*43+14) |= (uint64_t)SRC(ip, i*64+21) << 7 | (uint64_t)SRC1(ip, i*64+22) << 50;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*43+15) = (uint64_t)SRC(ip, i*64+22) >> 14 | (uint64_t)SRC1(ip, i*64+23) << 29;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*43+16) = (uint64_t)SRC(ip, i*64+23) >> 35;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*43+16) |= (uint64_t)SRC(ip, i*64+24) << 8 | (uint64_t)SRC1(ip, i*64+25) << 51;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*43+17) = (uint64_t)SRC(ip, i*64+25) >> 13 | 
(uint64_t)SRC1(ip, i*64+26) << 30;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*43+18) = (uint64_t)SRC(ip, i*64+26) >> 34;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*43+18) |= (uint64_t)SRC(ip, i*64+27) << 9 | (uint64_t)SRC1(ip, i*64+28) << 52;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*43+19) = (uint64_t)SRC(ip, i*64+28) >> 12 | (uint64_t)SRC1(ip, i*64+29) << 31;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*43+20) = (uint64_t)SRC(ip, i*64+29) >> 33;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*43+20) |= (uint64_t)SRC(ip, i*64+30) << 10 | (uint64_t)SRC1(ip, i*64+31) << 53;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*43+21) = (uint64_t)SRC(ip, i*64+31) >> 11;\ +} + +#define BITPACK64_43(ip, op, parm) { \ + BITBLK64_43(ip, 0, op, parm); SRCI(ip); op += 43*4/sizeof(op[0]);\ +} + +#define BITBLK64_44(ip, i, op, parm) { ;\ + IPPB(ip, i*16+ 0, parm); *((uint64_t *)op+i*11+ 0) = (uint64_t)SRC(ip, i*16+ 0) | (uint64_t)SRC1(ip, i*16+1) << 44;\ + IPPB(ip, i*16+ 1, parm); *((uint64_t *)op+i*11+ 1) = (uint64_t)SRC(ip, i*16+ 1) >> 20 | (uint64_t)SRC1(ip, i*16+2) << 24;\ + IPPB(ip, i*16+ 2, parm); *((uint64_t *)op+i*11+ 2) = (uint64_t)SRC(ip, i*16+ 2) >> 40;\ + IPPB(ip, i*16+ 3, parm); *((uint64_t *)op+i*11+ 2) |= (uint64_t)SRC(ip, i*16+ 3) << 4 | (uint64_t)SRC1(ip, i*16+4) << 48;\ + IPPB(ip, i*16+ 4, parm); *((uint64_t *)op+i*11+ 3) = (uint64_t)SRC(ip, i*16+ 4) >> 16 | (uint64_t)SRC1(ip, i*16+5) << 28;\ + IPPB(ip, i*16+ 5, parm); *((uint64_t *)op+i*11+ 4) = (uint64_t)SRC(ip, i*16+ 5) >> 36;\ + IPPB(ip, i*16+ 6, parm); *((uint64_t *)op+i*11+ 4) |= (uint64_t)SRC(ip, i*16+ 6) << 8 | (uint64_t)SRC1(ip, i*16+7) << 52;\ + IPPB(ip, i*16+ 7, parm); *((uint64_t *)op+i*11+ 5) = (uint64_t)SRC(ip, i*16+ 7) >> 12 | (uint64_t)SRC1(ip, i*16+8) << 32;\ + IPPB(ip, i*16+ 8, parm); *((uint64_t *)op+i*11+ 6) = (uint64_t)SRC(ip, i*16+ 8) >> 32;\ + IPPB(ip, i*16+ 9, parm); *((uint64_t *)op+i*11+ 6) |= (uint64_t)SRC(ip, i*16+ 9) << 12 | (uint64_t)SRC1(ip, i*16+10) << 56;\ + IPPB(ip, 
i*16+10, parm); *((uint64_t *)op+i*11+ 7) = (uint64_t)SRC(ip, i*16+10) >> 8 | (uint64_t)SRC1(ip, i*16+11) << 36;\ + IPPB(ip, i*16+11, parm); *((uint64_t *)op+i*11+ 8) = (uint64_t)SRC(ip, i*16+11) >> 28;\ + IPPB(ip, i*16+12, parm); *((uint64_t *)op+i*11+ 8) |= (uint64_t)SRC(ip, i*16+12) << 16 | (uint64_t)SRC1(ip, i*16+13) << 60;\ + IPPB(ip, i*16+13, parm); *((uint64_t *)op+i*11+ 9) = (uint64_t)SRC(ip, i*16+13) >> 4 | (uint64_t)SRC1(ip, i*16+14) << 40;\ + IPPB(ip, i*16+14, parm); *((uint64_t *)op+i*11+10) = (uint64_t)SRC(ip, i*16+14) >> 24;\ + IPPB(ip, i*16+15, parm); *((uint64_t *)op+i*11+10) |= (uint64_t)SRC(ip, i*16+15) << 20;\ +} + +#define BITPACK64_44(ip, op, parm) { \ + BITBLK64_44(ip, 0, op, parm);\ + BITBLK64_44(ip, 1, op, parm); SRCI(ip); op += 44*4/sizeof(op[0]);\ +} + +#define BITBLK64_45(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*45+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 45;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*45+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 19 | (uint64_t)SRC1(ip, i*64+2) << 26;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*45+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 38;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*45+ 2) |= (uint64_t)SRC(ip, i*64+ 3) << 7 | (uint64_t)SRC1(ip, i*64+4) << 52;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*45+ 3) = (uint64_t)SRC(ip, i*64+ 4) >> 12 | (uint64_t)SRC1(ip, i*64+5) << 33;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*45+ 4) = (uint64_t)SRC(ip, i*64+ 5) >> 31;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*45+ 4) |= (uint64_t)SRC(ip, i*64+ 6) << 14 | (uint64_t)SRC1(ip, i*64+7) << 59;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*45+ 5) = (uint64_t)SRC(ip, i*64+ 7) >> 5 | (uint64_t)SRC1(ip, i*64+8) << 40;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*45+ 6) = (uint64_t)SRC(ip, i*64+ 8) >> 24 | (uint64_t)SRC1(ip, i*64+9) << 21;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*45+ 7) = (uint64_t)SRC(ip, i*64+ 9) >> 43;\ + IPPB(ip, i*64+10, parm); 
*((uint64_t *)op+i*45+ 7) |= (uint64_t)SRC(ip, i*64+10) << 2 | (uint64_t)SRC1(ip, i*64+11) << 47;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*45+ 8) = (uint64_t)SRC(ip, i*64+11) >> 17 | (uint64_t)SRC1(ip, i*64+12) << 28;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*45+ 9) = (uint64_t)SRC(ip, i*64+12) >> 36;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*45+ 9) |= (uint64_t)SRC(ip, i*64+13) << 9 | (uint64_t)SRC1(ip, i*64+14) << 54;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*45+10) = (uint64_t)SRC(ip, i*64+14) >> 10 | (uint64_t)SRC1(ip, i*64+15) << 35;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*45+11) = (uint64_t)SRC(ip, i*64+15) >> 29;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*45+11) |= (uint64_t)SRC(ip, i*64+16) << 16 | (uint64_t)SRC1(ip, i*64+17) << 61;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*45+12) = (uint64_t)SRC(ip, i*64+17) >> 3 | (uint64_t)SRC1(ip, i*64+18) << 42;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*45+13) = (uint64_t)SRC(ip, i*64+18) >> 22 | (uint64_t)SRC1(ip, i*64+19) << 23;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*45+14) = (uint64_t)SRC(ip, i*64+19) >> 41;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*45+14) |= (uint64_t)SRC(ip, i*64+20) << 4 | (uint64_t)SRC1(ip, i*64+21) << 49;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*45+15) = (uint64_t)SRC(ip, i*64+21) >> 15 | (uint64_t)SRC1(ip, i*64+22) << 30;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*45+16) = (uint64_t)SRC(ip, i*64+22) >> 34;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*45+16) |= (uint64_t)SRC(ip, i*64+23) << 11 | (uint64_t)SRC1(ip, i*64+24) << 56;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*45+17) = (uint64_t)SRC(ip, i*64+24) >> 8 | (uint64_t)SRC1(ip, i*64+25) << 37;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*45+18) = (uint64_t)SRC(ip, i*64+25) >> 27;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*45+18) |= (uint64_t)SRC(ip, i*64+26) << 18 | (uint64_t)SRC1(ip, i*64+27) << 63;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*45+19) = 
(uint64_t)SRC(ip, i*64+27) >> 1 | (uint64_t)SRC1(ip, i*64+28) << 44;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*45+20) = (uint64_t)SRC(ip, i*64+28) >> 20 | (uint64_t)SRC1(ip, i*64+29) << 25;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*45+21) = (uint64_t)SRC(ip, i*64+29) >> 39;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*45+21) |= (uint64_t)SRC(ip, i*64+30) << 6 | (uint64_t)SRC1(ip, i*64+31) << 51;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*45+22) = (uint64_t)SRC(ip, i*64+31) >> 13;\ +} + +#define BITPACK64_45(ip, op, parm) { \ + BITBLK64_45(ip, 0, op, parm); SRCI(ip); op += 45*4/sizeof(op[0]);\ +} + +#define BITBLK64_46(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*23+ 0) = (uint64_t)SRC(ip, i*32+ 0) | (uint64_t)SRC1(ip, i*32+1) << 46;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*23+ 1) = (uint64_t)SRC(ip, i*32+ 1) >> 18 | (uint64_t)SRC1(ip, i*32+2) << 28;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*23+ 2) = (uint64_t)SRC(ip, i*32+ 2) >> 36;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*23+ 2) |= (uint64_t)SRC(ip, i*32+ 3) << 10 | (uint64_t)SRC1(ip, i*32+4) << 56;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*23+ 3) = (uint64_t)SRC(ip, i*32+ 4) >> 8 | (uint64_t)SRC1(ip, i*32+5) << 38;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*23+ 4) = (uint64_t)SRC(ip, i*32+ 5) >> 26 | (uint64_t)SRC1(ip, i*32+6) << 20;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*23+ 5) = (uint64_t)SRC(ip, i*32+ 6) >> 44;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*23+ 5) |= (uint64_t)SRC(ip, i*32+ 7) << 2 | (uint64_t)SRC1(ip, i*32+8) << 48;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*23+ 6) = (uint64_t)SRC(ip, i*32+ 8) >> 16 | (uint64_t)SRC1(ip, i*32+9) << 30;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*23+ 7) = (uint64_t)SRC(ip, i*32+ 9) >> 34;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*23+ 7) |= (uint64_t)SRC(ip, i*32+10) << 12 | (uint64_t)SRC1(ip, i*32+11) << 58;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*23+ 8) = 
(uint64_t)SRC(ip, i*32+11) >> 6 | (uint64_t)SRC1(ip, i*32+12) << 40;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*23+ 9) = (uint64_t)SRC(ip, i*32+12) >> 24 | (uint64_t)SRC1(ip, i*32+13) << 22;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*23+10) = (uint64_t)SRC(ip, i*32+13) >> 42;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*23+10) |= (uint64_t)SRC(ip, i*32+14) << 4 | (uint64_t)SRC1(ip, i*32+15) << 50;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*23+11) = (uint64_t)SRC(ip, i*32+15) >> 14 | (uint64_t)SRC1(ip, i*32+16) << 32;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*23+12) = (uint64_t)SRC(ip, i*32+16) >> 32;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*23+12) |= (uint64_t)SRC(ip, i*32+17) << 14 | (uint64_t)SRC1(ip, i*32+18) << 60;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*23+13) = (uint64_t)SRC(ip, i*32+18) >> 4 | (uint64_t)SRC1(ip, i*32+19) << 42;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*23+14) = (uint64_t)SRC(ip, i*32+19) >> 22 | (uint64_t)SRC1(ip, i*32+20) << 24;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*23+15) = (uint64_t)SRC(ip, i*32+20) >> 40;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*23+15) |= (uint64_t)SRC(ip, i*32+21) << 6 | (uint64_t)SRC1(ip, i*32+22) << 52;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*23+16) = (uint64_t)SRC(ip, i*32+22) >> 12 | (uint64_t)SRC1(ip, i*32+23) << 34;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*23+17) = (uint64_t)SRC(ip, i*32+23) >> 30;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*23+17) |= (uint64_t)SRC(ip, i*32+24) << 16 | (uint64_t)SRC1(ip, i*32+25) << 62;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*23+18) = (uint64_t)SRC(ip, i*32+25) >> 2 | (uint64_t)SRC1(ip, i*32+26) << 44;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*23+19) = (uint64_t)SRC(ip, i*32+26) >> 20 | (uint64_t)SRC1(ip, i*32+27) << 26;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*23+20) = (uint64_t)SRC(ip, i*32+27) >> 38;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*23+20) |= (uint64_t)SRC(ip, i*32+28) << 8 | 
(uint64_t)SRC1(ip, i*32+29) << 54;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*23+21) = (uint64_t)SRC(ip, i*32+29) >> 10 | (uint64_t)SRC1(ip, i*32+30) << 36;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*23+22) = (uint64_t)SRC(ip, i*32+30) >> 28;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*23+22) |= (uint64_t)SRC(ip, i*32+31) << 18;\ +} + +#define BITPACK64_46(ip, op, parm) { \ + BITBLK64_46(ip, 0, op, parm); SRCI(ip); op += 46*4/sizeof(op[0]);\ +} + +#define BITBLK64_47(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*47+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 47;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*47+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 17 | (uint64_t)SRC1(ip, i*64+2) << 30;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*47+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 34;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*47+ 2) |= (uint64_t)SRC(ip, i*64+ 3) << 13 | (uint64_t)SRC1(ip, i*64+4) << 60;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*47+ 3) = (uint64_t)SRC(ip, i*64+ 4) >> 4 | (uint64_t)SRC1(ip, i*64+5) << 43;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*47+ 4) = (uint64_t)SRC(ip, i*64+ 5) >> 21 | (uint64_t)SRC1(ip, i*64+6) << 26;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*47+ 5) = (uint64_t)SRC(ip, i*64+ 6) >> 38;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*47+ 5) |= (uint64_t)SRC(ip, i*64+ 7) << 9 | (uint64_t)SRC1(ip, i*64+8) << 56;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*47+ 6) = (uint64_t)SRC(ip, i*64+ 8) >> 8 | (uint64_t)SRC1(ip, i*64+9) << 39;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*47+ 7) = (uint64_t)SRC(ip, i*64+ 9) >> 25 | (uint64_t)SRC1(ip, i*64+10) << 22;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*47+ 8) = (uint64_t)SRC(ip, i*64+10) >> 42;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*47+ 8) |= (uint64_t)SRC(ip, i*64+11) << 5 | (uint64_t)SRC1(ip, i*64+12) << 52;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*47+ 9) = (uint64_t)SRC(ip, i*64+12) >> 12 | 
(uint64_t)SRC1(ip, i*64+13) << 35;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*47+10) = (uint64_t)SRC(ip, i*64+13) >> 29 | (uint64_t)SRC1(ip, i*64+14) << 18;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*47+11) = (uint64_t)SRC(ip, i*64+14) >> 46;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*47+11) |= (uint64_t)SRC(ip, i*64+15) << 1 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*47+12) = (uint64_t)SRC(ip, i*64+16) >> 16 | (uint64_t)SRC1(ip, i*64+17) << 31;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*47+13) = (uint64_t)SRC(ip, i*64+17) >> 33;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*47+13) |= (uint64_t)SRC(ip, i*64+18) << 14 | (uint64_t)SRC1(ip, i*64+19) << 61;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*47+14) = (uint64_t)SRC(ip, i*64+19) >> 3 | (uint64_t)SRC1(ip, i*64+20) << 44;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*47+15) = (uint64_t)SRC(ip, i*64+20) >> 20 | (uint64_t)SRC1(ip, i*64+21) << 27;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*47+16) = (uint64_t)SRC(ip, i*64+21) >> 37;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*47+16) |= (uint64_t)SRC(ip, i*64+22) << 10 | (uint64_t)SRC1(ip, i*64+23) << 57;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*47+17) = (uint64_t)SRC(ip, i*64+23) >> 7 | (uint64_t)SRC1(ip, i*64+24) << 40;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*47+18) = (uint64_t)SRC(ip, i*64+24) >> 24 | (uint64_t)SRC1(ip, i*64+25) << 23;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*47+19) = (uint64_t)SRC(ip, i*64+25) >> 41;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*47+19) |= (uint64_t)SRC(ip, i*64+26) << 6 | (uint64_t)SRC1(ip, i*64+27) << 53;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*47+20) = (uint64_t)SRC(ip, i*64+27) >> 11 | (uint64_t)SRC1(ip, i*64+28) << 36;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*47+21) = (uint64_t)SRC(ip, i*64+28) >> 28 | (uint64_t)SRC1(ip, i*64+29) << 19;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*47+22) = (uint64_t)SRC(ip, i*64+29) >> 45;\ + 
IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*47+22) |= (uint64_t)SRC(ip, i*64+30) << 2 | (uint64_t)SRC1(ip, i*64+31) << 49;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*47+23) = (uint64_t)SRC(ip, i*64+31) >> 15;\ +} + +#define BITPACK64_47(ip, op, parm) { \ + BITBLK64_47(ip, 0, op, parm); SRCI(ip); op += 47*4/sizeof(op[0]);\ +} + +#define BITBLK64_48(ip, i, op, parm) { ;\ + IPPB(ip, i*4+ 0, parm); *((uint64_t *)op+i*3+ 0) = (uint64_t)SRC(ip, i*4+ 0) | (uint64_t)SRC1(ip, i*4+1) << 48;\ + IPPB(ip, i*4+ 1, parm); *((uint64_t *)op+i*3+ 1) = (uint64_t)SRC(ip, i*4+ 1) >> 16 | (uint64_t)SRC1(ip, i*4+2) << 32;\ + IPPB(ip, i*4+ 2, parm); *((uint64_t *)op+i*3+ 2) = (uint64_t)SRC(ip, i*4+ 2) >> 32;\ + IPPB(ip, i*4+ 3, parm); *((uint64_t *)op+i*3+ 2) |= (uint64_t)SRC(ip, i*4+ 3) << 16;\ +} + +#define BITPACK64_48(ip, op, parm) { \ + BITBLK64_48(ip, 0, op, parm);\ + BITBLK64_48(ip, 1, op, parm);\ + BITBLK64_48(ip, 2, op, parm);\ + BITBLK64_48(ip, 3, op, parm);\ + BITBLK64_48(ip, 4, op, parm);\ + BITBLK64_48(ip, 5, op, parm);\ + BITBLK64_48(ip, 6, op, parm);\ + BITBLK64_48(ip, 7, op, parm); SRCI(ip); op += 48*4/sizeof(op[0]);\ +} + +#define BITBLK64_49(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*49+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 49;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*49+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 15 | (uint64_t)SRC1(ip, i*64+2) << 34;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*49+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 30 | (uint64_t)SRC1(ip, i*64+3) << 19;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*49+ 3) = (uint64_t)SRC(ip, i*64+ 3) >> 45;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*49+ 3) |= (uint64_t)SRC(ip, i*64+ 4) << 4 | (uint64_t)SRC1(ip, i*64+5) << 53;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*49+ 4) = (uint64_t)SRC(ip, i*64+ 5) >> 11 | (uint64_t)SRC1(ip, i*64+6) << 38;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*49+ 5) = (uint64_t)SRC(ip, i*64+ 6) >> 26 | (uint64_t)SRC1(ip, 
i*64+7) << 23;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*49+ 6) = (uint64_t)SRC(ip, i*64+ 7) >> 41;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*49+ 6) |= (uint64_t)SRC(ip, i*64+ 8) << 8 | (uint64_t)SRC1(ip, i*64+9) << 57;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*49+ 7) = (uint64_t)SRC(ip, i*64+ 9) >> 7 | (uint64_t)SRC1(ip, i*64+10) << 42;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*49+ 8) = (uint64_t)SRC(ip, i*64+10) >> 22 | (uint64_t)SRC1(ip, i*64+11) << 27;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*49+ 9) = (uint64_t)SRC(ip, i*64+11) >> 37;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*49+ 9) |= (uint64_t)SRC(ip, i*64+12) << 12 | (uint64_t)SRC1(ip, i*64+13) << 61;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*49+10) = (uint64_t)SRC(ip, i*64+13) >> 3 | (uint64_t)SRC1(ip, i*64+14) << 46;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*49+11) = (uint64_t)SRC(ip, i*64+14) >> 18 | (uint64_t)SRC1(ip, i*64+15) << 31;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*49+12) = (uint64_t)SRC(ip, i*64+15) >> 33 | (uint64_t)SRC1(ip, i*64+16) << 16;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*49+13) = (uint64_t)SRC(ip, i*64+16) >> 48;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*49+13) |= (uint64_t)SRC(ip, i*64+17) << 1 | (uint64_t)SRC1(ip, i*64+18) << 50;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*49+14) = (uint64_t)SRC(ip, i*64+18) >> 14 | (uint64_t)SRC1(ip, i*64+19) << 35;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*49+15) = (uint64_t)SRC(ip, i*64+19) >> 29 | (uint64_t)SRC1(ip, i*64+20) << 20;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*49+16) = (uint64_t)SRC(ip, i*64+20) >> 44;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*49+16) |= (uint64_t)SRC(ip, i*64+21) << 5 | (uint64_t)SRC1(ip, i*64+22) << 54;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*49+17) = (uint64_t)SRC(ip, i*64+22) >> 10 | (uint64_t)SRC1(ip, i*64+23) << 39;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*49+18) = (uint64_t)SRC(ip, i*64+23) >> 25 | (uint64_t)SRC1(ip, 
i*64+24) << 24;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*49+19) = (uint64_t)SRC(ip, i*64+24) >> 40;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*49+19) |= (uint64_t)SRC(ip, i*64+25) << 9 | (uint64_t)SRC1(ip, i*64+26) << 58;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*49+20) = (uint64_t)SRC(ip, i*64+26) >> 6 | (uint64_t)SRC1(ip, i*64+27) << 43;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*49+21) = (uint64_t)SRC(ip, i*64+27) >> 21 | (uint64_t)SRC1(ip, i*64+28) << 28;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*49+22) = (uint64_t)SRC(ip, i*64+28) >> 36;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*49+22) |= (uint64_t)SRC(ip, i*64+29) << 13 | (uint64_t)SRC1(ip, i*64+30) << 62;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*49+23) = (uint64_t)SRC(ip, i*64+30) >> 2 | (uint64_t)SRC1(ip, i*64+31) << 47;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*49+24) = (uint64_t)SRC(ip, i*64+31) >> 17;\ +} + +#define BITPACK64_49(ip, op, parm) { \ + BITBLK64_49(ip, 0, op, parm); SRCI(ip); op += 49*4/sizeof(op[0]);\ +} + +#define BITBLK64_50(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*25+ 0) = (uint64_t)SRC(ip, i*32+ 0) | (uint64_t)SRC1(ip, i*32+1) << 50;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*25+ 1) = (uint64_t)SRC(ip, i*32+ 1) >> 14 | (uint64_t)SRC1(ip, i*32+2) << 36;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*25+ 2) = (uint64_t)SRC(ip, i*32+ 2) >> 28 | (uint64_t)SRC1(ip, i*32+3) << 22;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*25+ 3) = (uint64_t)SRC(ip, i*32+ 3) >> 42;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*25+ 3) |= (uint64_t)SRC(ip, i*32+ 4) << 8 | (uint64_t)SRC1(ip, i*32+5) << 58;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*25+ 4) = (uint64_t)SRC(ip, i*32+ 5) >> 6 | (uint64_t)SRC1(ip, i*32+6) << 44;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*25+ 5) = (uint64_t)SRC(ip, i*32+ 6) >> 20 | (uint64_t)SRC1(ip, i*32+7) << 30;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*25+ 6) = (uint64_t)SRC(ip, i*32+ 7) >> 34 
| (uint64_t)SRC1(ip, i*32+8) << 16;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*25+ 7) = (uint64_t)SRC(ip, i*32+ 8) >> 48;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*25+ 7) |= (uint64_t)SRC(ip, i*32+ 9) << 2 | (uint64_t)SRC1(ip, i*32+10) << 52;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*25+ 8) = (uint64_t)SRC(ip, i*32+10) >> 12 | (uint64_t)SRC1(ip, i*32+11) << 38;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*25+ 9) = (uint64_t)SRC(ip, i*32+11) >> 26 | (uint64_t)SRC1(ip, i*32+12) << 24;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*25+10) = (uint64_t)SRC(ip, i*32+12) >> 40;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*25+10) |= (uint64_t)SRC(ip, i*32+13) << 10 | (uint64_t)SRC1(ip, i*32+14) << 60;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*25+11) = (uint64_t)SRC(ip, i*32+14) >> 4 | (uint64_t)SRC1(ip, i*32+15) << 46;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*25+12) = (uint64_t)SRC(ip, i*32+15) >> 18 | (uint64_t)SRC1(ip, i*32+16) << 32;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*25+13) = (uint64_t)SRC(ip, i*32+16) >> 32 | (uint64_t)SRC1(ip, i*32+17) << 18;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*25+14) = (uint64_t)SRC(ip, i*32+17) >> 46;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*25+14) |= (uint64_t)SRC(ip, i*32+18) << 4 | (uint64_t)SRC1(ip, i*32+19) << 54;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*25+15) = (uint64_t)SRC(ip, i*32+19) >> 10 | (uint64_t)SRC1(ip, i*32+20) << 40;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*25+16) = (uint64_t)SRC(ip, i*32+20) >> 24 | (uint64_t)SRC1(ip, i*32+21) << 26;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*25+17) = (uint64_t)SRC(ip, i*32+21) >> 38;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*25+17) |= (uint64_t)SRC(ip, i*32+22) << 12 | (uint64_t)SRC1(ip, i*32+23) << 62;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*25+18) = (uint64_t)SRC(ip, i*32+23) >> 2 | (uint64_t)SRC1(ip, i*32+24) << 48;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*25+19) = (uint64_t)SRC(ip, i*32+24) >> 16 | 
(uint64_t)SRC1(ip, i*32+25) << 34;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*25+20) = (uint64_t)SRC(ip, i*32+25) >> 30 | (uint64_t)SRC1(ip, i*32+26) << 20;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*25+21) = (uint64_t)SRC(ip, i*32+26) >> 44;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*25+21) |= (uint64_t)SRC(ip, i*32+27) << 6 | (uint64_t)SRC1(ip, i*32+28) << 56;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*25+22) = (uint64_t)SRC(ip, i*32+28) >> 8 | (uint64_t)SRC1(ip, i*32+29) << 42;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*25+23) = (uint64_t)SRC(ip, i*32+29) >> 22 | (uint64_t)SRC1(ip, i*32+30) << 28;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*25+24) = (uint64_t)SRC(ip, i*32+30) >> 36;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*25+24) |= (uint64_t)SRC(ip, i*32+31) << 14;\ +} + +#define BITPACK64_50(ip, op, parm) { \ + BITBLK64_50(ip, 0, op, parm); SRCI(ip); op += 50*4/sizeof(op[0]);\ +} + +#define BITBLK64_51(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*51+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 51;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*51+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 13 | (uint64_t)SRC1(ip, i*64+2) << 38;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*51+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 26 | (uint64_t)SRC1(ip, i*64+3) << 25;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*51+ 3) = (uint64_t)SRC(ip, i*64+ 3) >> 39;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*51+ 3) |= (uint64_t)SRC(ip, i*64+ 4) << 12 | (uint64_t)SRC1(ip, i*64+5) << 63;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*51+ 4) = (uint64_t)SRC(ip, i*64+ 5) >> 1 | (uint64_t)SRC1(ip, i*64+6) << 50;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*51+ 5) = (uint64_t)SRC(ip, i*64+ 6) >> 14 | (uint64_t)SRC1(ip, i*64+7) << 37;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*51+ 6) = (uint64_t)SRC(ip, i*64+ 7) >> 27 | (uint64_t)SRC1(ip, i*64+8) << 24;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*51+ 7) = 
(uint64_t)SRC(ip, i*64+ 8) >> 40;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*51+ 7) |= (uint64_t)SRC(ip, i*64+ 9) << 11 | (uint64_t)SRC1(ip, i*64+10) << 62;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*51+ 8) = (uint64_t)SRC(ip, i*64+10) >> 2 | (uint64_t)SRC1(ip, i*64+11) << 49;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*51+ 9) = (uint64_t)SRC(ip, i*64+11) >> 15 | (uint64_t)SRC1(ip, i*64+12) << 36;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*51+10) = (uint64_t)SRC(ip, i*64+12) >> 28 | (uint64_t)SRC1(ip, i*64+13) << 23;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*51+11) = (uint64_t)SRC(ip, i*64+13) >> 41;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*51+11) |= (uint64_t)SRC(ip, i*64+14) << 10 | (uint64_t)SRC1(ip, i*64+15) << 61;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*51+12) = (uint64_t)SRC(ip, i*64+15) >> 3 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*51+13) = (uint64_t)SRC(ip, i*64+16) >> 16 | (uint64_t)SRC1(ip, i*64+17) << 35;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*51+14) = (uint64_t)SRC(ip, i*64+17) >> 29 | (uint64_t)SRC1(ip, i*64+18) << 22;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*51+15) = (uint64_t)SRC(ip, i*64+18) >> 42;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*51+15) |= (uint64_t)SRC(ip, i*64+19) << 9 | (uint64_t)SRC1(ip, i*64+20) << 60;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*51+16) = (uint64_t)SRC(ip, i*64+20) >> 4 | (uint64_t)SRC1(ip, i*64+21) << 47;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*51+17) = (uint64_t)SRC(ip, i*64+21) >> 17 | (uint64_t)SRC1(ip, i*64+22) << 34;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*51+18) = (uint64_t)SRC(ip, i*64+22) >> 30 | (uint64_t)SRC1(ip, i*64+23) << 21;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*51+19) = (uint64_t)SRC(ip, i*64+23) >> 43;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*51+19) |= (uint64_t)SRC(ip, i*64+24) << 8 | (uint64_t)SRC1(ip, i*64+25) << 59;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*51+20) = 
(uint64_t)SRC(ip, i*64+25) >> 5 | (uint64_t)SRC1(ip, i*64+26) << 46;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*51+21) = (uint64_t)SRC(ip, i*64+26) >> 18 | (uint64_t)SRC1(ip, i*64+27) << 33;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*51+22) = (uint64_t)SRC(ip, i*64+27) >> 31 | (uint64_t)SRC1(ip, i*64+28) << 20;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*51+23) = (uint64_t)SRC(ip, i*64+28) >> 44;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*51+23) |= (uint64_t)SRC(ip, i*64+29) << 7 | (uint64_t)SRC1(ip, i*64+30) << 58;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*51+24) = (uint64_t)SRC(ip, i*64+30) >> 6 | (uint64_t)SRC1(ip, i*64+31) << 45;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*51+25) = (uint64_t)SRC(ip, i*64+31) >> 19;\ +} + +#define BITPACK64_51(ip, op, parm) { \ + BITBLK64_51(ip, 0, op, parm); SRCI(ip); op += 51*4/sizeof(op[0]);\ +} + +#define BITBLK64_52(ip, i, op, parm) { ;\ + IPPB(ip, i*16+ 0, parm); *((uint64_t *)op+i*13+ 0) = (uint64_t)SRC(ip, i*16+ 0) | (uint64_t)SRC1(ip, i*16+1) << 52;\ + IPPB(ip, i*16+ 1, parm); *((uint64_t *)op+i*13+ 1) = (uint64_t)SRC(ip, i*16+ 1) >> 12 | (uint64_t)SRC1(ip, i*16+2) << 40;\ + IPPB(ip, i*16+ 2, parm); *((uint64_t *)op+i*13+ 2) = (uint64_t)SRC(ip, i*16+ 2) >> 24 | (uint64_t)SRC1(ip, i*16+3) << 28;\ + IPPB(ip, i*16+ 3, parm); *((uint64_t *)op+i*13+ 3) = (uint64_t)SRC(ip, i*16+ 3) >> 36 | (uint64_t)SRC1(ip, i*16+4) << 16;\ + IPPB(ip, i*16+ 4, parm); *((uint64_t *)op+i*13+ 4) = (uint64_t)SRC(ip, i*16+ 4) >> 48;\ + IPPB(ip, i*16+ 5, parm); *((uint64_t *)op+i*13+ 4) |= (uint64_t)SRC(ip, i*16+ 5) << 4 | (uint64_t)SRC1(ip, i*16+6) << 56;\ + IPPB(ip, i*16+ 6, parm); *((uint64_t *)op+i*13+ 5) = (uint64_t)SRC(ip, i*16+ 6) >> 8 | (uint64_t)SRC1(ip, i*16+7) << 44;\ + IPPB(ip, i*16+ 7, parm); *((uint64_t *)op+i*13+ 6) = (uint64_t)SRC(ip, i*16+ 7) >> 20 | (uint64_t)SRC1(ip, i*16+8) << 32;\ + IPPB(ip, i*16+ 8, parm); *((uint64_t *)op+i*13+ 7) = (uint64_t)SRC(ip, i*16+ 8) >> 32 | (uint64_t)SRC1(ip, i*16+9) << 20;\ + 
IPPB(ip, i*16+ 9, parm); *((uint64_t *)op+i*13+ 8) = (uint64_t)SRC(ip, i*16+ 9) >> 44;\ + IPPB(ip, i*16+10, parm); *((uint64_t *)op+i*13+ 8) |= (uint64_t)SRC(ip, i*16+10) << 8 | (uint64_t)SRC1(ip, i*16+11) << 60;\ + IPPB(ip, i*16+11, parm); *((uint64_t *)op+i*13+ 9) = (uint64_t)SRC(ip, i*16+11) >> 4 | (uint64_t)SRC1(ip, i*16+12) << 48;\ + IPPB(ip, i*16+12, parm); *((uint64_t *)op+i*13+10) = (uint64_t)SRC(ip, i*16+12) >> 16 | (uint64_t)SRC1(ip, i*16+13) << 36;\ + IPPB(ip, i*16+13, parm); *((uint64_t *)op+i*13+11) = (uint64_t)SRC(ip, i*16+13) >> 28 | (uint64_t)SRC1(ip, i*16+14) << 24;\ + IPPB(ip, i*16+14, parm); *((uint64_t *)op+i*13+12) = (uint64_t)SRC(ip, i*16+14) >> 40;\ + IPPB(ip, i*16+15, parm); *((uint64_t *)op+i*13+12) |= (uint64_t)SRC(ip, i*16+15) << 12;\ +} + +#define BITPACK64_52(ip, op, parm) { \ + BITBLK64_52(ip, 0, op, parm);\ + BITBLK64_52(ip, 1, op, parm); SRCI(ip); op += 52*4/sizeof(op[0]);\ +} + +#define BITBLK64_53(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*53+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 53;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*53+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 11 | (uint64_t)SRC1(ip, i*64+2) << 42;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*53+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 22 | (uint64_t)SRC1(ip, i*64+3) << 31;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*53+ 3) = (uint64_t)SRC(ip, i*64+ 3) >> 33 | (uint64_t)SRC1(ip, i*64+4) << 20;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*53+ 4) = (uint64_t)SRC(ip, i*64+ 4) >> 44;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*53+ 4) |= (uint64_t)SRC(ip, i*64+ 5) << 9 | (uint64_t)SRC1(ip, i*64+6) << 62;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*53+ 5) = (uint64_t)SRC(ip, i*64+ 6) >> 2 | (uint64_t)SRC1(ip, i*64+7) << 51;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*53+ 6) = (uint64_t)SRC(ip, i*64+ 7) >> 13 | (uint64_t)SRC1(ip, i*64+8) << 40;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*53+ 7) = (uint64_t)SRC(ip, 
i*64+ 8) >> 24 | (uint64_t)SRC1(ip, i*64+9) << 29;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*53+ 8) = (uint64_t)SRC(ip, i*64+ 9) >> 35 | (uint64_t)SRC1(ip, i*64+10) << 18;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*53+ 9) = (uint64_t)SRC(ip, i*64+10) >> 46;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*53+ 9) |= (uint64_t)SRC(ip, i*64+11) << 7 | (uint64_t)SRC1(ip, i*64+12) << 60;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*53+10) = (uint64_t)SRC(ip, i*64+12) >> 4 | (uint64_t)SRC1(ip, i*64+13) << 49;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*53+11) = (uint64_t)SRC(ip, i*64+13) >> 15 | (uint64_t)SRC1(ip, i*64+14) << 38;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*53+12) = (uint64_t)SRC(ip, i*64+14) >> 26 | (uint64_t)SRC1(ip, i*64+15) << 27;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*53+13) = (uint64_t)SRC(ip, i*64+15) >> 37 | (uint64_t)SRC1(ip, i*64+16) << 16;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*53+14) = (uint64_t)SRC(ip, i*64+16) >> 48;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*53+14) |= (uint64_t)SRC(ip, i*64+17) << 5 | (uint64_t)SRC1(ip, i*64+18) << 58;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*53+15) = (uint64_t)SRC(ip, i*64+18) >> 6 | (uint64_t)SRC1(ip, i*64+19) << 47;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*53+16) = (uint64_t)SRC(ip, i*64+19) >> 17 | (uint64_t)SRC1(ip, i*64+20) << 36;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*53+17) = (uint64_t)SRC(ip, i*64+20) >> 28 | (uint64_t)SRC1(ip, i*64+21) << 25;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*53+18) = (uint64_t)SRC(ip, i*64+21) >> 39 | (uint64_t)SRC1(ip, i*64+22) << 14;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*53+19) = (uint64_t)SRC(ip, i*64+22) >> 50;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*53+19) |= (uint64_t)SRC(ip, i*64+23) << 3 | (uint64_t)SRC1(ip, i*64+24) << 56;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*53+20) = (uint64_t)SRC(ip, i*64+24) >> 8 | (uint64_t)SRC1(ip, i*64+25) << 45;\ + IPPB(ip, i*64+25, parm); *((uint64_t 
*)op+i*53+21) = (uint64_t)SRC(ip, i*64+25) >> 19 | (uint64_t)SRC1(ip, i*64+26) << 34;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*53+22) = (uint64_t)SRC(ip, i*64+26) >> 30 | (uint64_t)SRC1(ip, i*64+27) << 23;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*53+23) = (uint64_t)SRC(ip, i*64+27) >> 41 | (uint64_t)SRC1(ip, i*64+28) << 12;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*53+24) = (uint64_t)SRC(ip, i*64+28) >> 52;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*53+24) |= (uint64_t)SRC(ip, i*64+29) << 1 | (uint64_t)SRC1(ip, i*64+30) << 54;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*53+25) = (uint64_t)SRC(ip, i*64+30) >> 10 | (uint64_t)SRC1(ip, i*64+31) << 43;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*53+26) = (uint64_t)SRC(ip, i*64+31) >> 21;\ +} + +#define BITPACK64_53(ip, op, parm) { \ + BITBLK64_53(ip, 0, op, parm); SRCI(ip); op += 53*4/sizeof(op[0]);\ +} + +#define BITBLK64_54(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*27+ 0) = (uint64_t)SRC(ip, i*32+ 0) | (uint64_t)SRC1(ip, i*32+1) << 54;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*27+ 1) = (uint64_t)SRC(ip, i*32+ 1) >> 10 | (uint64_t)SRC1(ip, i*32+2) << 44;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*27+ 2) = (uint64_t)SRC(ip, i*32+ 2) >> 20 | (uint64_t)SRC1(ip, i*32+3) << 34;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*27+ 3) = (uint64_t)SRC(ip, i*32+ 3) >> 30 | (uint64_t)SRC1(ip, i*32+4) << 24;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*27+ 4) = (uint64_t)SRC(ip, i*32+ 4) >> 40 | (uint64_t)SRC1(ip, i*32+5) << 14;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*27+ 5) = (uint64_t)SRC(ip, i*32+ 5) >> 50;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*27+ 5) |= (uint64_t)SRC(ip, i*32+ 6) << 4 | (uint64_t)SRC1(ip, i*32+7) << 58;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*27+ 6) = (uint64_t)SRC(ip, i*32+ 7) >> 6 | (uint64_t)SRC1(ip, i*32+8) << 48;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*27+ 7) = (uint64_t)SRC(ip, i*32+ 8) >> 16 | (uint64_t)SRC1(ip, 
i*32+9) << 38;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*27+ 8) = (uint64_t)SRC(ip, i*32+ 9) >> 26 | (uint64_t)SRC1(ip, i*32+10) << 28;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*27+ 9) = (uint64_t)SRC(ip, i*32+10) >> 36 | (uint64_t)SRC1(ip, i*32+11) << 18;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*27+10) = (uint64_t)SRC(ip, i*32+11) >> 46;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*27+10) |= (uint64_t)SRC(ip, i*32+12) << 8 | (uint64_t)SRC1(ip, i*32+13) << 62;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*27+11) = (uint64_t)SRC(ip, i*32+13) >> 2 | (uint64_t)SRC1(ip, i*32+14) << 52;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*27+12) = (uint64_t)SRC(ip, i*32+14) >> 12 | (uint64_t)SRC1(ip, i*32+15) << 42;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*27+13) = (uint64_t)SRC(ip, i*32+15) >> 22 | (uint64_t)SRC1(ip, i*32+16) << 32;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*27+14) = (uint64_t)SRC(ip, i*32+16) >> 32 | (uint64_t)SRC1(ip, i*32+17) << 22;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*27+15) = (uint64_t)SRC(ip, i*32+17) >> 42 | (uint64_t)SRC1(ip, i*32+18) << 12;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*27+16) = (uint64_t)SRC(ip, i*32+18) >> 52;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*27+16) |= (uint64_t)SRC(ip, i*32+19) << 2 | (uint64_t)SRC1(ip, i*32+20) << 56;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*27+17) = (uint64_t)SRC(ip, i*32+20) >> 8 | (uint64_t)SRC1(ip, i*32+21) << 46;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*27+18) = (uint64_t)SRC(ip, i*32+21) >> 18 | (uint64_t)SRC1(ip, i*32+22) << 36;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*27+19) = (uint64_t)SRC(ip, i*32+22) >> 28 | (uint64_t)SRC1(ip, i*32+23) << 26;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*27+20) = (uint64_t)SRC(ip, i*32+23) >> 38 | (uint64_t)SRC1(ip, i*32+24) << 16;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*27+21) = (uint64_t)SRC(ip, i*32+24) >> 48;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*27+21) |= (uint64_t)SRC(ip, 
i*32+25) << 6 | (uint64_t)SRC1(ip, i*32+26) << 60;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*27+22) = (uint64_t)SRC(ip, i*32+26) >> 4 | (uint64_t)SRC1(ip, i*32+27) << 50;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*27+23) = (uint64_t)SRC(ip, i*32+27) >> 14 | (uint64_t)SRC1(ip, i*32+28) << 40;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*27+24) = (uint64_t)SRC(ip, i*32+28) >> 24 | (uint64_t)SRC1(ip, i*32+29) << 30;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*27+25) = (uint64_t)SRC(ip, i*32+29) >> 34 | (uint64_t)SRC1(ip, i*32+30) << 20;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*27+26) = (uint64_t)SRC(ip, i*32+30) >> 44;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*27+26) |= (uint64_t)SRC(ip, i*32+31) << 10;\ +} + +#define BITPACK64_54(ip, op, parm) { \ + BITBLK64_54(ip, 0, op, parm); SRCI(ip); op += 54*4/sizeof(op[0]);\ +} + +#define BITBLK64_55(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*55+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 55;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*55+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 9 | (uint64_t)SRC1(ip, i*64+2) << 46;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*55+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 18 | (uint64_t)SRC1(ip, i*64+3) << 37;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*55+ 3) = (uint64_t)SRC(ip, i*64+ 3) >> 27 | (uint64_t)SRC1(ip, i*64+4) << 28;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*55+ 4) = (uint64_t)SRC(ip, i*64+ 4) >> 36 | (uint64_t)SRC1(ip, i*64+5) << 19;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*55+ 5) = (uint64_t)SRC(ip, i*64+ 5) >> 45 | (uint64_t)SRC1(ip, i*64+6) << 10;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*55+ 6) = (uint64_t)SRC(ip, i*64+ 6) >> 54;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*55+ 6) |= (uint64_t)SRC(ip, i*64+ 7) << 1 | (uint64_t)SRC1(ip, i*64+8) << 56;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*55+ 7) = (uint64_t)SRC(ip, i*64+ 8) >> 8 | (uint64_t)SRC1(ip, i*64+9) << 47;\ + IPPB(ip, i*64+ 9, 
parm); *((uint64_t *)op+i*55+ 8) = (uint64_t)SRC(ip, i*64+ 9) >> 17 | (uint64_t)SRC1(ip, i*64+10) << 38;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*55+ 9) = (uint64_t)SRC(ip, i*64+10) >> 26 | (uint64_t)SRC1(ip, i*64+11) << 29;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*55+10) = (uint64_t)SRC(ip, i*64+11) >> 35 | (uint64_t)SRC1(ip, i*64+12) << 20;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*55+11) = (uint64_t)SRC(ip, i*64+12) >> 44 | (uint64_t)SRC1(ip, i*64+13) << 11;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*55+12) = (uint64_t)SRC(ip, i*64+13) >> 53;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*55+12) |= (uint64_t)SRC(ip, i*64+14) << 2 | (uint64_t)SRC1(ip, i*64+15) << 57;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*55+13) = (uint64_t)SRC(ip, i*64+15) >> 7 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*55+14) = (uint64_t)SRC(ip, i*64+16) >> 16 | (uint64_t)SRC1(ip, i*64+17) << 39;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*55+15) = (uint64_t)SRC(ip, i*64+17) >> 25 | (uint64_t)SRC1(ip, i*64+18) << 30;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*55+16) = (uint64_t)SRC(ip, i*64+18) >> 34 | (uint64_t)SRC1(ip, i*64+19) << 21;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*55+17) = (uint64_t)SRC(ip, i*64+19) >> 43 | (uint64_t)SRC1(ip, i*64+20) << 12;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*55+18) = (uint64_t)SRC(ip, i*64+20) >> 52;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*55+18) |= (uint64_t)SRC(ip, i*64+21) << 3 | (uint64_t)SRC1(ip, i*64+22) << 58;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*55+19) = (uint64_t)SRC(ip, i*64+22) >> 6 | (uint64_t)SRC1(ip, i*64+23) << 49;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*55+20) = (uint64_t)SRC(ip, i*64+23) >> 15 | (uint64_t)SRC1(ip, i*64+24) << 40;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*55+21) = (uint64_t)SRC(ip, i*64+24) >> 24 | (uint64_t)SRC1(ip, i*64+25) << 31;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*55+22) = (uint64_t)SRC(ip, 
i*64+25) >> 33 | (uint64_t)SRC1(ip, i*64+26) << 22;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*55+23) = (uint64_t)SRC(ip, i*64+26) >> 42 | (uint64_t)SRC1(ip, i*64+27) << 13;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*55+24) = (uint64_t)SRC(ip, i*64+27) >> 51;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*55+24) |= (uint64_t)SRC(ip, i*64+28) << 4 | (uint64_t)SRC1(ip, i*64+29) << 59;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*55+25) = (uint64_t)SRC(ip, i*64+29) >> 5 | (uint64_t)SRC1(ip, i*64+30) << 50;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*55+26) = (uint64_t)SRC(ip, i*64+30) >> 14 | (uint64_t)SRC1(ip, i*64+31) << 41;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*55+27) = (uint64_t)SRC(ip, i*64+31) >> 23;\ +} + /* BITPACK64_55: expands BITBLK64_55 once (i = 0), then SRCI(ip), then advances op by 55*4/sizeof(op[0]) elements (55*4 = 220 bytes = 27.5 uint64_t words). NOTE(review): the last statement of BITBLK64_55 is a full 64-bit store to word 27, which reaches 4 bytes past the advanced op, and the division truncates if sizeof(op[0]) is 8 - confirm op is a narrower pointer and the output buffer has slack. */ +#define BITPACK64_55(ip, op, parm) { \ + BITBLK64_55(ip, 0, op, parm); SRCI(ip); op += 55*4/sizeof(op[0]);\ +} + /* BITBLK64_56: group i of 8 inputs (indices i*8+0 .. i*8+7) stored as 7 uint64_t words at op+i*7, input k starting at bit 56*k of the group. Each word ORs the upper part of one input (SRC >> n) with the next input shifted up (SRC1 << m); word 6 is finished by a separate |= for input 7, so statement order matters. IPPB(ip, idx, parm) is expanded before each input is read. IPPB, SRC and SRC1 are not defined in this part of the file. Inputs are not masked here - presumably they already fit in 56 bits; confirm against the caller. */ +#define BITBLK64_56(ip, i, op, parm) { ;\ + IPPB(ip, i*8+ 0, parm); *((uint64_t *)op+i*7+ 0) = (uint64_t)SRC(ip, i*8+ 0) | (uint64_t)SRC1(ip, i*8+1) << 56;\ + IPPB(ip, i*8+ 1, parm); *((uint64_t *)op+i*7+ 1) = (uint64_t)SRC(ip, i*8+ 1) >> 8 | (uint64_t)SRC1(ip, i*8+2) << 48;\ + IPPB(ip, i*8+ 2, parm); *((uint64_t *)op+i*7+ 2) = (uint64_t)SRC(ip, i*8+ 2) >> 16 | (uint64_t)SRC1(ip, i*8+3) << 40;\ + IPPB(ip, i*8+ 3, parm); *((uint64_t *)op+i*7+ 3) = (uint64_t)SRC(ip, i*8+ 3) >> 24 | (uint64_t)SRC1(ip, i*8+4) << 32;\ + IPPB(ip, i*8+ 4, parm); *((uint64_t *)op+i*7+ 4) = (uint64_t)SRC(ip, i*8+ 4) >> 32 | (uint64_t)SRC1(ip, i*8+5) << 24;\ + IPPB(ip, i*8+ 5, parm); *((uint64_t *)op+i*7+ 5) = (uint64_t)SRC(ip, i*8+ 5) >> 40 | (uint64_t)SRC1(ip, i*8+6) << 16;\ + IPPB(ip, i*8+ 6, parm); *((uint64_t *)op+i*7+ 6) = (uint64_t)SRC(ip, i*8+ 6) >> 48;\ + IPPB(ip, i*8+ 7, parm); *((uint64_t *)op+i*7+ 6) |= (uint64_t)SRC(ip, i*8+ 7) << 8;\ +} + /* BITPACK64_56: expands BITBLK64_56 for i = 0..3 (32 inputs, 28 words), then SRCI(ip), then advances op by 56*4/sizeof(op[0]) elements (224 bytes). */ +#define BITPACK64_56(ip, op, parm) { \ + BITBLK64_56(ip, 0, op, parm);\ + BITBLK64_56(ip, 1, op, parm);\ + BITBLK64_56(ip, 2, op, parm);\ + BITBLK64_56(ip, 3, op, parm); SRCI(ip); op += 
56*4/sizeof(op[0]);\ +} + /* BITBLK64_57: 32 inputs (indices i*64+0 .. i*64+31) laid out at a 57-bit stride into uint64_t words op+i*57+0 .. +28. IPPB(ip, idx, parm) is expanded before each input is read; each word ORs the upper part of one input (SRC >> n) with the next input shifted up (SRC1 << m). Words 8, 16 and 24 receive bits from three inputs and are built in two statements (= then |=), so statement order matters. Word 28 holds only the upper bits of input 31 (>> 25). */ +#define BITBLK64_57(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*57+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 57;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*57+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 7 | (uint64_t)SRC1(ip, i*64+2) << 50;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*57+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 14 | (uint64_t)SRC1(ip, i*64+3) << 43;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*57+ 3) = (uint64_t)SRC(ip, i*64+ 3) >> 21 | (uint64_t)SRC1(ip, i*64+4) << 36;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*57+ 4) = (uint64_t)SRC(ip, i*64+ 4) >> 28 | (uint64_t)SRC1(ip, i*64+5) << 29;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*57+ 5) = (uint64_t)SRC(ip, i*64+ 5) >> 35 | (uint64_t)SRC1(ip, i*64+6) << 22;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*57+ 6) = (uint64_t)SRC(ip, i*64+ 6) >> 42 | (uint64_t)SRC1(ip, i*64+7) << 15;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*57+ 7) = (uint64_t)SRC(ip, i*64+ 7) >> 49 | (uint64_t)SRC1(ip, i*64+8) << 8;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*57+ 8) = (uint64_t)SRC(ip, i*64+ 8) >> 56;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*57+ 8) |= (uint64_t)SRC(ip, i*64+ 9) << 1 | (uint64_t)SRC1(ip, i*64+10) << 58;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*57+ 9) = (uint64_t)SRC(ip, i*64+10) >> 6 | (uint64_t)SRC1(ip, i*64+11) << 51;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*57+10) = (uint64_t)SRC(ip, i*64+11) >> 13 | (uint64_t)SRC1(ip, i*64+12) << 44;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*57+11) = (uint64_t)SRC(ip, i*64+12) >> 20 | (uint64_t)SRC1(ip, i*64+13) << 37;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*57+12) = (uint64_t)SRC(ip, i*64+13) >> 27 | (uint64_t)SRC1(ip, i*64+14) << 30;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*57+13) = (uint64_t)SRC(ip, i*64+14) >> 34 | (uint64_t)SRC1(ip, i*64+15) << 23;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*57+14) = (uint64_t)SRC(ip, i*64+15) >> 41 | 
(uint64_t)SRC1(ip, i*64+16) << 16;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*57+15) = (uint64_t)SRC(ip, i*64+16) >> 48 | (uint64_t)SRC1(ip, i*64+17) << 9;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*57+16) = (uint64_t)SRC(ip, i*64+17) >> 55;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*57+16) |= (uint64_t)SRC(ip, i*64+18) << 2 | (uint64_t)SRC1(ip, i*64+19) << 59;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*57+17) = (uint64_t)SRC(ip, i*64+19) >> 5 | (uint64_t)SRC1(ip, i*64+20) << 52;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*57+18) = (uint64_t)SRC(ip, i*64+20) >> 12 | (uint64_t)SRC1(ip, i*64+21) << 45;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*57+19) = (uint64_t)SRC(ip, i*64+21) >> 19 | (uint64_t)SRC1(ip, i*64+22) << 38;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*57+20) = (uint64_t)SRC(ip, i*64+22) >> 26 | (uint64_t)SRC1(ip, i*64+23) << 31;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*57+21) = (uint64_t)SRC(ip, i*64+23) >> 33 | (uint64_t)SRC1(ip, i*64+24) << 24;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*57+22) = (uint64_t)SRC(ip, i*64+24) >> 40 | (uint64_t)SRC1(ip, i*64+25) << 17;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*57+23) = (uint64_t)SRC(ip, i*64+25) >> 47 | (uint64_t)SRC1(ip, i*64+26) << 10;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*57+24) = (uint64_t)SRC(ip, i*64+26) >> 54;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*57+24) |= (uint64_t)SRC(ip, i*64+27) << 3 | (uint64_t)SRC1(ip, i*64+28) << 60;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*57+25) = (uint64_t)SRC(ip, i*64+28) >> 4 | (uint64_t)SRC1(ip, i*64+29) << 53;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*57+26) = (uint64_t)SRC(ip, i*64+29) >> 11 | (uint64_t)SRC1(ip, i*64+30) << 46;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*57+27) = (uint64_t)SRC(ip, i*64+30) >> 18 | (uint64_t)SRC1(ip, i*64+31) << 39;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*57+28) = (uint64_t)SRC(ip, i*64+31) >> 25;\ +} + /* BITPACK64_57: expands BITBLK64_57 once (i = 0), then SRCI(ip), then advances op by 57*4/sizeof(op[0]) elements (228 bytes = 28.5 uint64_t words). NOTE(review): the 64-bit store to word 28 reaches 4 bytes past the advanced op, and the division truncates if sizeof(op[0]) is 8 - confirm op is a narrower pointer and the output buffer has slack. */ +#define BITPACK64_57(ip, op, parm) { \ + 
BITBLK64_57(ip, 0, op, parm); SRCI(ip); op += 57*4/sizeof(op[0]);\ +} + /* BITBLK64_58: 32 inputs (indices i*32+0 .. i*32+31) at a 58-bit stride into 29 uint64_t words at op+i*29. IPPB(ip, idx, parm) is expanded before each input is read. Words 9, 19 and 28 are built in two statements (= then |=), so statement order matters. */ +#define BITBLK64_58(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*29+ 0) = (uint64_t)SRC(ip, i*32+ 0) | (uint64_t)SRC1(ip, i*32+1) << 58;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*29+ 1) = (uint64_t)SRC(ip, i*32+ 1) >> 6 | (uint64_t)SRC1(ip, i*32+2) << 52;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*29+ 2) = (uint64_t)SRC(ip, i*32+ 2) >> 12 | (uint64_t)SRC1(ip, i*32+3) << 46;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*29+ 3) = (uint64_t)SRC(ip, i*32+ 3) >> 18 | (uint64_t)SRC1(ip, i*32+4) << 40;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*29+ 4) = (uint64_t)SRC(ip, i*32+ 4) >> 24 | (uint64_t)SRC1(ip, i*32+5) << 34;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*29+ 5) = (uint64_t)SRC(ip, i*32+ 5) >> 30 | (uint64_t)SRC1(ip, i*32+6) << 28;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*29+ 6) = (uint64_t)SRC(ip, i*32+ 6) >> 36 | (uint64_t)SRC1(ip, i*32+7) << 22;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*29+ 7) = (uint64_t)SRC(ip, i*32+ 7) >> 42 | (uint64_t)SRC1(ip, i*32+8) << 16;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*29+ 8) = (uint64_t)SRC(ip, i*32+ 8) >> 48 | (uint64_t)SRC1(ip, i*32+9) << 10;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*29+ 9) = (uint64_t)SRC(ip, i*32+ 9) >> 54;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*29+ 9) |= (uint64_t)SRC(ip, i*32+10) << 4 | (uint64_t)SRC1(ip, i*32+11) << 62;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*29+10) = (uint64_t)SRC(ip, i*32+11) >> 2 | (uint64_t)SRC1(ip, i*32+12) << 56;\ + IPPB(ip, i*32+12, parm); *((uint64_t *)op+i*29+11) = (uint64_t)SRC(ip, i*32+12) >> 8 | (uint64_t)SRC1(ip, i*32+13) << 50;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*29+12) = (uint64_t)SRC(ip, i*32+13) >> 14 | (uint64_t)SRC1(ip, i*32+14) << 44;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*29+13) = (uint64_t)SRC(ip, i*32+14) >> 20 | (uint64_t)SRC1(ip, i*32+15) << 38;\ + IPPB(ip, i*32+15, parm); *((uint64_t 
*)op+i*29+14) = (uint64_t)SRC(ip, i*32+15) >> 26 | (uint64_t)SRC1(ip, i*32+16) << 32;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*29+15) = (uint64_t)SRC(ip, i*32+16) >> 32 | (uint64_t)SRC1(ip, i*32+17) << 26;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*29+16) = (uint64_t)SRC(ip, i*32+17) >> 38 | (uint64_t)SRC1(ip, i*32+18) << 20;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*29+17) = (uint64_t)SRC(ip, i*32+18) >> 44 | (uint64_t)SRC1(ip, i*32+19) << 14;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*29+18) = (uint64_t)SRC(ip, i*32+19) >> 50 | (uint64_t)SRC1(ip, i*32+20) << 8;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*29+19) = (uint64_t)SRC(ip, i*32+20) >> 56;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*29+19) |= (uint64_t)SRC(ip, i*32+21) << 2 | (uint64_t)SRC1(ip, i*32+22) << 60;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*29+20) = (uint64_t)SRC(ip, i*32+22) >> 4 | (uint64_t)SRC1(ip, i*32+23) << 54;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*29+21) = (uint64_t)SRC(ip, i*32+23) >> 10 | (uint64_t)SRC1(ip, i*32+24) << 48;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*29+22) = (uint64_t)SRC(ip, i*32+24) >> 16 | (uint64_t)SRC1(ip, i*32+25) << 42;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*29+23) = (uint64_t)SRC(ip, i*32+25) >> 22 | (uint64_t)SRC1(ip, i*32+26) << 36;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*29+24) = (uint64_t)SRC(ip, i*32+26) >> 28 | (uint64_t)SRC1(ip, i*32+27) << 30;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*29+25) = (uint64_t)SRC(ip, i*32+27) >> 34 | (uint64_t)SRC1(ip, i*32+28) << 24;\ + IPPB(ip, i*32+28, parm); *((uint64_t *)op+i*29+26) = (uint64_t)SRC(ip, i*32+28) >> 40 | (uint64_t)SRC1(ip, i*32+29) << 18;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*29+27) = (uint64_t)SRC(ip, i*32+29) >> 46 | (uint64_t)SRC1(ip, i*32+30) << 12;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*29+28) = (uint64_t)SRC(ip, i*32+30) >> 52;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*29+28) |= (uint64_t)SRC(ip, i*32+31) << 6;\ +} + 
+#define BITPACK64_58(ip, op, parm) { /* Expands BITBLK64_58 once (i = 0), then SRCI(ip), then advances op by 58*4/sizeof(op[0]) elements (232 bytes = 29 uint64_t words). */ \ + BITBLK64_58(ip, 0, op, parm); SRCI(ip); op += 58*4/sizeof(op[0]);\ +} + /* BITBLK64_59: 32 inputs (indices i*64+0 .. i*64+31) laid out at a 59-bit stride into uint64_t words op+i*59+0 .. +29. IPPB(ip, idx, parm) is expanded before each input is read; each word ORs the upper part of one input (SRC >> n) with the next input shifted up (SRC1 << m). Words 11 and 23 receive bits from three inputs and are built in two statements (= then |=), so statement order matters. Word 29 holds only the upper bits of input 31 (>> 27). IPPB, SRC and SRC1 are not defined in this part of the file. */ +#define BITBLK64_59(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*59+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 59;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*59+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 5 | (uint64_t)SRC1(ip, i*64+2) << 54;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*59+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 10 | (uint64_t)SRC1(ip, i*64+3) << 49;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*59+ 3) = (uint64_t)SRC(ip, i*64+ 3) >> 15 | (uint64_t)SRC1(ip, i*64+4) << 44;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*59+ 4) = (uint64_t)SRC(ip, i*64+ 4) >> 20 | (uint64_t)SRC1(ip, i*64+5) << 39;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*59+ 5) = (uint64_t)SRC(ip, i*64+ 5) >> 25 | (uint64_t)SRC1(ip, i*64+6) << 34;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*59+ 6) = (uint64_t)SRC(ip, i*64+ 6) >> 30 | (uint64_t)SRC1(ip, i*64+7) << 29;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*59+ 7) = (uint64_t)SRC(ip, i*64+ 7) >> 35 | (uint64_t)SRC1(ip, i*64+8) << 24;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*59+ 8) = (uint64_t)SRC(ip, i*64+ 8) >> 40 | (uint64_t)SRC1(ip, i*64+9) << 19;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*59+ 9) = (uint64_t)SRC(ip, i*64+ 9) >> 45 | (uint64_t)SRC1(ip, i*64+10) << 14;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*59+10) = (uint64_t)SRC(ip, i*64+10) >> 50 | (uint64_t)SRC1(ip, i*64+11) << 9;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*59+11) = (uint64_t)SRC(ip, i*64+11) >> 55;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*59+11) |= (uint64_t)SRC(ip, i*64+12) << 4 | (uint64_t)SRC1(ip, i*64+13) << 63;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*59+12) = (uint64_t)SRC(ip, i*64+13) >> 1 | (uint64_t)SRC1(ip, i*64+14) << 58;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*59+13) = (uint64_t)SRC(ip, i*64+14) >> 6 | (uint64_t)SRC1(ip, i*64+15) << 53;\ + 
IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*59+14) = (uint64_t)SRC(ip, i*64+15) >> 11 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*59+15) = (uint64_t)SRC(ip, i*64+16) >> 16 | (uint64_t)SRC1(ip, i*64+17) << 43;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*59+16) = (uint64_t)SRC(ip, i*64+17) >> 21 | (uint64_t)SRC1(ip, i*64+18) << 38;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*59+17) = (uint64_t)SRC(ip, i*64+18) >> 26 | (uint64_t)SRC1(ip, i*64+19) << 33;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*59+18) = (uint64_t)SRC(ip, i*64+19) >> 31 | (uint64_t)SRC1(ip, i*64+20) << 28;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*59+19) = (uint64_t)SRC(ip, i*64+20) >> 36 | (uint64_t)SRC1(ip, i*64+21) << 23;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*59+20) = (uint64_t)SRC(ip, i*64+21) >> 41 | (uint64_t)SRC1(ip, i*64+22) << 18;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*59+21) = (uint64_t)SRC(ip, i*64+22) >> 46 | (uint64_t)SRC1(ip, i*64+23) << 13;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*59+22) = (uint64_t)SRC(ip, i*64+23) >> 51 | (uint64_t)SRC1(ip, i*64+24) << 8;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*59+23) = (uint64_t)SRC(ip, i*64+24) >> 56;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*59+23) |= (uint64_t)SRC(ip, i*64+25) << 3 | (uint64_t)SRC1(ip, i*64+26) << 62;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*59+24) = (uint64_t)SRC(ip, i*64+26) >> 2 | (uint64_t)SRC1(ip, i*64+27) << 57;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*59+25) = (uint64_t)SRC(ip, i*64+27) >> 7 | (uint64_t)SRC1(ip, i*64+28) << 52;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*59+26) = (uint64_t)SRC(ip, i*64+28) >> 12 | (uint64_t)SRC1(ip, i*64+29) << 47;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*59+27) = (uint64_t)SRC(ip, i*64+29) >> 17 | (uint64_t)SRC1(ip, i*64+30) << 42;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*59+28) = (uint64_t)SRC(ip, i*64+30) >> 22 | (uint64_t)SRC1(ip, i*64+31) << 37;\ + IPPB(ip, i*64+31, 
parm); *((uint64_t *)op+i*59+29) = (uint64_t)SRC(ip, i*64+31) >> 27;\ +} + /* BITPACK64_59: expands BITBLK64_59 once (i = 0), then SRCI(ip), then advances op by 59*4/sizeof(op[0]) elements (236 bytes = 29.5 uint64_t words). NOTE(review): the 64-bit store to word 29 reaches 4 bytes past the advanced op, and the division truncates if sizeof(op[0]) is 8 - confirm op is a narrower pointer and the output buffer has slack. */ +#define BITPACK64_59(ip, op, parm) { \ + BITBLK64_59(ip, 0, op, parm); SRCI(ip); op += 59*4/sizeof(op[0]);\ +} + /* BITBLK64_60: group i of 16 inputs (indices i*16+0 .. i*16+15) at a 60-bit stride into 15 uint64_t words at op+i*15; IPPB(ip, idx, parm) is expanded before each input is read, and word 14 is finished by a separate |= for input 15, so statement order matters. */ +#define BITBLK64_60(ip, i, op, parm) { ;\ + IPPB(ip, i*16+ 0, parm); *((uint64_t *)op+i*15+ 0) = (uint64_t)SRC(ip, i*16+ 0) | (uint64_t)SRC1(ip, i*16+1) << 60;\ + IPPB(ip, i*16+ 1, parm); *((uint64_t *)op+i*15+ 1) = (uint64_t)SRC(ip, i*16+ 1) >> 4 | (uint64_t)SRC1(ip, i*16+2) << 56;\ + IPPB(ip, i*16+ 2, parm); *((uint64_t *)op+i*15+ 2) = (uint64_t)SRC(ip, i*16+ 2) >> 8 | (uint64_t)SRC1(ip, i*16+3) << 52;\ + IPPB(ip, i*16+ 3, parm); *((uint64_t *)op+i*15+ 3) = (uint64_t)SRC(ip, i*16+ 3) >> 12 | (uint64_t)SRC1(ip, i*16+4) << 48;\ + IPPB(ip, i*16+ 4, parm); *((uint64_t *)op+i*15+ 4) = (uint64_t)SRC(ip, i*16+ 4) >> 16 | (uint64_t)SRC1(ip, i*16+5) << 44;\ + IPPB(ip, i*16+ 5, parm); *((uint64_t *)op+i*15+ 5) = (uint64_t)SRC(ip, i*16+ 5) >> 20 | (uint64_t)SRC1(ip, i*16+6) << 40;\ + IPPB(ip, i*16+ 6, parm); *((uint64_t *)op+i*15+ 6) = (uint64_t)SRC(ip, i*16+ 6) >> 24 | (uint64_t)SRC1(ip, i*16+7) << 36;\ + IPPB(ip, i*16+ 7, parm); *((uint64_t *)op+i*15+ 7) = (uint64_t)SRC(ip, i*16+ 7) >> 28 | (uint64_t)SRC1(ip, i*16+8) << 32;\ + IPPB(ip, i*16+ 8, parm); *((uint64_t *)op+i*15+ 8) = (uint64_t)SRC(ip, i*16+ 8) >> 32 | (uint64_t)SRC1(ip, i*16+9) << 28;\ + IPPB(ip, i*16+ 9, parm); *((uint64_t *)op+i*15+ 9) = (uint64_t)SRC(ip, i*16+ 9) >> 36 | (uint64_t)SRC1(ip, i*16+10) << 24;\ + IPPB(ip, i*16+10, parm); *((uint64_t *)op+i*15+10) = (uint64_t)SRC(ip, i*16+10) >> 40 | (uint64_t)SRC1(ip, i*16+11) << 20;\ + IPPB(ip, i*16+11, parm); *((uint64_t *)op+i*15+11) = (uint64_t)SRC(ip, i*16+11) >> 44 | (uint64_t)SRC1(ip, i*16+12) << 16;\ + IPPB(ip, i*16+12, parm); *((uint64_t *)op+i*15+12) = (uint64_t)SRC(ip, i*16+12) >> 48 | (uint64_t)SRC1(ip, i*16+13) << 12;\ + IPPB(ip, i*16+13, parm); *((uint64_t *)op+i*15+13) = (uint64_t)SRC(ip, i*16+13) >> 52 | (uint64_t)SRC1(ip, i*16+14) << 8;\ + IPPB(ip, i*16+14, 
parm); *((uint64_t *)op+i*15+14) = (uint64_t)SRC(ip, i*16+14) >> 56;\ + IPPB(ip, i*16+15, parm); *((uint64_t *)op+i*15+14) |= (uint64_t)SRC(ip, i*16+15) << 4;\ +} + /* BITPACK64_60: expands BITBLK64_60 for i = 0 and 1 (32 inputs, 30 words), then SRCI(ip), then advances op by 60*4/sizeof(op[0]) elements (240 bytes). */ +#define BITPACK64_60(ip, op, parm) { \ + BITBLK64_60(ip, 0, op, parm);\ + BITBLK64_60(ip, 1, op, parm); SRCI(ip); op += 60*4/sizeof(op[0]);\ +} + /* BITBLK64_61: 32 inputs (indices i*64+0 .. i*64+31) at a 61-bit stride into uint64_t words op+i*61+0 .. +30. IPPB(ip, idx, parm) is expanded before each input is read. Word 20 receives bits from three inputs and is built in two statements (= then |=), so statement order matters. Word 30 holds only the upper bits of input 31 (>> 29). */ +#define BITBLK64_61(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*61+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 61;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*61+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 3 | (uint64_t)SRC1(ip, i*64+2) << 58;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*61+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 6 | (uint64_t)SRC1(ip, i*64+3) << 55;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*61+ 3) = (uint64_t)SRC(ip, i*64+ 3) >> 9 | (uint64_t)SRC1(ip, i*64+4) << 52;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*61+ 4) = (uint64_t)SRC(ip, i*64+ 4) >> 12 | (uint64_t)SRC1(ip, i*64+5) << 49;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*61+ 5) = (uint64_t)SRC(ip, i*64+ 5) >> 15 | (uint64_t)SRC1(ip, i*64+6) << 46;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*61+ 6) = (uint64_t)SRC(ip, i*64+ 6) >> 18 | (uint64_t)SRC1(ip, i*64+7) << 43;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*61+ 7) = (uint64_t)SRC(ip, i*64+ 7) >> 21 | (uint64_t)SRC1(ip, i*64+8) << 40;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*61+ 8) = (uint64_t)SRC(ip, i*64+ 8) >> 24 | (uint64_t)SRC1(ip, i*64+9) << 37;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*61+ 9) = (uint64_t)SRC(ip, i*64+ 9) >> 27 | (uint64_t)SRC1(ip, i*64+10) << 34;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*61+10) = (uint64_t)SRC(ip, i*64+10) >> 30 | (uint64_t)SRC1(ip, i*64+11) << 31;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*61+11) = (uint64_t)SRC(ip, i*64+11) >> 33 | (uint64_t)SRC1(ip, i*64+12) << 28;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*61+12) = (uint64_t)SRC(ip, i*64+12) >> 36 | (uint64_t)SRC1(ip, i*64+13) << 25;\ + IPPB(ip, i*64+13, 
parm); *((uint64_t *)op+i*61+13) = (uint64_t)SRC(ip, i*64+13) >> 39 | (uint64_t)SRC1(ip, i*64+14) << 22;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*61+14) = (uint64_t)SRC(ip, i*64+14) >> 42 | (uint64_t)SRC1(ip, i*64+15) << 19;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*61+15) = (uint64_t)SRC(ip, i*64+15) >> 45 | (uint64_t)SRC1(ip, i*64+16) << 16;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*61+16) = (uint64_t)SRC(ip, i*64+16) >> 48 | (uint64_t)SRC1(ip, i*64+17) << 13;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*61+17) = (uint64_t)SRC(ip, i*64+17) >> 51 | (uint64_t)SRC1(ip, i*64+18) << 10;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*61+18) = (uint64_t)SRC(ip, i*64+18) >> 54 | (uint64_t)SRC1(ip, i*64+19) << 7;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*61+19) = (uint64_t)SRC(ip, i*64+19) >> 57 | (uint64_t)SRC1(ip, i*64+20) << 4;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*61+20) = (uint64_t)SRC(ip, i*64+20) >> 60;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*61+20) |= (uint64_t)SRC(ip, i*64+21) << 1 | (uint64_t)SRC1(ip, i*64+22) << 62;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*61+21) = (uint64_t)SRC(ip, i*64+22) >> 2 | (uint64_t)SRC1(ip, i*64+23) << 59;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*61+22) = (uint64_t)SRC(ip, i*64+23) >> 5 | (uint64_t)SRC1(ip, i*64+24) << 56;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*61+23) = (uint64_t)SRC(ip, i*64+24) >> 8 | (uint64_t)SRC1(ip, i*64+25) << 53;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*61+24) = (uint64_t)SRC(ip, i*64+25) >> 11 | (uint64_t)SRC1(ip, i*64+26) << 50;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*61+25) = (uint64_t)SRC(ip, i*64+26) >> 14 | (uint64_t)SRC1(ip, i*64+27) << 47;\ + IPPB(ip, i*64+27, parm); *((uint64_t *)op+i*61+26) = (uint64_t)SRC(ip, i*64+27) >> 17 | (uint64_t)SRC1(ip, i*64+28) << 44;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*61+27) = (uint64_t)SRC(ip, i*64+28) >> 20 | (uint64_t)SRC1(ip, i*64+29) << 41;\ + IPPB(ip, i*64+29, parm); *((uint64_t 
*)op+i*61+28) = (uint64_t)SRC(ip, i*64+29) >> 23 | (uint64_t)SRC1(ip, i*64+30) << 38;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*61+29) = (uint64_t)SRC(ip, i*64+30) >> 26 | (uint64_t)SRC1(ip, i*64+31) << 35;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*61+30) = (uint64_t)SRC(ip, i*64+31) >> 29;\ +} + /* BITPACK64_61: expands BITBLK64_61 once (i = 0), then SRCI(ip), then advances op by 61*4/sizeof(op[0]) elements (244 bytes = 30.5 uint64_t words). NOTE(review): the 64-bit store to word 30 reaches 4 bytes past the advanced op, and the division truncates if sizeof(op[0]) is 8 - confirm op is a narrower pointer and the output buffer has slack. */ +#define BITPACK64_61(ip, op, parm) { \ + BITBLK64_61(ip, 0, op, parm); SRCI(ip); op += 61*4/sizeof(op[0]);\ +} + /* BITBLK64_62: 32 inputs (indices i*32+0 .. i*32+31) at a 62-bit stride into 31 uint64_t words at op+i*31; IPPB(ip, idx, parm) is expanded before each input is read, and word 30 is finished by a separate |= for input 31, so statement order matters. */ +#define BITBLK64_62(ip, i, op, parm) { ;\ + IPPB(ip, i*32+ 0, parm); *((uint64_t *)op+i*31+ 0) = (uint64_t)SRC(ip, i*32+ 0) | (uint64_t)SRC1(ip, i*32+1) << 62;\ + IPPB(ip, i*32+ 1, parm); *((uint64_t *)op+i*31+ 1) = (uint64_t)SRC(ip, i*32+ 1) >> 2 | (uint64_t)SRC1(ip, i*32+2) << 60;\ + IPPB(ip, i*32+ 2, parm); *((uint64_t *)op+i*31+ 2) = (uint64_t)SRC(ip, i*32+ 2) >> 4 | (uint64_t)SRC1(ip, i*32+3) << 58;\ + IPPB(ip, i*32+ 3, parm); *((uint64_t *)op+i*31+ 3) = (uint64_t)SRC(ip, i*32+ 3) >> 6 | (uint64_t)SRC1(ip, i*32+4) << 56;\ + IPPB(ip, i*32+ 4, parm); *((uint64_t *)op+i*31+ 4) = (uint64_t)SRC(ip, i*32+ 4) >> 8 | (uint64_t)SRC1(ip, i*32+5) << 54;\ + IPPB(ip, i*32+ 5, parm); *((uint64_t *)op+i*31+ 5) = (uint64_t)SRC(ip, i*32+ 5) >> 10 | (uint64_t)SRC1(ip, i*32+6) << 52;\ + IPPB(ip, i*32+ 6, parm); *((uint64_t *)op+i*31+ 6) = (uint64_t)SRC(ip, i*32+ 6) >> 12 | (uint64_t)SRC1(ip, i*32+7) << 50;\ + IPPB(ip, i*32+ 7, parm); *((uint64_t *)op+i*31+ 7) = (uint64_t)SRC(ip, i*32+ 7) >> 14 | (uint64_t)SRC1(ip, i*32+8) << 48;\ + IPPB(ip, i*32+ 8, parm); *((uint64_t *)op+i*31+ 8) = (uint64_t)SRC(ip, i*32+ 8) >> 16 | (uint64_t)SRC1(ip, i*32+9) << 46;\ + IPPB(ip, i*32+ 9, parm); *((uint64_t *)op+i*31+ 9) = (uint64_t)SRC(ip, i*32+ 9) >> 18 | (uint64_t)SRC1(ip, i*32+10) << 44;\ + IPPB(ip, i*32+10, parm); *((uint64_t *)op+i*31+10) = (uint64_t)SRC(ip, i*32+10) >> 20 | (uint64_t)SRC1(ip, i*32+11) << 42;\ + IPPB(ip, i*32+11, parm); *((uint64_t *)op+i*31+11) = (uint64_t)SRC(ip, i*32+11) >> 22 | (uint64_t)SRC1(ip, i*32+12) << 40;\ + IPPB(ip, i*32+12, parm); *((uint64_t 
*)op+i*31+12) = (uint64_t)SRC(ip, i*32+12) >> 24 | (uint64_t)SRC1(ip, i*32+13) << 38;\ + IPPB(ip, i*32+13, parm); *((uint64_t *)op+i*31+13) = (uint64_t)SRC(ip, i*32+13) >> 26 | (uint64_t)SRC1(ip, i*32+14) << 36;\ + IPPB(ip, i*32+14, parm); *((uint64_t *)op+i*31+14) = (uint64_t)SRC(ip, i*32+14) >> 28 | (uint64_t)SRC1(ip, i*32+15) << 34;\ + IPPB(ip, i*32+15, parm); *((uint64_t *)op+i*31+15) = (uint64_t)SRC(ip, i*32+15) >> 30 | (uint64_t)SRC1(ip, i*32+16) << 32;\ + IPPB(ip, i*32+16, parm); *((uint64_t *)op+i*31+16) = (uint64_t)SRC(ip, i*32+16) >> 32 | (uint64_t)SRC1(ip, i*32+17) << 30;\ + IPPB(ip, i*32+17, parm); *((uint64_t *)op+i*31+17) = (uint64_t)SRC(ip, i*32+17) >> 34 | (uint64_t)SRC1(ip, i*32+18) << 28;\ + IPPB(ip, i*32+18, parm); *((uint64_t *)op+i*31+18) = (uint64_t)SRC(ip, i*32+18) >> 36 | (uint64_t)SRC1(ip, i*32+19) << 26;\ + IPPB(ip, i*32+19, parm); *((uint64_t *)op+i*31+19) = (uint64_t)SRC(ip, i*32+19) >> 38 | (uint64_t)SRC1(ip, i*32+20) << 24;\ + IPPB(ip, i*32+20, parm); *((uint64_t *)op+i*31+20) = (uint64_t)SRC(ip, i*32+20) >> 40 | (uint64_t)SRC1(ip, i*32+21) << 22;\ + IPPB(ip, i*32+21, parm); *((uint64_t *)op+i*31+21) = (uint64_t)SRC(ip, i*32+21) >> 42 | (uint64_t)SRC1(ip, i*32+22) << 20;\ + IPPB(ip, i*32+22, parm); *((uint64_t *)op+i*31+22) = (uint64_t)SRC(ip, i*32+22) >> 44 | (uint64_t)SRC1(ip, i*32+23) << 18;\ + IPPB(ip, i*32+23, parm); *((uint64_t *)op+i*31+23) = (uint64_t)SRC(ip, i*32+23) >> 46 | (uint64_t)SRC1(ip, i*32+24) << 16;\ + IPPB(ip, i*32+24, parm); *((uint64_t *)op+i*31+24) = (uint64_t)SRC(ip, i*32+24) >> 48 | (uint64_t)SRC1(ip, i*32+25) << 14;\ + IPPB(ip, i*32+25, parm); *((uint64_t *)op+i*31+25) = (uint64_t)SRC(ip, i*32+25) >> 50 | (uint64_t)SRC1(ip, i*32+26) << 12;\ + IPPB(ip, i*32+26, parm); *((uint64_t *)op+i*31+26) = (uint64_t)SRC(ip, i*32+26) >> 52 | (uint64_t)SRC1(ip, i*32+27) << 10;\ + IPPB(ip, i*32+27, parm); *((uint64_t *)op+i*31+27) = (uint64_t)SRC(ip, i*32+27) >> 54 | (uint64_t)SRC1(ip, i*32+28) << 8;\ + IPPB(ip, i*32+28, 
parm); *((uint64_t *)op+i*31+28) = (uint64_t)SRC(ip, i*32+28) >> 56 | (uint64_t)SRC1(ip, i*32+29) << 6;\ + IPPB(ip, i*32+29, parm); *((uint64_t *)op+i*31+29) = (uint64_t)SRC(ip, i*32+29) >> 58 | (uint64_t)SRC1(ip, i*32+30) << 4;\ + IPPB(ip, i*32+30, parm); *((uint64_t *)op+i*31+30) = (uint64_t)SRC(ip, i*32+30) >> 60;\ + IPPB(ip, i*32+31, parm); *((uint64_t *)op+i*31+30) |= (uint64_t)SRC(ip, i*32+31) << 2;\ +} + /* BITPACK64_62: expands BITBLK64_62 once (i = 0), then SRCI(ip), then advances op by 62*4/sizeof(op[0]) elements (248 bytes = 31 uint64_t words). */ +#define BITPACK64_62(ip, op, parm) { \ + BITBLK64_62(ip, 0, op, parm); SRCI(ip); op += 62*4/sizeof(op[0]);\ +} + /* BITBLK64_63: 32 inputs (indices i*64+0 .. i*64+31) at a 63-bit stride into uint64_t words op+i*63+0 .. +31. IPPB(ip, idx, parm) is expanded before each input is read; word k combines input k shifted right by k with input k+1 shifted left by 63-k, and word 31 holds only input 31 >> 31. */ +#define BITBLK64_63(ip, i, op, parm) { ;\ + IPPB(ip, i*64+ 0, parm); *((uint64_t *)op+i*63+ 0) = (uint64_t)SRC(ip, i*64+ 0) | (uint64_t)SRC1(ip, i*64+1) << 63;\ + IPPB(ip, i*64+ 1, parm); *((uint64_t *)op+i*63+ 1) = (uint64_t)SRC(ip, i*64+ 1) >> 1 | (uint64_t)SRC1(ip, i*64+2) << 62;\ + IPPB(ip, i*64+ 2, parm); *((uint64_t *)op+i*63+ 2) = (uint64_t)SRC(ip, i*64+ 2) >> 2 | (uint64_t)SRC1(ip, i*64+3) << 61;\ + IPPB(ip, i*64+ 3, parm); *((uint64_t *)op+i*63+ 3) = (uint64_t)SRC(ip, i*64+ 3) >> 3 | (uint64_t)SRC1(ip, i*64+4) << 60;\ + IPPB(ip, i*64+ 4, parm); *((uint64_t *)op+i*63+ 4) = (uint64_t)SRC(ip, i*64+ 4) >> 4 | (uint64_t)SRC1(ip, i*64+5) << 59;\ + IPPB(ip, i*64+ 5, parm); *((uint64_t *)op+i*63+ 5) = (uint64_t)SRC(ip, i*64+ 5) >> 5 | (uint64_t)SRC1(ip, i*64+6) << 58;\ + IPPB(ip, i*64+ 6, parm); *((uint64_t *)op+i*63+ 6) = (uint64_t)SRC(ip, i*64+ 6) >> 6 | (uint64_t)SRC1(ip, i*64+7) << 57;\ + IPPB(ip, i*64+ 7, parm); *((uint64_t *)op+i*63+ 7) = (uint64_t)SRC(ip, i*64+ 7) >> 7 | (uint64_t)SRC1(ip, i*64+8) << 56;\ + IPPB(ip, i*64+ 8, parm); *((uint64_t *)op+i*63+ 8) = (uint64_t)SRC(ip, i*64+ 8) >> 8 | (uint64_t)SRC1(ip, i*64+9) << 55;\ + IPPB(ip, i*64+ 9, parm); *((uint64_t *)op+i*63+ 9) = (uint64_t)SRC(ip, i*64+ 9) >> 9 | (uint64_t)SRC1(ip, i*64+10) << 54;\ + IPPB(ip, i*64+10, parm); *((uint64_t *)op+i*63+10) = (uint64_t)SRC(ip, i*64+10) >> 10 | (uint64_t)SRC1(ip, i*64+11) << 53;\ + IPPB(ip, i*64+11, parm); *((uint64_t *)op+i*63+11) = 
(uint64_t)SRC(ip, i*64+11) >> 11 | (uint64_t)SRC1(ip, i*64+12) << 52;\ + IPPB(ip, i*64+12, parm); *((uint64_t *)op+i*63+12) = (uint64_t)SRC(ip, i*64+12) >> 12 | (uint64_t)SRC1(ip, i*64+13) << 51;\ + IPPB(ip, i*64+13, parm); *((uint64_t *)op+i*63+13) = (uint64_t)SRC(ip, i*64+13) >> 13 | (uint64_t)SRC1(ip, i*64+14) << 50;\ + IPPB(ip, i*64+14, parm); *((uint64_t *)op+i*63+14) = (uint64_t)SRC(ip, i*64+14) >> 14 | (uint64_t)SRC1(ip, i*64+15) << 49;\ + IPPB(ip, i*64+15, parm); *((uint64_t *)op+i*63+15) = (uint64_t)SRC(ip, i*64+15) >> 15 | (uint64_t)SRC1(ip, i*64+16) << 48;\ + IPPB(ip, i*64+16, parm); *((uint64_t *)op+i*63+16) = (uint64_t)SRC(ip, i*64+16) >> 16 | (uint64_t)SRC1(ip, i*64+17) << 47;\ + IPPB(ip, i*64+17, parm); *((uint64_t *)op+i*63+17) = (uint64_t)SRC(ip, i*64+17) >> 17 | (uint64_t)SRC1(ip, i*64+18) << 46;\ + IPPB(ip, i*64+18, parm); *((uint64_t *)op+i*63+18) = (uint64_t)SRC(ip, i*64+18) >> 18 | (uint64_t)SRC1(ip, i*64+19) << 45;\ + IPPB(ip, i*64+19, parm); *((uint64_t *)op+i*63+19) = (uint64_t)SRC(ip, i*64+19) >> 19 | (uint64_t)SRC1(ip, i*64+20) << 44;\ + IPPB(ip, i*64+20, parm); *((uint64_t *)op+i*63+20) = (uint64_t)SRC(ip, i*64+20) >> 20 | (uint64_t)SRC1(ip, i*64+21) << 43;\ + IPPB(ip, i*64+21, parm); *((uint64_t *)op+i*63+21) = (uint64_t)SRC(ip, i*64+21) >> 21 | (uint64_t)SRC1(ip, i*64+22) << 42;\ + IPPB(ip, i*64+22, parm); *((uint64_t *)op+i*63+22) = (uint64_t)SRC(ip, i*64+22) >> 22 | (uint64_t)SRC1(ip, i*64+23) << 41;\ + IPPB(ip, i*64+23, parm); *((uint64_t *)op+i*63+23) = (uint64_t)SRC(ip, i*64+23) >> 23 | (uint64_t)SRC1(ip, i*64+24) << 40;\ + IPPB(ip, i*64+24, parm); *((uint64_t *)op+i*63+24) = (uint64_t)SRC(ip, i*64+24) >> 24 | (uint64_t)SRC1(ip, i*64+25) << 39;\ + IPPB(ip, i*64+25, parm); *((uint64_t *)op+i*63+25) = (uint64_t)SRC(ip, i*64+25) >> 25 | (uint64_t)SRC1(ip, i*64+26) << 38;\ + IPPB(ip, i*64+26, parm); *((uint64_t *)op+i*63+26) = (uint64_t)SRC(ip, i*64+26) >> 26 | (uint64_t)SRC1(ip, i*64+27) << 37;\ + IPPB(ip, i*64+27, parm); *((uint64_t 
*)op+i*63+27) = (uint64_t)SRC(ip, i*64+27) >> 27 | (uint64_t)SRC1(ip, i*64+28) << 36;\ + IPPB(ip, i*64+28, parm); *((uint64_t *)op+i*63+28) = (uint64_t)SRC(ip, i*64+28) >> 28 | (uint64_t)SRC1(ip, i*64+29) << 35;\ + IPPB(ip, i*64+29, parm); *((uint64_t *)op+i*63+29) = (uint64_t)SRC(ip, i*64+29) >> 29 | (uint64_t)SRC1(ip, i*64+30) << 34;\ + IPPB(ip, i*64+30, parm); *((uint64_t *)op+i*63+30) = (uint64_t)SRC(ip, i*64+30) >> 30 | (uint64_t)SRC1(ip, i*64+31) << 33;\ + IPPB(ip, i*64+31, parm); *((uint64_t *)op+i*63+31) = (uint64_t)SRC(ip, i*64+31) >> 31;\ +} + /* BITPACK64_63: expands BITBLK64_63 once (i = 0), then SRCI(ip), then advances op by 63*4/sizeof(op[0]) elements (252 bytes = 31.5 uint64_t words). NOTE(review): the 64-bit store to word 31 reaches 4 bytes past the advanced op, and the division truncates if sizeof(op[0]) is 8 - confirm op is a narrower pointer and the output buffer has slack. */ +#define BITPACK64_63(ip, op, parm) { \ + BITBLK64_63(ip, 0, op, parm); SRCI(ip); op += 63*4/sizeof(op[0]);\ +} + /* BITBLK64_64: expands IPPB for input index i, then stores SRC(ip, i), cast to uint64_t and unshifted, into word op+i. */ +#define BITBLK64_64(ip, i, op, parm) { ;\ + IPPB(ip, i*1+ 0, parm); *((uint64_t *)op+i*1+ 0) = (uint64_t)SRC(ip, i*1+ 0) ;\ +} + /* BITPACK64_64: expands BITBLK64_64 for i = 0..31, then SRCI(ip), then advances op by 64*4/sizeof(op[0]) elements (256 bytes = 32 uint64_t words). */ +#define BITPACK64_64(ip, op, parm) { \ + BITBLK64_64(ip, 0, op, parm);\ + BITBLK64_64(ip, 1, op, parm);\ + BITBLK64_64(ip, 2, op, parm);\ + BITBLK64_64(ip, 3, op, parm);\ + BITBLK64_64(ip, 4, op, parm);\ + BITBLK64_64(ip, 5, op, parm);\ + BITBLK64_64(ip, 6, op, parm);\ + BITBLK64_64(ip, 7, op, parm);\ + BITBLK64_64(ip, 8, op, parm);\ + BITBLK64_64(ip, 9, op, parm);\ + BITBLK64_64(ip, 10, op, parm);\ + BITBLK64_64(ip, 11, op, parm);\ + BITBLK64_64(ip, 12, op, parm);\ + BITBLK64_64(ip, 13, op, parm);\ + BITBLK64_64(ip, 14, op, parm);\ + BITBLK64_64(ip, 15, op, parm);\ + BITBLK64_64(ip, 16, op, parm);\ + BITBLK64_64(ip, 17, op, parm);\ + BITBLK64_64(ip, 18, op, parm);\ + BITBLK64_64(ip, 19, op, parm);\ + BITBLK64_64(ip, 20, op, parm);\ + BITBLK64_64(ip, 21, op, parm);\ + BITBLK64_64(ip, 22, op, parm);\ + BITBLK64_64(ip, 23, op, parm);\ + BITBLK64_64(ip, 24, op, parm);\ + BITBLK64_64(ip, 25, op, parm);\ + BITBLK64_64(ip, 26, op, parm);\ + BITBLK64_64(ip, 27, op, parm);\ + BITBLK64_64(ip, 28, op, parm);\ + BITBLK64_64(ip, 29, op, parm);\ + BITBLK64_64(ip, 30, op, parm);\ + BITBLK64_64(ip, 31, op, parm); SRCI(ip); op += 64*4/sizeof(op[0]);\ }