/*
 * TurboPFor-Integer-Compression/ext/MaskedVByte/src/varintdecode.c
 * powturbo 55156d368e — 2015-05-28 17:10:28 +02:00
 * (imported file-listing metadata: 1761 lines, 76 KiB, C)
 */
#include "../include/varintdecode.h"
#include <x86intrin.h>
/*
 * Lookup table mapping a bitmask of varint continuation bits to an index
 * into the `vectors[]` pshufb shuffle-control table that simdvbyteinit()
 * fills in (values here stay in 0..169, matching `vectors[170]` below).
 * NOTE(review): this table is machine-generated by the MaskedVByte table
 * generator — it is assumed to be indexed by the movemask of the input
 * bytes' high bits; confirm against the decode loop that consumes it
 * (not visible in this chunk). Do not edit by hand.
 * Aligned to a 4 KiB page (0x1000) so the whole table sits on aligned
 * pages for the hot decode path.
 */
static const uint8_t vec_lookup[] __attribute__((aligned(0x1000))) = { 0, 32,
16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20,
121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119,
10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69,
106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50,
82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 60, 86,
161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84,
160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151,
147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82,
160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 58, 85, 161,
66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 54, 83, 160, 7,
46, 30, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89, 162, 145,
150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94,
76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87, 161, 66, 99,
81, 142, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150,
146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151, 147, 164, 0,
0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70,
109, 148, 165, 2, 36, 20, 121, 12, 57, 85, 161, 66, 97, 79, 136, 145,
153, 149, 0, 1, 34, 18, 119, 10, 53, 83, 160, 6, 45, 29, 130, 71, 112,
148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150, 146, 158, 145,
154, 0, 0, 0, 33, 17, 118, 9, 51, 82, 160, 5, 43, 27, 128, 70, 110, 148,
165, 3, 39, 23, 122, 15, 62, 86, 161, 66, 98, 80, 140, 145, 153, 149, 0,
64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78, 134, 72, 116, 148, 167,
64, 150, 146, 155, 145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0,
0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127, 70, 109, 148, 165, 2,
92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 138, 145, 153, 149, 0, 1, 91,
73, 119, 67, 101, 83, 160, 65, 95, 77, 132, 71, 114, 148, 166, 64, 93,
75, 126, 69, 108, 90, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91,
73, 118, 67, 100, 82, 160, 65, 94, 76, 156, 70, 152, 148, 165, 64, 92,
74, 155, 68, 151, 147, 161, 66, 150, 146, 157, 145, 153, 149, 0, 64, 91,
73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145, 152, 148, 169, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82,
160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 56, 85, 161,
66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 52, 83, 160, 6,
44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145,
150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50, 82, 160, 5, 42,
26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 61, 86, 161, 66, 98, 80,
139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78,
133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150,
146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82, 160, 4, 41, 25, 127,
70, 109, 148, 165, 2, 37, 21, 121, 13, 59, 85, 161, 66, 97, 79, 137,
145, 153, 149, 0, 1, 35, 19, 119, 11, 55, 83, 160, 7, 47, 31, 131, 71,
113, 148, 166, 64, 93, 75, 125, 69, 107, 89, 162, 145, 150, 146, 158,
145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94, 76, 129, 70,
111, 148, 165, 64, 92, 74, 123, 68, 105, 87, 161, 66, 99, 81, 143, 145,
153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145,
152, 148, 168, 64, 150, 146, 155, 145, 151, 147, 164, 0, 0, 0, 0, 0, 0,
0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165,
2, 36, 20, 121, 12, 103, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1,
34, 18, 119, 10, 101, 83, 160, 6, 95, 77, 130, 71, 112, 148, 166, 64,
93, 75, 124, 69, 106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0,
33, 17, 118, 9, 100, 82, 160, 5, 94, 76, 128, 70, 110, 148, 165, 3, 92,
74, 122, 68, 104, 86, 161, 66, 98, 80, 141, 145, 153, 149, 0, 64, 91,
73, 120, 67, 102, 84, 160, 65, 96, 78, 135, 72, 117, 148, 167, 64, 150,
146, 155, 145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16,
118, 8, 100, 82, 160, 4, 94, 76, 127, 70, 109, 148, 165, 2, 92, 74, 121,
68, 103, 85, 161, 66, 97, 79, 157, 145, 153, 149, 0, 1, 91, 73, 119, 67,
101, 83, 160, 65, 95, 77, 156, 71, 152, 148, 166, 64, 93, 75, 155, 69,
151, 147, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67,
100, 82, 160, 65, 94, 76, 156, 70, 152, 148, 165, 64, 92, 74, 155, 68,
151, 147, 161, 66, 150, 146, 157, 145, 153, 149, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2,
36, 20, 121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34,
18, 119, 10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75,
124, 69, 106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17,
118, 9, 50, 82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122,
14, 60, 86, 161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67,
102, 84, 160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155,
145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8,
49, 82, 160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 58,
85, 161, 66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 54, 83,
160, 7, 46, 30, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89,
162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82,
160, 65, 94, 76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87,
161, 66, 99, 81, 142, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147,
160, 65, 150, 146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151,
147, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40,
24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 57, 85, 161, 66, 97, 79,
136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 53, 83, 160, 6, 45, 29, 130,
71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150, 146,
158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 51, 82, 160, 5, 43, 27, 128, 70,
110, 148, 165, 3, 39, 23, 122, 15, 63, 86, 161, 66, 98, 80, 140, 145,
153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78, 134, 72,
116, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150, 146,
159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127, 70,
109, 148, 165, 2, 92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 138, 145,
153, 149, 0, 1, 91, 73, 119, 67, 101, 83, 160, 65, 95, 77, 132, 71, 114,
148, 166, 64, 93, 75, 126, 69, 108, 90, 162, 145, 150, 146, 158, 145,
154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94, 76, 156, 70, 152,
148, 165, 64, 92, 74, 155, 68, 151, 147, 161, 66, 150, 146, 157, 145,
153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145,
152, 148, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32,
16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20,
121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119,
10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69,
106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50,
82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 104, 86,
161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84,
160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151,
147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82,
160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 103, 85,
161, 66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 101, 83,
160, 7, 95, 77, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89,
162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82,
160, 65, 94, 76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87,
161, 66, 99, 81, 144, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147,
160, 65, 150, 146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151,
147, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40,
24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 103, 85, 161, 66, 97,
79, 136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 101, 83, 160, 6, 95, 77,
130, 71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150,
146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 100, 82, 160, 5, 94, 76,
128, 70, 110, 148, 165, 3, 92, 74, 122, 68, 104, 86, 161, 66, 98, 80,
157, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78,
156, 72, 152, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150,
146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127,
70, 109, 148, 165, 2, 92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 157,
145, 153, 149, 0, 1, 91, 73, 119, 67, 101, 83, 160, 65, 95, 77, 156, 71,
152, 148, 166, 64, 93, 75, 155, 69, 151, 147, 162, 145, 150, 146, 158,
145, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32,
16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20,
121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119,
10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69,
106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50,
82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 60, 86,
161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84,
160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151,
147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82,
160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 58, 85, 161,
66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 54, 83, 160, 7,
46, 30, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89, 162, 145,
150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94,
76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87, 161, 66, 99,
81, 142, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150,
146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151, 147, 164, 0,
0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70,
109, 148, 165, 2, 36, 20, 121, 12, 57, 85, 161, 66, 97, 79, 136, 145,
153, 149, 0, 1, 34, 18, 119, 10, 53, 83, 160, 6, 45, 29, 130, 71, 112,
148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150, 146, 158, 145,
154, 0, 0, 0, 33, 17, 118, 9, 51, 82, 160, 5, 43, 27, 128, 70, 110, 148,
165, 3, 39, 23, 122, 15, 62, 86, 161, 66, 98, 80, 140, 145, 153, 149, 0,
64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78, 134, 72, 116, 148, 167,
64, 150, 146, 155, 145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0,
0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127, 70, 109, 148, 165, 2,
92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 138, 145, 153, 149, 0, 1, 91,
73, 119, 67, 101, 83, 160, 65, 95, 77, 132, 71, 114, 148, 166, 64, 93,
75, 126, 69, 108, 90, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91,
73, 118, 67, 100, 82, 160, 65, 94, 76, 156, 70, 152, 148, 165, 64, 92,
74, 155, 68, 151, 147, 161, 66, 150, 146, 157, 145, 153, 149, 0, 64, 91,
73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145, 152, 148, 169, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82,
160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 56, 85, 161,
66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 52, 83, 160, 6,
44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145,
150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50, 82, 160, 5, 42,
26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 61, 86, 161, 66, 98, 80,
139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78,
133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150,
146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82, 160, 4, 41, 25, 127,
70, 109, 148, 165, 2, 37, 21, 121, 13, 59, 85, 161, 66, 97, 79, 137,
145, 153, 149, 0, 1, 35, 19, 119, 11, 55, 83, 160, 7, 47, 31, 131, 71,
113, 148, 166, 64, 93, 75, 125, 69, 107, 89, 162, 145, 150, 146, 158,
145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94, 76, 129, 70,
111, 148, 165, 64, 92, 74, 123, 68, 105, 87, 161, 66, 99, 81, 143, 145,
153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145,
152, 148, 168, 64, 150, 146, 155, 145, 151, 147, 164, 0, 0, 0, 0, 0, 0,
0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165,
2, 36, 20, 121, 12, 103, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1,
34, 18, 119, 10, 101, 83, 160, 6, 95, 77, 130, 71, 112, 148, 166, 64,
93, 75, 124, 69, 106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0,
33, 17, 118, 9, 100, 82, 160, 5, 94, 76, 128, 70, 110, 148, 165, 3, 92,
74, 122, 68, 104, 86, 161, 66, 98, 80, 141, 145, 153, 149, 0, 64, 91,
73, 120, 67, 102, 84, 160, 65, 96, 78, 135, 72, 117, 148, 167, 64, 150,
146, 155, 145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16,
118, 8, 100, 82, 160, 4, 94, 76, 127, 70, 109, 148, 165, 2, 92, 74, 121,
68, 103, 85, 161, 66, 97, 79, 157, 145, 153, 149, 0, 1, 91, 73, 119, 67,
101, 83, 160, 65, 95, 77, 156, 71, 152, 148, 166, 64, 93, 75, 155, 69,
151, 147, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67,
100, 82, 160, 65, 94, 76, 156, 70, 152, 148, 165, 64, 92, 74, 155, 68,
151, 147, 161, 66, 150, 146, 157, 145, 153, 149, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2,
36, 20, 121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34,
18, 119, 10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75,
124, 69, 106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17,
118, 9, 50, 82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122,
14, 60, 86, 161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67,
102, 84, 160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155,
145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8,
49, 82, 160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 58,
85, 161, 66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 54, 83,
160, 7, 46, 30, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89,
162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82,
160, 65, 94, 76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87,
161, 66, 99, 81, 142, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147,
160, 65, 150, 146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151,
147, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40,
24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 57, 85, 161, 66, 97, 79,
136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 53, 83, 160, 6, 45, 29, 130,
71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150, 146,
158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 51, 82, 160, 5, 43, 27, 128, 70,
110, 148, 165, 3, 39, 23, 122, 15, 104, 86, 161, 66, 98, 80, 140, 145,
153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78, 134, 72,
116, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150, 146,
159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127, 70,
109, 148, 165, 2, 92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 138, 145,
153, 149, 0, 1, 91, 73, 119, 67, 101, 83, 160, 65, 95, 77, 132, 71, 114,
148, 166, 64, 93, 75, 126, 69, 108, 90, 162, 145, 150, 146, 158, 145,
154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94, 76, 156, 70, 152,
148, 165, 64, 92, 74, 155, 68, 151, 147, 161, 66, 150, 146, 157, 145,
153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145,
152, 148, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32,
16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20,
121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119,
10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69,
106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50,
82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 104, 86,
161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84,
160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151,
147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82,
160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 103, 85,
161, 66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 101, 83,
160, 7, 95, 77, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89,
162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82,
160, 65, 94, 76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87,
161, 66, 99, 81, 157, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147,
160, 65, 150, 146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151,
147, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40,
24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 103, 85, 161, 66, 97,
79, 136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 101, 83, 160, 6, 95, 77,
130, 71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150,
146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 100, 82, 160, 5, 94, 76,
128, 70, 110, 148, 165, 3, 92, 74, 122, 68, 104, 86, 161, 66, 98, 80,
157, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78,
156, 72, 152, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150,
146, 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };
/*
 * Scalar fallback: decode one LEB128-style (VByte) unsigned 32-bit
 * varint from `in`. Each byte contributes 7 payload bits, low group
 * first; a high bit (>= 128) marks a continuation. The fifth byte, if
 * reached, is taken unconditionally (its own high bits are not checked).
 *
 * Stores the decoded value through `out` and returns the number of
 * input bytes consumed (1..5). Requires the buffer to hold at least the
 * full encoded integer.
 */
static int read_int(const uint8_t *in, uint32_t *out) {
    *out = in[0] & 0x7F;
    int i = 0;
    /* At most four continuation steps: byte 4 is always the last. */
    while (i < 4 && in[i] >= 128) {
        i++;
        *out |= (uint32_t)(in[i] & 0x7FU) << (7 * i);
    }
    return i + 1;
}
/*
 * Scalar fallback: decode one varint delta from `in` (same VByte layout
 * as read_int), add it to the running value `*prev`, and store the new
 * absolute value through both `*prev` and `*out`.
 *
 * Returns the number of input bytes consumed (1..5). As in read_int,
 * the fifth byte is consumed unconditionally.
 */
static int read_int_delta(const uint8_t *in, uint32_t *out, uint32_t *prev) {
    *out = in[0] & 0x7F;
    int i = 0;
    /* Accumulate 7-bit groups while the continuation bit is set. */
    while (i < 4 && in[i] >= 128) {
        i++;
        *out |= (uint32_t)(in[i] & 0x7FU) << (7 * i);
    }
    /* Prefix-sum: delta becomes absolute value. */
    *prev += *out;
    *out = *prev;
    return i + 1;
}
/*
 * For each continuation-bit mask (same indexing scheme as vec_lookup
 * above), the number of input bytes consumed by the corresponding
 * shuffle pattern.
 * NOTE(review): machine-generated together with vec_lookup — entries of
 * 0 presumably mark masks that cannot occur in valid input; confirm
 * against the table generator. Do not edit by hand.
 */
static const uint8_t bytes_consumed[] = { 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6,
7, 5, 6, 7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8,
9, 7, 5, 8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2,
7, 0, 0, 6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9,
10, 8, 6, 6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8,
9, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7,
9, 6, 5, 7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 10, 7, 6, 6, 7, 7, 9, 2,
6, 6, 0, 7, 9, 9, 7, 9, 10, 7, 5, 9, 10, 10, 9, 7, 9, 5, 7, 4, 7, 7, 9,
7, 9, 9, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9,
6, 9, 5, 6, 4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 10, 2, 6, 6, 0, 4, 5, 5, 4,
5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0,
2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7,
8, 10, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 10, 7, 5, 8, 10, 10,
8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8,
6, 8, 10, 6, 5, 8, 10, 10, 8, 6, 8, 5, 6, 8, 10, 10, 8, 10, 11, 8, 6, 6,
8, 8, 10, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 10, 8, 10, 5, 8,
4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5,
7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6, 7, 6, 7, 7, 6, 6, 7, 7, 10, 2, 6, 6, 0,
7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 10, 7, 10, 5, 7, 4, 7, 7, 10, 7, 10,
10, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5,
5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3, 6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4,
4, 5, 5, 3, 3, 5, 2, 5, 5, 10, 4, 3, 3, 4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0,
0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 9,
7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 9, 7, 5, 8, 9, 9, 8, 7, 8,
5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 9,
6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9, 11, 8, 6, 6, 8, 8, 9, 2, 6,
6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8, 9, 5, 8, 4, 3, 3, 4, 2, 4,
4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 9, 6, 5, 7, 9, 9, 7, 6, 7,
5, 6, 7, 9, 9, 7, 9, 11, 7, 6, 6, 7, 7, 9, 2, 6, 6, 0, 7, 9, 9, 7, 9,
11, 7, 5, 9, 11, 11, 9, 7, 9, 5, 7, 4, 7, 7, 9, 7, 9, 9, 7, 2, 3, 3, 7,
2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9, 6, 9, 5, 6, 4, 6, 6, 9,
6, 9, 9, 6, 6, 9, 9, 11, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5,
2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6,
7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 7, 7, 6, 6, 7, 7, 8,
2, 6, 6, 0, 7, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 8, 7, 8, 5, 7, 4, 7, 7, 8,
7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 8,
6, 8, 5, 6, 8, 6, 6, 8, 6, 8, 8, 6, 6, 8, 8, 11, 2, 6, 6, 0, 4, 5, 5, 8,
5, 8, 8, 5, 5, 8, 8, 11, 8, 11, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3,
8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6,
7, 6, 7, 7, 6, 6, 7, 7, 6, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7,
5, 7, 5, 5, 7, 4, 7, 7, 4, 7, 4, 4, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5,
6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3,
6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 0, 4, 3, 3,
4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8,
7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8,
7, 8, 9, 7, 5, 8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3,
7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9,
8, 9, 10, 8, 6, 6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8,
9, 8, 9, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7,
6, 7, 9, 6, 5, 7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 10, 7, 6, 6, 7, 7,
9, 2, 6, 6, 0, 7, 9, 9, 7, 9, 10, 7, 5, 9, 10, 10, 9, 7, 9, 5, 7, 4, 7,
7, 9, 7, 9, 9, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6,
6, 9, 6, 9, 5, 6, 4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 10, 2, 6, 6, 0, 4, 5,
5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3,
3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8,
8, 7, 8, 10, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 10, 7, 5, 8,
10, 10, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0,
6, 8, 8, 6, 8, 10, 6, 5, 8, 10, 10, 8, 6, 8, 5, 6, 8, 10, 10, 8, 10, 12,
8, 6, 6, 8, 8, 10, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 10, 8,
10, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7,
6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6, 7, 6, 7, 7, 6, 6, 7, 7, 10, 2,
6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 10, 7, 10, 5, 7, 4, 7, 7, 10,
7, 10, 10, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6,
5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3, 6, 2, 6, 6, 0, 4, 5, 5,
4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 10, 4, 3, 3, 4, 2, 4, 4, 0, 2, 3, 3,
0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8,
7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 9, 7, 5, 8, 9, 9,
8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8,
6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9, 8, 8, 6, 6, 8, 8,
9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8, 9, 5, 8, 4, 3, 3,
4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 9, 6, 5, 7, 9, 9,
7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 7, 7, 6, 6, 7, 7, 9, 2, 6, 6, 0, 7, 9, 9,
7, 9, 7, 7, 5, 9, 7, 7, 9, 7, 9, 5, 7, 4, 7, 7, 9, 7, 9, 9, 7, 2, 3, 3,
7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9, 6, 9, 5, 6, 4, 6, 6,
9, 6, 9, 9, 6, 6, 9, 9, 12, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3,
5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7,
6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 7, 7, 6, 6, 7, 7,
8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 8, 7, 8, 5, 7, 4, 7, 7,
8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6,
8, 6, 8, 5, 6, 8, 6, 6, 8, 6, 8, 8, 6, 6, 8, 8, 6, 2, 6, 6, 0, 4, 5, 5,
8, 5, 8, 8, 5, 5, 8, 8, 5, 8, 5, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3,
8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6,
7, 6, 7, 7, 6, 6, 7, 7, 6, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7,
5, 7, 5, 5, 7, 4, 7, 7, 4, 7, 4, 4, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5,
6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3,
6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 0, 4, 3, 3,
4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8,
7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8,
7, 8, 9, 7, 5, 8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3,
7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9,
8, 9, 10, 8, 6, 6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8,
9, 8, 9, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7,
6, 7, 9, 6, 5, 7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 10, 7, 6, 6, 7, 7,
9, 2, 6, 6, 0, 7, 9, 9, 7, 9, 10, 7, 5, 9, 10, 10, 9, 7, 9, 5, 7, 4, 7,
7, 9, 7, 9, 9, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6,
6, 9, 6, 9, 5, 6, 4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 10, 2, 6, 6, 0, 4, 5,
5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3,
3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8,
8, 7, 8, 10, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 10, 7, 5, 8,
10, 10, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0,
6, 8, 8, 6, 8, 10, 6, 5, 8, 10, 10, 8, 6, 8, 5, 6, 8, 10, 10, 8,
10, 8, 8, 6, 6, 8, 8, 10, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8,
10, 8, 10, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7,
7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6, 7, 6, 7, 7, 6, 6, 7,
7, 10, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 10, 7, 10, 5, 7, 4,
7, 7, 10, 7, 10, 10, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5,
5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3, 6, 2, 6, 6, 0,
4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 10, 4, 3, 3, 4, 2, 4, 4, 0,
2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6,
7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 9, 7, 5,
8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0,
6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9, 8, 8, 6,
6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8, 9, 5, 8,
4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 9, 6, 5,
7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 7, 7, 6, 6, 7, 7, 9, 2, 6, 6, 0,
7, 9, 9, 7, 9, 7, 7, 5, 9, 7, 7, 9, 7, 9, 5, 7, 4, 7, 7, 9, 7, 9, 9, 7,
2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9, 6, 9, 5, 6,
4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5,
5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0, 2, 0, 0, 0,
6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 7, 7, 6,
6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 8, 7, 8, 5, 7,
4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 6, 6, 5,
8, 6, 6, 8, 6, 8, 5, 6, 8, 6, 6, 8, 6, 8, 8, 6, 6, 8, 8, 6, 2, 6, 6, 0,
4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 5, 8, 5, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8,
2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6,
7, 6, 6, 7, 6, 7, 7, 6, 6, 7, 7, 6, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5,
5, 7, 7, 5, 7, 5, 5, 7, 4, 7, 7, 4, 7, 4, 4, 7, 2, 3, 3, 7, 2, 7, 0, 0,
6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6,
6, 3, 3, 6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 0,
4, 3, 3, 4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0, 0, 0, };
/* Pair of (shuffle-vector index, input bytes consumed) so the decode
 * loop can fetch both with a single byte-pair load. */
typedef struct index_bytes_consumed {
uint8_t index;
uint8_t bytes_consumed;
} index_bytes_consumed;
/* Fused copy of vec_lookup + bytes_consumed, one entry per mask value.
 * NOTE(review): presumably populated at init time — the code that fills
 * it is not visible in this chunk; confirm before relying on it. */
static index_bytes_consumed combined_lookup[sizeof(bytes_consumed)];
/* Precomputed pshufb shuffle-control vectors; filled by simdvbyteinit()
 * and selected via vec_lookup / combined_lookup. */
static __m128i vectors[170];
/* One-time initialization: builds the 170 byte-shuffle patterns used to
 * gather varint payload bytes into lanes, and fuses vec_lookup[] with
 * bytes_consumed[] into combined_lookup[] so decoding needs one table read
 * per group.  Must be called before any masked_vbyte_* decode function. */
void simdvbyteinit(void) {
/* vectors[0..63]: patterns used by the "6 integers per group" branch
 * (each integer encoded in 1-2 bytes; -1 lanes are zeroed by PSHUFB). */
vectors[0] = _mm_setr_epi8(0, -1, 4, -1, 1, -1, 5, -1, 2, -1, -1, -1, 3, -1,
-1, -1);
vectors[1] = _mm_setr_epi8(0, -1, 4, -1, 1, -1, 5, 6, 2, -1, -1, -1, 3, -1,
-1, -1);
vectors[2] = _mm_setr_epi8(0, -1, 4, 5, 1, -1, 6, -1, 2, -1, -1, -1, 3, -1,
-1, -1);
vectors[3] = _mm_setr_epi8(0, -1, 4, 5, 1, -1, 6, 7, 2, -1, -1, -1, 3, -1,
-1, -1);
vectors[4] = _mm_setr_epi8(0, -1, 5, -1, 1, -1, 6, -1, 2, -1, -1, -1, 3, 4,
-1, -1);
vectors[5] = _mm_setr_epi8(0, -1, 5, -1, 1, -1, 6, 7, 2, -1, -1, -1, 3, 4,
-1, -1);
vectors[6] = _mm_setr_epi8(0, -1, 5, 6, 1, -1, 7, -1, 2, -1, -1, -1, 3, 4,
-1, -1);
vectors[7] = _mm_setr_epi8(0, -1, 5, 6, 1, -1, 7, 8, 2, -1, -1, -1, 3, 4,
-1, -1);
vectors[8] = _mm_setr_epi8(0, -1, 5, -1, 1, -1, 6, -1, 2, 3, -1, -1, 4, -1,
-1, -1);
vectors[9] = _mm_setr_epi8(0, -1, 5, -1, 1, -1, 6, 7, 2, 3, -1, -1, 4, -1,
-1, -1);
vectors[10] = _mm_setr_epi8(0, -1, 5, 6, 1, -1, 7, -1, 2, 3, -1, -1, 4, -1,
-1, -1);
vectors[11] = _mm_setr_epi8(0, -1, 5, 6, 1, -1, 7, 8, 2, 3, -1, -1, 4, -1,
-1, -1);
vectors[12] = _mm_setr_epi8(0, -1, 6, -1, 1, -1, 7, -1, 2, 3, -1, -1, 4, 5,
-1, -1);
vectors[13] = _mm_setr_epi8(0, -1, 6, -1, 1, -1, 7, 8, 2, 3, -1, -1, 4, 5,
-1, -1);
vectors[14] = _mm_setr_epi8(0, -1, 6, 7, 1, -1, 8, -1, 2, 3, -1, -1, 4, 5,
-1, -1);
vectors[15] = _mm_setr_epi8(0, -1, 6, 7, 1, -1, 8, 9, 2, 3, -1, -1, 4, 5,
-1, -1);
vectors[16] = _mm_setr_epi8(0, -1, 5, -1, 1, 2, 6, -1, 3, -1, -1, -1, 4, -1,
-1, -1);
vectors[17] = _mm_setr_epi8(0, -1, 5, -1, 1, 2, 6, 7, 3, -1, -1, -1, 4, -1,
-1, -1);
vectors[18] = _mm_setr_epi8(0, -1, 5, 6, 1, 2, 7, -1, 3, -1, -1, -1, 4, -1,
-1, -1);
vectors[19] = _mm_setr_epi8(0, -1, 5, 6, 1, 2, 7, 8, 3, -1, -1, -1, 4, -1,
-1, -1);
vectors[20] = _mm_setr_epi8(0, -1, 6, -1, 1, 2, 7, -1, 3, -1, -1, -1, 4, 5,
-1, -1);
vectors[21] = _mm_setr_epi8(0, -1, 6, -1, 1, 2, 7, 8, 3, -1, -1, -1, 4, 5,
-1, -1);
vectors[22] = _mm_setr_epi8(0, -1, 6, 7, 1, 2, 8, -1, 3, -1, -1, -1, 4, 5,
-1, -1);
vectors[23] = _mm_setr_epi8(0, -1, 6, 7, 1, 2, 8, 9, 3, -1, -1, -1, 4, 5,
-1, -1);
vectors[24] = _mm_setr_epi8(0, -1, 6, -1, 1, 2, 7, -1, 3, 4, -1, -1, 5, -1,
-1, -1);
vectors[25] = _mm_setr_epi8(0, -1, 6, -1, 1, 2, 7, 8, 3, 4, -1, -1, 5, -1,
-1, -1);
vectors[26] = _mm_setr_epi8(0, -1, 6, 7, 1, 2, 8, -1, 3, 4, -1, -1, 5, -1,
-1, -1);
vectors[27] = _mm_setr_epi8(0, -1, 6, 7, 1, 2, 8, 9, 3, 4, -1, -1, 5, -1,
-1, -1);
vectors[28] = _mm_setr_epi8(0, -1, 7, -1, 1, 2, 8, -1, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[29] = _mm_setr_epi8(0, -1, 7, -1, 1, 2, 8, 9, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[30] = _mm_setr_epi8(0, -1, 7, 8, 1, 2, 9, -1, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[31] = _mm_setr_epi8(0, -1, 7, 8, 1, 2, 9, 10, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[32] = _mm_setr_epi8(0, 1, 5, -1, 2, -1, 6, -1, 3, -1, -1, -1, 4, -1,
-1, -1);
vectors[33] = _mm_setr_epi8(0, 1, 5, -1, 2, -1, 6, 7, 3, -1, -1, -1, 4, -1,
-1, -1);
vectors[34] = _mm_setr_epi8(0, 1, 5, 6, 2, -1, 7, -1, 3, -1, -1, -1, 4, -1,
-1, -1);
vectors[35] = _mm_setr_epi8(0, 1, 5, 6, 2, -1, 7, 8, 3, -1, -1, -1, 4, -1,
-1, -1);
vectors[36] = _mm_setr_epi8(0, 1, 6, -1, 2, -1, 7, -1, 3, -1, -1, -1, 4, 5,
-1, -1);
vectors[37] = _mm_setr_epi8(0, 1, 6, -1, 2, -1, 7, 8, 3, -1, -1, -1, 4, 5,
-1, -1);
vectors[38] = _mm_setr_epi8(0, 1, 6, 7, 2, -1, 8, -1, 3, -1, -1, -1, 4, 5,
-1, -1);
vectors[39] = _mm_setr_epi8(0, 1, 6, 7, 2, -1, 8, 9, 3, -1, -1, -1, 4, 5,
-1, -1);
vectors[40] = _mm_setr_epi8(0, 1, 6, -1, 2, -1, 7, -1, 3, 4, -1, -1, 5, -1,
-1, -1);
vectors[41] = _mm_setr_epi8(0, 1, 6, -1, 2, -1, 7, 8, 3, 4, -1, -1, 5, -1,
-1, -1);
vectors[42] = _mm_setr_epi8(0, 1, 6, 7, 2, -1, 8, -1, 3, 4, -1, -1, 5, -1,
-1, -1);
vectors[43] = _mm_setr_epi8(0, 1, 6, 7, 2, -1, 8, 9, 3, 4, -1, -1, 5, -1,
-1, -1);
vectors[44] = _mm_setr_epi8(0, 1, 7, -1, 2, -1, 8, -1, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[45] = _mm_setr_epi8(0, 1, 7, -1, 2, -1, 8, 9, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[46] = _mm_setr_epi8(0, 1, 7, 8, 2, -1, 9, -1, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[47] = _mm_setr_epi8(0, 1, 7, 8, 2, -1, 9, 10, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[48] = _mm_setr_epi8(0, 1, 6, -1, 2, 3, 7, -1, 4, -1, -1, -1, 5, -1,
-1, -1);
vectors[49] = _mm_setr_epi8(0, 1, 6, -1, 2, 3, 7, 8, 4, -1, -1, -1, 5, -1,
-1, -1);
vectors[50] = _mm_setr_epi8(0, 1, 6, 7, 2, 3, 8, -1, 4, -1, -1, -1, 5, -1,
-1, -1);
vectors[51] = _mm_setr_epi8(0, 1, 6, 7, 2, 3, 8, 9, 4, -1, -1, -1, 5, -1,
-1, -1);
vectors[52] = _mm_setr_epi8(0, 1, 7, -1, 2, 3, 8, -1, 4, -1, -1, -1, 5, 6,
-1, -1);
vectors[53] = _mm_setr_epi8(0, 1, 7, -1, 2, 3, 8, 9, 4, -1, -1, -1, 5, 6,
-1, -1);
vectors[54] = _mm_setr_epi8(0, 1, 7, 8, 2, 3, 9, -1, 4, -1, -1, -1, 5, 6,
-1, -1);
vectors[55] = _mm_setr_epi8(0, 1, 7, 8, 2, 3, 9, 10, 4, -1, -1, -1, 5, 6,
-1, -1);
vectors[56] = _mm_setr_epi8(0, 1, 7, -1, 2, 3, 8, -1, 4, 5, -1, -1, 6, -1,
-1, -1);
vectors[57] = _mm_setr_epi8(0, 1, 7, -1, 2, 3, 8, 9, 4, 5, -1, -1, 6, -1,
-1, -1);
vectors[58] = _mm_setr_epi8(0, 1, 7, 8, 2, 3, 9, -1, 4, 5, -1, -1, 6, -1,
-1, -1);
vectors[59] = _mm_setr_epi8(0, 1, 7, 8, 2, 3, 9, 10, 4, 5, -1, -1, 6, -1,
-1, -1);
vectors[60] = _mm_setr_epi8(0, 1, 8, -1, 2, 3, 9, -1, 4, 5, -1, -1, 6, 7,
-1, -1);
vectors[61] = _mm_setr_epi8(0, 1, 8, -1, 2, 3, 9, 10, 4, 5, -1, -1, 6, 7,
-1, -1);
vectors[62] = _mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, -1, 4, 5, -1, -1, 6, 7,
-1, -1);
vectors[63] = _mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, -1, -1, 6, 7,
-1, -1);
/* vectors[64..144]: patterns for the "4 integers per group" branch
 * (each integer encoded in 1-3 bytes, gathered into 32-bit lanes). */
vectors[64] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3,
-1, -1, -1);
vectors[65] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3,
4, -1, -1);
vectors[66] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3,
4, 5, -1);
vectors[67] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4,
-1, -1, -1);
vectors[68] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4,
5, -1, -1);
vectors[69] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4,
5, 6, -1);
vectors[70] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5,
-1, -1, -1);
vectors[71] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6,
-1, -1);
vectors[72] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6,
7, -1);
vectors[73] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4,
-1, -1, -1);
vectors[74] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4,
5, -1, -1);
vectors[75] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4,
5, 6, -1);
vectors[76] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5,
-1, -1, -1);
vectors[77] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[78] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6,
7, -1);
vectors[79] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1,
-1, -1);
vectors[80] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7,
-1, -1);
vectors[81] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7,
8, -1);
vectors[82] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5,
-1, -1, -1);
vectors[83] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6,
-1, -1);
vectors[84] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6,
7, -1);
vectors[85] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1,
-1, -1);
vectors[86] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7,
-1, -1);
vectors[87] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7,
8, -1);
vectors[88] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1,
-1, -1);
vectors[89] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8,
-1, -1);
vectors[90] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8,
9, -1);
vectors[91] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4,
-1, -1, -1);
vectors[92] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4,
5, -1, -1);
vectors[93] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4,
5, 6, -1);
vectors[94] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5,
-1, -1, -1);
vectors[95] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6,
-1, -1);
vectors[96] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6,
7, -1);
vectors[97] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1,
-1, -1);
vectors[98] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7,
-1, -1);
vectors[99] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7,
8, -1);
vectors[100] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5,
-1, -1, -1);
vectors[101] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5,
6, -1, -1);
vectors[102] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5,
6, 7, -1);
vectors[103] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6,
-1, -1, -1);
vectors[104] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7,
-1, -1);
vectors[105] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7,
8, -1);
vectors[106] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1,
-1, -1);
vectors[107] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8,
-1, -1);
vectors[108] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8,
9, -1);
vectors[109] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6,
-1, -1, -1);
vectors[110] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7,
-1, -1);
vectors[111] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7,
8, -1);
vectors[112] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1,
-1, -1);
vectors[113] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8,
-1, -1);
vectors[114] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8,
9, -1);
vectors[115] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1,
-1, -1);
vectors[116] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9,
-1, -1);
vectors[117] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9,
10, -1);
vectors[118] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5,
-1, -1, -1);
vectors[119] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5,
6, -1, -1);
vectors[120] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5,
6, 7, -1);
vectors[121] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6,
-1, -1, -1);
vectors[122] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7,
-1, -1);
vectors[123] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7,
8, -1);
vectors[124] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1,
-1, -1);
vectors[125] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8,
-1, -1);
vectors[126] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8,
9, -1);
vectors[127] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6,
-1, -1, -1);
vectors[128] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7,
-1, -1);
vectors[129] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7,
8, -1);
vectors[130] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1,
-1, -1);
vectors[131] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8,
-1, -1);
vectors[132] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8,
9, -1);
vectors[133] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1,
-1, -1);
vectors[134] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9,
-1, -1);
vectors[135] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9,
10, -1);
vectors[136] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1,
-1, -1);
vectors[137] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8,
-1, -1);
vectors[138] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8,
9, -1);
vectors[139] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1,
-1, -1);
vectors[140] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9,
-1, -1);
vectors[141] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9,
10, -1);
vectors[142] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1,
-1, -1);
vectors[143] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10,
-1, -1);
vectors[144] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10,
11, -1);
/* vectors[145..169]: patterns for the "2 integers per group" branch
 * (long varints of up to 5 bytes each; byte order here matches the
 * multiply/shift recombination in the decoders below). */
vectors[145] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1,
-1, -1, -1, 1);
vectors[146] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, -1, -1,
-1, -1, -1, 1);
vectors[147] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1,
-1, -1, -1, 1);
vectors[148] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4,
-1, -1, 1);
vectors[149] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4,
-1, 5, 1);
vectors[150] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1,
-1, -1, -1, 2);
vectors[151] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, 3, -1, -1, -1,
-1, -1, -1, 2);
vectors[152] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, -1,
-1, -1, 2);
vectors[153] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5,
-1, -1, 2);
vectors[154] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5,
-1, 6, 2);
vectors[155] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, -1, -1, -1, -1,
-1, -1, -1, 3);
vectors[156] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, 4, -1, -1, -1, -1,
-1, -1, 3);
vectors[157] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, -1,
-1, -1, 3);
vectors[158] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, 6,
-1, -1, 3);
vectors[159] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, 6,
-1, 7, 3);
vectors[160] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, -1, -1, -1, -1, -1,
-1, -1, 4);
vectors[161] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, 5, -1, -1, -1, -1,
-1, -1, 4);
vectors[162] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, -1,
-1, -1, 4);
vectors[163] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, 7,
-1, -1, 4);
vectors[164] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, 7,
-1, 8, 4);
vectors[165] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, -1, -1, -1, -1, -1,
-1, -1, 5);
vectors[166] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, 6, -1, -1, -1, -1,
-1, -1, 5);
vectors[167] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, -1,
-1, -1, 5);
vectors[168] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, 8, -1,
-1, 5);
vectors[169] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, 8, -1,
9, 5);
/* Fuse the two parallel tables into one struct-of-pairs table so the hot
 * path needs a single load per 12-bit mask. */
uint64_t i;
for (i = 0; i < sizeof(bytes_consumed); i++) {
index_bytes_consumed combined = { vec_lookup[i], bytes_consumed[i] };
combined_lookup[i] = combined;
}
}
/* Decode one group of masked-vbyte bytes starting at `in`.
 * in        : input stream, positioned at the next undecoded byte
 * out       : output buffer; whole SSE registers are stored, so up to 64
 *             bytes may be written here even when fewer ints are produced
 * mask      : continuation-bit signature for the upcoming bytes (bit i set
 *             <=> in[i] has its high bit set, per _mm_movemask_epi8 in the
 *             callers)
 * ints_read : receives the number of integers produced (16, 6, 4 or 2)
 * Returns the number of input bytes consumed. */
static uint64_t masked_vbyte_read_group(const uint8_t* in, uint32_t* out,
uint64_t mask, uint64_t* ints_read) {
__m128i initial = _mm_lddqu_si128((const __m128i *) (in));
__m128i * mout = (__m128i *) out;
/* Fast path: no continuation bit among the next 16 bytes, i.e. 16 complete
 * one-byte values; widen them 4 at a time to 32 bits. */
if (!(mask & 0xFFFF)) {
__m128i result = _mm_cvtepi8_epi32(initial);
_mm_storeu_si128(mout, result);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
_mm_storeu_si128(mout + 1, result);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
_mm_storeu_si128(mout + 2, result);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
_mm_storeu_si128(mout + 3, result);
*ints_read = 16;
return 16;
}
/* General path: the low 12 mask bits select a precomputed shuffle pattern
 * plus the matching byte count, via a single fused table lookup. */
uint32_t low_12_bits = mask & 0xFFF;
// combine index and bytes consumed into a single lookup
index_bytes_consumed combined = combined_lookup[low_12_bits];
uint64_t consumed = combined.bytes_consumed;
uint8_t index = combined.index;
__m128i shuffle_vector = vectors[index];
/* indices 0..63: six integers, each encoded in 1-2 bytes; gather each
 * varint into a 16-bit lane, then strip the continuation bits. */
if (index < 64) {
*ints_read = 6;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi16(0x007F));
__m128i high_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi16(0x7F00));
/* close the 1-bit gap left by the dropped continuation bit */
__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
__m128i unpacked_result_a = _mm_and_si128(packed_result,
_mm_set1_epi32(0x0000FFFF));
_mm_storeu_si128(mout, unpacked_result_a);
__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
_mm_storel_epi64(mout+1, unpacked_result_b);
//_mm_storeu_si128(mout + 1, unpacked_result_b); // maybe faster to write 16 bytes?
return consumed;
}
/* indices 64..144: four integers, each encoded in 1-3 bytes; gather each
 * varint into a 32-bit lane and compact the three 7-bit chunks. */
if (index < 145) {
*ints_read = 4;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x0000007F));
__m128i middle_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x00007F00));
__m128i high_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x007F0000));
__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
_mm_storeu_si128(mout, result);
return consumed;
}
/* indices 145..169: two long integers (up to 5 bytes each); strip all
 * continuation bits first, then recombine the 7-bit chunks with a
 * per-lane multiply + shift before compacting the even bytes. */
*ints_read = 2;
__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
__m128i split_bytes = _mm_mullo_epi16(bytes_to_decode,
_mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
__m128i result_evens = _mm_or_si128(recombined, low_byte);
__m128i result = _mm_shuffle_epi8(result_evens,
_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1,
-1));
_mm_storel_epi64(mout, result);
//_mm_storeu_si128(mout, result); // maybe faster to write 16 bytes?
return consumed;
}
/* In-register inclusive prefix sum of the four 32-bit lanes of curr,
 * seeded with the running total carried in the last lane of prev:
 * out[i] = prev[3] + curr[0] + ... + curr[i]. */
__m128i PrefixSum(__m128i curr, __m128i prev) {
	/* Broadcast the carry (last lane of the previous result). */
	const __m128i carry = _mm_shuffle_epi32(prev, 0xff);
	/* Log-step scan: add lanes shifted by one, then by two positions. */
	__m128i sum = _mm_add_epi32(curr, _mm_slli_si128(curr, 4));
	sum = _mm_add_epi32(sum, _mm_slli_si128(sum, 8));
	return _mm_add_epi32(sum, carry);
}
/* Prefix sum for a group that produced only two integers: just the first
 * two lanes of curr are real data, the rest is garbage.  With P = prev[3],
 * A = curr[0], B = curr[1], the result is [P+A, P+A+B, P+A+B, P+A+B] --
 * so the last lane again carries the running total. */
__m128i PrefixSum2ints(__m128i curr, __m128i prev) {
	const __m128i carry = _mm_shuffle_epi32(prev, 0xff); /* [P P P P] */
	/* lane1 becomes A+B; higher lanes are garbage sums. */
	__m128i pair = _mm_add_epi32(curr, _mm_slli_si128(curr, 4));
	/* replicate the meaningful lanes: [A, A+B, A+B, A+B] */
	pair = _mm_shuffle_epi32(pair, 0x54);
	return _mm_add_epi32(pair, carry);
}
/* Delta variant of masked_vbyte_read_group: decodes one group and replaces
 * each decoded delta with its running prefix sum.
 * prev : in/out; its last 32-bit lane carries the running total across
 *        calls (see PrefixSum / PrefixSum2ints).
 * Otherwise identical contract: mask is the continuation-bit signature,
 * *ints_read receives 16, 6, 4 or 2, and the byte count is returned. */
static uint64_t masked_vbyte_read_group_delta(const uint8_t* in, uint32_t* out,
uint64_t mask, uint64_t* ints_read, __m128i * prev) {
__m128i initial = _mm_lddqu_si128((const __m128i *) (in));
__m128i * mout = (__m128i *) out;
/* 16 one-byte deltas: widen 4 at a time and prefix-sum each quartet. */
if (!(mask & 0xFFFF)) {
__m128i result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout, *prev);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout + 1, *prev);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout + 2, *prev);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout + 3, *prev);
*ints_read = 16;
return 16;
}
uint32_t low_12_bits = mask & 0xFFF;
// combine index and bytes consumed into a single lookup
index_bytes_consumed combined = combined_lookup[low_12_bits];
uint64_t consumed = combined.bytes_consumed;
uint8_t index = combined.index;
__m128i shuffle_vector = vectors[index];
/* six 1-2 byte deltas: prefix-sum the first four, then the last two. */
if (index < 64) {
*ints_read = 6;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi16(0x007F));
__m128i high_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi16(0x7F00));
__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
__m128i unpacked_result_a = _mm_and_si128(packed_result,
_mm_set1_epi32(0x0000FFFF));
*prev = PrefixSum(unpacked_result_a, *prev);
_mm_storeu_si128(mout, *prev);
__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
*prev = PrefixSum2ints(unpacked_result_b, *prev);
_mm_storel_epi64(mout + 1, *prev);
return consumed;
}
/* four 1-3 byte deltas: one full prefix sum. */
if (index < 145) {
*ints_read = 4;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x0000007F));
__m128i middle_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x00007F00));
__m128i high_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x007F0000));
__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout, *prev);
return consumed;
}
/* two long deltas (up to 5 bytes each): recombine 7-bit chunks, then
 * prefix-sum just the two meaningful lanes. */
*ints_read = 2;
__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
__m128i split_bytes = _mm_mullo_epi16(bytes_to_decode,
_mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
__m128i result_evens = _mm_or_si128(recombined, low_byte);
__m128i result = _mm_shuffle_epi8(result_evens,
_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1,
-1));
*prev = PrefixSum2ints(result, *prev);
_mm_storel_epi64(mout, *prev);
return consumed;
}
/* Decode one group like masked_vbyte_read_group, but compute the
 * continuation-bit mask locally with _mm_movemask_epi8 instead of taking
 * it from the caller.  Requires at least 16 readable bytes at `in`.
 * Returns bytes consumed; *ints_read receives 16, 6, 4 or 2. */
static int read_int_group(const uint8_t* in, uint32_t* out, int* ints_read) {
__m128i initial = _mm_lddqu_si128((const __m128i *) in);
__m128i * const mout = (__m128i *) out;
int mask = _mm_movemask_epi8(initial);
/* All 16 high bits clear: 16 complete one-byte values. */
if (mask == 0) {
__m128i result;
result = _mm_cvtepi8_epi32(initial);
initial = _mm_srli_si128(initial, 4);
_mm_storeu_si128(mout, result);
result = _mm_cvtepi8_epi32(initial);
initial = _mm_srli_si128(initial, 4);
_mm_storeu_si128(mout + 1, result);
result = _mm_cvtepi8_epi32(initial);
initial = _mm_srli_si128(initial, 4);
_mm_storeu_si128(mout + 2, result);
result = _mm_cvtepi8_epi32(initial);
_mm_storeu_si128(mout + 3, result);
*ints_read = 16;
return 16;
}
/* Low 12 mask bits select shuffle pattern + consumed-byte count. */
int mask2 = mask & 0xFFF;
index_bytes_consumed combined = combined_lookup[mask2];
int index = combined.index;
__m128i shuffle_vector = vectors[index];
int consumed = combined.bytes_consumed;
/* six integers of 1-2 bytes each (16-bit lanes) */
if (index < 64) {
*ints_read = 6;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi16(0x007F));
__m128i high_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi16(0x7F00));
__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
__m128i unpacked_result_a = _mm_and_si128(packed_result,
_mm_set1_epi32(0x0000FFFF));
_mm_storeu_si128(mout, unpacked_result_a);
__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
_mm_storel_epi64(mout + 1, unpacked_result_b);
return consumed;
}
/* four integers of 1-3 bytes each (32-bit lanes) */
if (index < 145) {
*ints_read = 4;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x0000007F));
__m128i middle_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x00007F00));
__m128i high_bytes = _mm_and_si128(bytes_to_decode,
_mm_set1_epi32(0x007F0000));
__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
_mm_storeu_si128(mout, result);
return consumed;
}
/* two long integers (up to 5 bytes each): multiply/shift recombination */
*ints_read = 2;
__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
__m128i split_bytes = _mm_mullo_epi16(bytes_to_decode,
_mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
__m128i result_evens = _mm_or_si128(recombined, low_byte);
__m128i result = _mm_shuffle_epi8(result_evens,
_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1,
-1));
_mm_storel_epi64(mout, result);
return consumed;
}
/* Decode exactly `length` varint-encoded 32-bit integers from `in` into
 * `out`, returning the number of input bytes consumed.
 * Strategy: scan 48 bytes ahead with movemask to build a 64-bit signature
 * of continuation bits (`sig`), then feed 16-byte groups to
 * masked_vbyte_read_group; finish the last few values with scalar
 * read_int. */
size_t masked_vbyte_decode(const uint8_t* in, uint32_t* out,
uint64_t length) {
size_t consumed = 0; // number of bytes read
uint64_t count = 0; // how many integers we have read so far
uint64_t sig = 0;
int availablebytes = 0;
if (96 < length) {
size_t scanned = 0;
/* Prime the pipeline: signature bits for the first 48 input bytes. */
#ifdef __AVX2__
__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
uint32_t lowSig = _mm256_movemask_epi8(low);
#else
__m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
uint32_t lowSig1 = _mm_movemask_epi8(low1);
__m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
uint32_t lowSig2 = _mm_movemask_epi8(low2);
uint32_t lowSig = lowSig2 << 16;
lowSig |= lowSig1;
#endif
// excess verbosity to avoid problems with sign extension on conversions
// better to think about what's happening and make it clearer
__m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
uint32_t highSig = _mm_movemask_epi8(high);
uint64_t nextSig = highSig;
nextSig <<= 32;
nextSig |= lowSig;
scanned += 48;
while (count + 96 < length) { // 96 == 48 + 48 ahead for scanning
uint64_t thisSig = nextSig;
/* Scan the next 48 bytes while decoding from the previous batch. */
#ifdef __AVX2__
low = _mm256_loadu_si256((__m256i *)(in + scanned));
lowSig = _mm256_movemask_epi8(low);
#else
low1 = _mm_loadu_si128((__m128i *) (in + scanned));
lowSig1 = _mm_movemask_epi8(low1);
low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
lowSig2 = _mm_movemask_epi8(low2);
lowSig = lowSig2 << 16;
lowSig |= lowSig1;
#endif
high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
highSig = _mm_movemask_epi8(high);
nextSig = highSig;
nextSig <<= 32;
nextSig |= lowSig;
/* Splice the fresh 48 signature bits above whatever is still
 * pending in sig (remaining = unconsumed bits already in sig). */
uint64_t remaining = scanned - (consumed + 48);
sig = (thisSig << remaining) | sig;
uint64_t reload = scanned - 16;
scanned += 48;
// need to reload when less than 16 scanned bytes remain in sig
while (consumed < reload) {
uint64_t ints_read;
uint64_t bytes = masked_vbyte_read_group(in + consumed,
out + count, sig, &ints_read);
sig >>= bytes;
// seems like this might force the compiler to prioritize shifting sig >>= bytes
if (sig == 0xFFFFFFFFFFFFFFFF)
return 0; // fake check to force earliest evaluation
consumed += bytes;
count += ints_read;
}
}
/* Fold the last scanned-ahead signature into sig for the slow loop. */
sig = (nextSig << (scanned - consumed - 48)) | sig;
availablebytes = scanned - consumed;
}
/* Slow loop: top up the signature 16/32 bytes at a time and keep
 * decoding groups until fewer than 16 bytes can be scanned safely. */
while (availablebytes + count < length) {
if (availablebytes < 16) {
if (availablebytes + count + 31 < length) {
#ifdef __AVX2__
uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
sig |= (newsigavx << availablebytes);
#else
uint64_t newsig = _mm_movemask_epi8(
_mm_lddqu_si128(
(const __m128i *) (in + availablebytes
+ consumed)));
uint64_t newsig2 = _mm_movemask_epi8(
_mm_lddqu_si128(
(const __m128i *) (in + availablebytes + 16
+ consumed)));
sig |= (newsig << availablebytes)
| (newsig2 << (availablebytes + 16));
#endif
availablebytes += 32;
} else if (availablebytes + count + 15 < length) {
int newsig = _mm_movemask_epi8(
_mm_lddqu_si128(
(const __m128i *) (in + availablebytes
+ consumed)));
sig |= newsig << availablebytes;
availablebytes += 16;
} else {
break;
}
}
uint64_t ints_read;
uint64_t eaten = masked_vbyte_read_group(in + consumed, out + count,
sig, &ints_read);
consumed += eaten;
availablebytes -= eaten;
sig >>= eaten;
count += ints_read;
}
/* Scalar tail for the remaining integers. */
for (; count < length; count++) {
consumed += read_int(in + consumed, out + count);
}
return consumed;
}
/* Decode all varints contained in `inputsize` bytes at `in` into `out`.
 * Returns the number of 32-bit integers written.
 * Same pipeline as masked_vbyte_decode, but driven by the input byte count
 * instead of an output count: scan 48 bytes ahead with movemask to build a
 * signature of continuation bits, decode 16-byte groups with
 * masked_vbyte_read_group, then finish with a scalar varint loop.
 * Fix vs. previous revision: the scalar tail now shifts in uint32_t;
 * `c << shift` on the int-promoted byte overflowed (UB) for 5-byte
 * varints whose final byte is >= 8. */
size_t masked_vbyte_decode_fromcompressedsize(const uint8_t* in, uint32_t* out,
		size_t inputsize) {
	size_t consumed = 0; // number of bytes read
	uint32_t * initout = out;
	uint64_t sig = 0;    // pending continuation-bit signature, LSB first
	int availablebytes = 0;
	if (96 < inputsize) {
		size_t scanned = 0;
		/* Prime the pipeline: signature bits for the first 48 bytes. */
#ifdef __AVX2__
		__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
		uint32_t lowSig = _mm256_movemask_epi8(low);
#else
		__m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
		uint32_t lowSig1 = _mm_movemask_epi8(low1);
		__m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
		uint32_t lowSig2 = _mm_movemask_epi8(low2);
		uint32_t lowSig = lowSig2 << 16;
		lowSig |= lowSig1;
#endif
		// excess verbosity to avoid problems with sign extension on conversions
		// better to think about what's happening and make it clearer
		__m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
		uint32_t highSig = _mm_movemask_epi8(high);
		uint64_t nextSig = highSig;
		nextSig <<= 32;
		nextSig |= lowSig;
		scanned += 48;
		while (scanned + 48 <= inputsize) { // 96 == 48 + 48 ahead for scanning
			uint64_t thisSig = nextSig;
			/* Scan the next 48 bytes while decoding the previous batch. */
#ifdef __AVX2__
			low = _mm256_loadu_si256((__m256i *)(in + scanned));
			lowSig = _mm256_movemask_epi8(low);
#else
			low1 = _mm_loadu_si128((__m128i *) (in + scanned));
			lowSig1 = _mm_movemask_epi8(low1);
			low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
			lowSig2 = _mm_movemask_epi8(low2);
			lowSig = lowSig2 << 16;
			lowSig |= lowSig1;
#endif
			high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
			highSig = _mm_movemask_epi8(high);
			nextSig = highSig;
			nextSig <<= 32;
			nextSig |= lowSig;
			/* Splice the fresh 48 signature bits above the bits still
			 * pending in sig. */
			uint64_t remaining = scanned - (consumed + 48);
			sig = (thisSig << remaining) | sig;
			uint64_t reload = scanned - 16;
			scanned += 48;
			// need to reload when less than 16 scanned bytes remain in sig
			while (consumed < reload) {
				uint64_t ints_read;
				uint64_t bytes = masked_vbyte_read_group(in + consumed,
						out, sig, &ints_read);
				sig >>= bytes;
				// seems like this might force the compiler to prioritize shifting sig >>= bytes
				if (sig == 0xFFFFFFFFFFFFFFFF)
					return 0; // fake check to force earliest evaluation
				consumed += bytes;
				out += ints_read;
			}
		}
		sig = (nextSig << (scanned - consumed - 48)) | sig;
		availablebytes = scanned - consumed;
	}
	/* Top up the signature 16/32 bytes at a time and keep decoding groups
	 * while at least 16 bytes can still be scanned safely. */
	while (1) {
		if (availablebytes < 16) {
			if (availablebytes + consumed + 31 < inputsize) {
#ifdef __AVX2__
				uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
				sig |= (newsigavx << availablebytes);
#else
				uint64_t newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				uint64_t newsig2 = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes + 16
										+ consumed)));
				sig |= (newsig << availablebytes)
						| (newsig2 << (availablebytes + 16));
#endif
				availablebytes += 32;
			} else if(availablebytes + consumed + 15 < inputsize ) {
				int newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				sig |= newsig << availablebytes;
				availablebytes += 16;
			} else {
				break;
			}
		}
		uint64_t ints_read;
		uint64_t bytes = masked_vbyte_read_group(in + consumed, out,
				sig, &ints_read);
		consumed += bytes;
		availablebytes -= bytes;
		sig >>= bytes;
		out += ints_read;
	}
	/* Scalar tail: decode the remaining bytes one varint at a time. */
	while (consumed < inputsize) {
		unsigned int shift = 0;
		uint32_t v;
		for (v = 0; consumed < inputsize; shift += 7) {
			uint8_t c = in[consumed++];
			if ((c & 128) == 0) {
				/* Shift in uint32_t: the promoted int would overflow
				 * (undefined behavior) at shift == 28 for c >= 8. */
				out[0] = v + ((uint32_t) c << shift);
				++out;
				break;
			} else {
				v += (uint32_t) (c & 127) << shift;
			}
		}
	}
	return out - initout;
}
/* Decode `length` varint-encoded 32-bit integers from `in` into `out`.
 * Returns the number of input bytes consumed. */
size_t read_ints(const uint8_t* in, uint32_t* out, int length) {
	size_t pos = 0; /* input bytes consumed so far */
	int done = 0;   /* integers decoded so far */
	/* Bulk path: safe while at least 16 values are still wanted. */
	while (done + 15 < length) {
		int produced;
		pos += read_int_group(in + pos, out + done, &produced);
		done += produced;
	}
	/* Scalar tail for the last few values. */
	while (done < length) {
		pos += read_int(in + pos, out + done);
		done++;
	}
	return pos;
}
// Decodes one SIMD group of delta-encoded varints starting at `in` and
// writes the prefix-summed (absolute) values to `out`.
// `*prev` carries the running prefix sum across calls and is updated here.
// Sets `*ints_read` to the number of integers produced (16, 6, 4 or 2)
// and returns the number of input bytes consumed.
static int read_int_group_delta(const uint8_t* in, uint32_t* out,
		int* ints_read, __m128i * prev) {
	__m128i initial = _mm_lddqu_si128((const __m128i *) in);
	__m128i * const mout = (__m128i *) out;
	// Each continuation (high) bit becomes one bit of `mask`; mask == 0
	// means all 16 loaded bytes are complete one-byte varints.
	int mask = _mm_movemask_epi8(initial);
	if (mask == 0) {
		// Fast path: widen 4 bytes at a time to 32-bit lanes, prefix-sum
		// each group of four, and store 16 outputs.
		__m128i result;
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout, *prev);
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 1, *prev);
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 2, *prev);
		result = _mm_cvtepi8_epi32(initial);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 3, *prev);
		*ints_read = 16;
		return 16;
	}
	// Only the low 12 continuation bits select the decode pattern; the
	// lookup yields a shuffle-vector index plus the bytes that pattern eats.
	int mask2 = mask & 0xFFF;
	index_bytes_consumed combined = combined_lookup[mask2];
	int index = combined.index;
	__m128i shuffle_vector = vectors[index];
	int consumed = combined.bytes_consumed;
	if (index < 64) {
		// Pattern class: six values, each at most 2 bytes (16-bit lanes).
		*ints_read = 6;
		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x007F));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x7F00));
		// Shift the upper 7-bit group down by one to close the
		// continuation-bit gap between the two payload groups.
		__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
		__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
		__m128i unpacked_result_a = _mm_and_si128(packed_result,
				_mm_set1_epi32(0x0000FFFF));
		*prev = PrefixSum(unpacked_result_a, *prev);
		_mm_storeu_si128(mout, *prev);
		__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
		*prev = PrefixSum2ints(unpacked_result_b, *prev);
		_mm_storeu_si128(mout + 1, *prev);
		return consumed;
	}
	if (index < 145) {
		// Pattern class: four values, each at most 3 bytes (32-bit lanes).
		*ints_read = 4;
		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x0000007F));
		__m128i middle_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x00007F00));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x007F0000));
		// Close the 1-bit continuation gaps between the three 7-bit groups.
		__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
		__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
		__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
		__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout, *prev);
		return consumed;
	}
	// Remaining pattern class: two values in 64-bit lanes.
	*ints_read = 2;
	// Strip all continuation bits first, then gather payload bytes.
	__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
	__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
	// Multiply by per-lane powers of two to realign each 7-bit group
	// within its 16-bit sublane (a vectorized variable shift).
	__m128i split_bytes = _mm_mullo_epi16(bytes_to_decode,
			_mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
	__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
	__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
	__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
	__m128i result_evens = _mm_or_si128(recombined, low_byte);
	// Compact the even bytes of each 64-bit lane into two 32-bit results.
	__m128i result = _mm_shuffle_epi8(result_evens,
			_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1,
					-1));
	*prev = PrefixSum2ints(result, *prev);
	_mm_storeu_si128(mout, *prev);
	return consumed;
}
// Decodes `length` delta-encoded varints from `in` into `out`, accumulating
// onto the starting value `prev` (outputs are absolute, prefix-summed
// values). Returns the number of input bytes consumed.
size_t masked_vbyte_decode_delta(const uint8_t* in, uint32_t* out,
		uint64_t length, uint32_t prev) {
	size_t consumed = 0; // number of bytes read
	__m128i mprev = _mm_set1_epi32(prev); // running prefix sum (broadcast)
	uint64_t count = 0; // how many integers we have read so far
	uint64_t sig = 0; // continuation-bit signature of not-yet-decoded bytes
	int availablebytes = 0; // input bytes already covered by `sig`
	if (96 < length) {
		// Scanning phase: stay 48 bytes ahead of the decoder, harvesting
		// the continuation bits (movemask) into 48-bit signature chunks.
		size_t scanned = 0;
#ifdef __AVX2__
		__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
		uint32_t lowSig = _mm256_movemask_epi8(low);
#else
		__m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
		uint32_t lowSig1 = _mm_movemask_epi8(low1);
		__m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
		uint32_t lowSig2 = _mm_movemask_epi8(low2);
		uint32_t lowSig = lowSig2 << 16;
		lowSig |= lowSig1;
#endif
		// excess verbosity to avoid problems with sign extension on conversions
		// better to think about what's happening and make it clearer
		__m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
		uint32_t highSig = _mm_movemask_epi8(high);
		uint64_t nextSig = highSig;
		nextSig <<= 32;
		nextSig |= lowSig;
		scanned += 48;
		while (count + 96 < length) { // 96 == 48 + 48 ahead for scanning
			uint64_t thisSig = nextSig;
#ifdef __AVX2__
			low = _mm256_loadu_si256((__m256i *)(in + scanned));
			lowSig = _mm256_movemask_epi8(low);
#else
			low1 = _mm_loadu_si128((__m128i *) (in + scanned));
			lowSig1 = _mm_movemask_epi8(low1);
			low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
			lowSig2 = _mm_movemask_epi8(low2);
			lowSig = lowSig2 << 16;
			lowSig |= lowSig1;
#endif
			high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
			highSig = _mm_movemask_epi8(high);
			nextSig = highSig;
			nextSig <<= 32;
			nextSig |= lowSig;
			// Splice the previous 48-bit signature above whatever bits of
			// `sig` the decoder has not consumed yet.
			uint64_t remaining = scanned - (consumed + 48);
			sig = (thisSig << remaining) | sig;
			uint64_t reload = scanned - 16;
			scanned += 48;
			// need to reload when less than 16 scanned bytes remain in sig
			while (consumed < reload) {
				uint64_t ints_read;
				uint64_t bytes = masked_vbyte_read_group_delta(in + consumed,
						out + count, sig, &ints_read, &mprev);
				sig >>= bytes;
				// seems like this might force the compiler to prioritize shifting sig >>= bytes
				if (sig == 0xFFFFFFFFFFFFFFFF)
					return 0; // fake check to force earliest evaluation
				consumed += bytes;
				count += ints_read;
			}
		}
		sig = (nextSig << (scanned - consumed - 48)) | sig;
		availablebytes = scanned - consumed;
	}
	// Drain the signature bits left over from the scanning phase, one SIMD
	// group at a time, while at least 16 signature-covered bytes remain.
	// NOTE(review): an unreachable reload block used to follow the break
	// below (guarded by the same `availablebytes < 16` condition, so it
	// could never execute); it also compared byte offsets against `length`,
	// which counts ints, and has been removed as dead code.
	while (availablebytes + count < length) {
		if (availablebytes < 16)
			break;
		uint64_t ints_read;
		uint64_t eaten = masked_vbyte_read_group_delta(in + consumed, out + count,
				sig, &ints_read, &mprev);
		consumed += eaten;
		availablebytes -= eaten;
		sig >>= eaten;
		count += ints_read;
	}
	// Scalar tail: lane 3 of mprev holds the last prefix-summed value.
	prev = _mm_extract_epi32(mprev, 3);
	for (; count < length; count++) {
		consumed += read_int_delta(in + consumed, out + count, &prev);
	}
	return consumed;
}
// Delta-decodes `length` integers from the varint stream `in` into `out`,
// accumulating onto the starting value `prev` (outputs are absolute,
// prefix-summed values). Returns the number of input bytes consumed.
size_t read_ints_delta(const uint8_t* in, uint32_t* out, int length,
		uint32_t prev) {
	__m128i running = _mm_set1_epi32(prev);
	size_t pos = 0;
	int decoded = 0;
	// SIMD path: each group call may emit up to 16 ints, so only enter
	// while a full group's worth of output space is guaranteed.
	while (decoded + 15 < length) {
		int group_count;
		pos += read_int_group_delta(in + pos, out + decoded, &group_count,
				&running);
		decoded += group_count;
	}
	// The highest lane of the vector holds the last prefix-summed value;
	// continue from it with the scalar decoder for the tail.
	prev = _mm_extract_epi32(running, 3);
	while (decoded < length) {
		pos += read_int_delta(in + pos, out + decoded, &prev);
		decoded++;
	}
	return pos;
}
// inputsize : number of input bytes we want to decode
// returns the number of written ints
// Delta variant: decodes varints from `in`, accumulating each decoded delta
// onto the running value that starts at `prev`; `out` receives absolute
// (prefix-summed) values.
size_t masked_vbyte_decode_fromcompressedsize_delta(const uint8_t* in, uint32_t* out,
		size_t inputsize, uint32_t prev) {
	size_t consumed = 0; // number of bytes read
	uint32_t * initout = out; // kept to compute the count of written ints
	__m128i mprev = _mm_set1_epi32(prev); // running prefix sum (broadcast)
	uint64_t sig = 0; // continuation-bit signature of not-yet-decoded bytes
	int availablebytes = 0; // input bytes already covered by `sig`
	if (96 < inputsize) {
		// Scanning phase: stay 48 bytes ahead of the decoder, harvesting
		// the continuation bits (movemask) into 48-bit signature chunks.
		size_t scanned = 0;
#ifdef __AVX2__
		__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
		uint32_t lowSig = _mm256_movemask_epi8(low);
#else
		__m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
		uint32_t lowSig1 = _mm_movemask_epi8(low1);
		__m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
		uint32_t lowSig2 = _mm_movemask_epi8(low2);
		uint32_t lowSig = lowSig2 << 16;
		lowSig |= lowSig1;
#endif
		// excess verbosity to avoid problems with sign extension on conversions
		// better to think about what's happening and make it clearer
		__m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
		uint32_t highSig = _mm_movemask_epi8(high);
		uint64_t nextSig = highSig;
		nextSig <<= 32;
		nextSig |= lowSig;
		scanned += 48;
		while (scanned + 48 <= inputsize) { // 96 == 48 + 48 ahead for scanning
			uint64_t thisSig = nextSig;
#ifdef __AVX2__
			low = _mm256_loadu_si256((__m256i *)(in + scanned));
			lowSig = _mm256_movemask_epi8(low);
#else
			low1 = _mm_loadu_si128((__m128i *) (in + scanned));
			lowSig1 = _mm_movemask_epi8(low1);
			low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
			lowSig2 = _mm_movemask_epi8(low2);
			lowSig = lowSig2 << 16;
			lowSig |= lowSig1;
#endif
			high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
			highSig = _mm_movemask_epi8(high);
			nextSig = highSig;
			nextSig <<= 32;
			nextSig |= lowSig;
			// Splice the previous 48-bit signature above whatever bits of
			// `sig` the decoder has not consumed yet.
			uint64_t remaining = scanned - (consumed + 48);
			sig = (thisSig << remaining) | sig;
			uint64_t reload = scanned - 16;
			scanned += 48;
			// need to reload when less than 16 scanned bytes remain in sig
			while (consumed < reload) {
				uint64_t ints_read;
				uint64_t bytes = masked_vbyte_read_group_delta(in + consumed,
						out, sig, &ints_read, &mprev);
				sig >>= bytes;
				// seems like this might force the compiler to prioritize shifting sig >>= bytes
				if (sig == 0xFFFFFFFFFFFFFFFF)
					return 0; // fake check to force earliest evaluation
				consumed += bytes;
				out += ints_read;
			}
		}
		sig = (nextSig << (scanned - consumed - 48)) | sig;
		availablebytes = scanned - consumed;
	}
	// Steady-state loop: decode one SIMD group per iteration, topping up
	// `sig` with fresh movemask bits whenever fewer than 16 signature-
	// covered bytes remain; exits when a safe 16/32-byte load is no longer
	// guaranteed to stay within `inputsize`.
	while (1) {
		if (availablebytes < 16) {
			if (availablebytes + consumed + 31 < inputsize) {
#ifdef __AVX2__
				uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
				sig |= (newsigavx << availablebytes);
#else
				uint64_t newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				uint64_t newsig2 = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes + 16
										+ consumed)));
				sig |= (newsig << availablebytes)
						| (newsig2 << (availablebytes + 16));
#endif
				availablebytes += 32;
			} else if(availablebytes + consumed + 15 < inputsize ) {
				// shift count stays < 16 here, so the int-typed newsig is safe
				int newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				sig |= newsig << availablebytes;
				availablebytes += 16;
			} else {
				break;
			}
		}
		uint64_t ints_read;
		uint64_t bytes = masked_vbyte_read_group_delta(in + consumed, out,
				sig, &ints_read, &mprev);
		consumed += bytes;
		availablebytes -= bytes;
		sig >>= bytes;
		out += ints_read;
	}
	// Scalar tail: lane 3 of mprev holds the last prefix-summed value;
	// finish the remaining bytes one varint at a time.
	prev = _mm_extract_epi32(mprev, 3);
	while (consumed < inputsize) {
		unsigned int shift = 0; uint32_t v;
		for (v = 0; consumed < inputsize; shift += 7) {
			uint8_t c = in[consumed++];
			if ((c & 128) == 0) {
				// Final (continuation-bit-clear) byte: complete the delta
				// and emit the accumulated absolute value.
				uint32_t delta = v + (c << shift);
				prev += delta;
				*out++ = prev;
				break;
			} else {
				v += (c & 127) << shift;
			}
		}
	}
	return out - initout;
}