/* (extraction metadata, preserved as a comment: 1761 lines, 76 KiB, C) */
#include "../include/varintdecode.h"
|
|
|
|
#include <x86intrin.h>
|
|
|
|
static const uint8_t vec_lookup[] __attribute__((aligned(0x1000))) = { 0, 32,
|
|
16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20,
|
|
121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119,
|
|
10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69,
|
|
106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50,
|
|
82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 60, 86,
|
|
161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84,
|
|
160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151,
|
|
147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82,
|
|
160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 58, 85, 161,
|
|
66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 54, 83, 160, 7,
|
|
46, 30, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89, 162, 145,
|
|
150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94,
|
|
76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87, 161, 66, 99,
|
|
81, 142, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150,
|
|
146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151, 147, 164, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70,
|
|
109, 148, 165, 2, 36, 20, 121, 12, 57, 85, 161, 66, 97, 79, 136, 145,
|
|
153, 149, 0, 1, 34, 18, 119, 10, 53, 83, 160, 6, 45, 29, 130, 71, 112,
|
|
148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150, 146, 158, 145,
|
|
154, 0, 0, 0, 33, 17, 118, 9, 51, 82, 160, 5, 43, 27, 128, 70, 110, 148,
|
|
165, 3, 39, 23, 122, 15, 62, 86, 161, 66, 98, 80, 140, 145, 153, 149, 0,
|
|
64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78, 134, 72, 116, 148, 167,
|
|
64, 150, 146, 155, 145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0,
|
|
0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127, 70, 109, 148, 165, 2,
|
|
92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 138, 145, 153, 149, 0, 1, 91,
|
|
73, 119, 67, 101, 83, 160, 65, 95, 77, 132, 71, 114, 148, 166, 64, 93,
|
|
75, 126, 69, 108, 90, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91,
|
|
73, 118, 67, 100, 82, 160, 65, 94, 76, 156, 70, 152, 148, 165, 64, 92,
|
|
74, 155, 68, 151, 147, 161, 66, 150, 146, 157, 145, 153, 149, 0, 64, 91,
|
|
73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145, 152, 148, 169, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82,
|
|
160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 56, 85, 161,
|
|
66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 52, 83, 160, 6,
|
|
44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145,
|
|
150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50, 82, 160, 5, 42,
|
|
26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 61, 86, 161, 66, 98, 80,
|
|
139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78,
|
|
133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150,
|
|
146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82, 160, 4, 41, 25, 127,
|
|
70, 109, 148, 165, 2, 37, 21, 121, 13, 59, 85, 161, 66, 97, 79, 137,
|
|
145, 153, 149, 0, 1, 35, 19, 119, 11, 55, 83, 160, 7, 47, 31, 131, 71,
|
|
113, 148, 166, 64, 93, 75, 125, 69, 107, 89, 162, 145, 150, 146, 158,
|
|
145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94, 76, 129, 70,
|
|
111, 148, 165, 64, 92, 74, 123, 68, 105, 87, 161, 66, 99, 81, 143, 145,
|
|
153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145,
|
|
152, 148, 168, 64, 150, 146, 155, 145, 151, 147, 164, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165,
|
|
2, 36, 20, 121, 12, 103, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1,
|
|
34, 18, 119, 10, 101, 83, 160, 6, 95, 77, 130, 71, 112, 148, 166, 64,
|
|
93, 75, 124, 69, 106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0,
|
|
33, 17, 118, 9, 100, 82, 160, 5, 94, 76, 128, 70, 110, 148, 165, 3, 92,
|
|
74, 122, 68, 104, 86, 161, 66, 98, 80, 141, 145, 153, 149, 0, 64, 91,
|
|
73, 120, 67, 102, 84, 160, 65, 96, 78, 135, 72, 117, 148, 167, 64, 150,
|
|
146, 155, 145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16,
|
|
118, 8, 100, 82, 160, 4, 94, 76, 127, 70, 109, 148, 165, 2, 92, 74, 121,
|
|
68, 103, 85, 161, 66, 97, 79, 157, 145, 153, 149, 0, 1, 91, 73, 119, 67,
|
|
101, 83, 160, 65, 95, 77, 156, 71, 152, 148, 166, 64, 93, 75, 155, 69,
|
|
151, 147, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67,
|
|
100, 82, 160, 65, 94, 76, 156, 70, 152, 148, 165, 64, 92, 74, 155, 68,
|
|
151, 147, 161, 66, 150, 146, 157, 145, 153, 149, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2,
|
|
36, 20, 121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34,
|
|
18, 119, 10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75,
|
|
124, 69, 106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17,
|
|
118, 9, 50, 82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122,
|
|
14, 60, 86, 161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67,
|
|
102, 84, 160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155,
|
|
145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8,
|
|
49, 82, 160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 58,
|
|
85, 161, 66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 54, 83,
|
|
160, 7, 46, 30, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89,
|
|
162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82,
|
|
160, 65, 94, 76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87,
|
|
161, 66, 99, 81, 142, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147,
|
|
160, 65, 150, 146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151,
|
|
147, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40,
|
|
24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 57, 85, 161, 66, 97, 79,
|
|
136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 53, 83, 160, 6, 45, 29, 130,
|
|
71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150, 146,
|
|
158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 51, 82, 160, 5, 43, 27, 128, 70,
|
|
110, 148, 165, 3, 39, 23, 122, 15, 63, 86, 161, 66, 98, 80, 140, 145,
|
|
153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78, 134, 72,
|
|
116, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150, 146,
|
|
159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127, 70,
|
|
109, 148, 165, 2, 92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 138, 145,
|
|
153, 149, 0, 1, 91, 73, 119, 67, 101, 83, 160, 65, 95, 77, 132, 71, 114,
|
|
148, 166, 64, 93, 75, 126, 69, 108, 90, 162, 145, 150, 146, 158, 145,
|
|
154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94, 76, 156, 70, 152,
|
|
148, 165, 64, 92, 74, 155, 68, 151, 147, 161, 66, 150, 146, 157, 145,
|
|
153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145,
|
|
152, 148, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32,
|
|
16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20,
|
|
121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119,
|
|
10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69,
|
|
106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50,
|
|
82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 104, 86,
|
|
161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84,
|
|
160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151,
|
|
147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82,
|
|
160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 103, 85,
|
|
161, 66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 101, 83,
|
|
160, 7, 95, 77, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89,
|
|
162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82,
|
|
160, 65, 94, 76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87,
|
|
161, 66, 99, 81, 144, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147,
|
|
160, 65, 150, 146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151,
|
|
147, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40,
|
|
24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 103, 85, 161, 66, 97,
|
|
79, 136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 101, 83, 160, 6, 95, 77,
|
|
130, 71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150,
|
|
146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 100, 82, 160, 5, 94, 76,
|
|
128, 70, 110, 148, 165, 3, 92, 74, 122, 68, 104, 86, 161, 66, 98, 80,
|
|
157, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78,
|
|
156, 72, 152, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150,
|
|
146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127,
|
|
70, 109, 148, 165, 2, 92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 157,
|
|
145, 153, 149, 0, 1, 91, 73, 119, 67, 101, 83, 160, 65, 95, 77, 156, 71,
|
|
152, 148, 166, 64, 93, 75, 155, 69, 151, 147, 162, 145, 150, 146, 158,
|
|
145, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32,
|
|
16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20,
|
|
121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119,
|
|
10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69,
|
|
106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50,
|
|
82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 60, 86,
|
|
161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84,
|
|
160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151,
|
|
147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82,
|
|
160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 58, 85, 161,
|
|
66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 54, 83, 160, 7,
|
|
46, 30, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89, 162, 145,
|
|
150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94,
|
|
76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87, 161, 66, 99,
|
|
81, 142, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150,
|
|
146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151, 147, 164, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70,
|
|
109, 148, 165, 2, 36, 20, 121, 12, 57, 85, 161, 66, 97, 79, 136, 145,
|
|
153, 149, 0, 1, 34, 18, 119, 10, 53, 83, 160, 6, 45, 29, 130, 71, 112,
|
|
148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150, 146, 158, 145,
|
|
154, 0, 0, 0, 33, 17, 118, 9, 51, 82, 160, 5, 43, 27, 128, 70, 110, 148,
|
|
165, 3, 39, 23, 122, 15, 62, 86, 161, 66, 98, 80, 140, 145, 153, 149, 0,
|
|
64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78, 134, 72, 116, 148, 167,
|
|
64, 150, 146, 155, 145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0,
|
|
0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127, 70, 109, 148, 165, 2,
|
|
92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 138, 145, 153, 149, 0, 1, 91,
|
|
73, 119, 67, 101, 83, 160, 65, 95, 77, 132, 71, 114, 148, 166, 64, 93,
|
|
75, 126, 69, 108, 90, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91,
|
|
73, 118, 67, 100, 82, 160, 65, 94, 76, 156, 70, 152, 148, 165, 64, 92,
|
|
74, 155, 68, 151, 147, 161, 66, 150, 146, 157, 145, 153, 149, 0, 64, 91,
|
|
73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145, 152, 148, 169, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82,
|
|
160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 56, 85, 161,
|
|
66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 52, 83, 160, 6,
|
|
44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145,
|
|
150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50, 82, 160, 5, 42,
|
|
26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 61, 86, 161, 66, 98, 80,
|
|
139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78,
|
|
133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150,
|
|
146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82, 160, 4, 41, 25, 127,
|
|
70, 109, 148, 165, 2, 37, 21, 121, 13, 59, 85, 161, 66, 97, 79, 137,
|
|
145, 153, 149, 0, 1, 35, 19, 119, 11, 55, 83, 160, 7, 47, 31, 131, 71,
|
|
113, 148, 166, 64, 93, 75, 125, 69, 107, 89, 162, 145, 150, 146, 158,
|
|
145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94, 76, 129, 70,
|
|
111, 148, 165, 64, 92, 74, 123, 68, 105, 87, 161, 66, 99, 81, 143, 145,
|
|
153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145,
|
|
152, 148, 168, 64, 150, 146, 155, 145, 151, 147, 164, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165,
|
|
2, 36, 20, 121, 12, 103, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1,
|
|
34, 18, 119, 10, 101, 83, 160, 6, 95, 77, 130, 71, 112, 148, 166, 64,
|
|
93, 75, 124, 69, 106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0,
|
|
33, 17, 118, 9, 100, 82, 160, 5, 94, 76, 128, 70, 110, 148, 165, 3, 92,
|
|
74, 122, 68, 104, 86, 161, 66, 98, 80, 141, 145, 153, 149, 0, 64, 91,
|
|
73, 120, 67, 102, 84, 160, 65, 96, 78, 135, 72, 117, 148, 167, 64, 150,
|
|
146, 155, 145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16,
|
|
118, 8, 100, 82, 160, 4, 94, 76, 127, 70, 109, 148, 165, 2, 92, 74, 121,
|
|
68, 103, 85, 161, 66, 97, 79, 157, 145, 153, 149, 0, 1, 91, 73, 119, 67,
|
|
101, 83, 160, 65, 95, 77, 156, 71, 152, 148, 166, 64, 93, 75, 155, 69,
|
|
151, 147, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67,
|
|
100, 82, 160, 65, 94, 76, 156, 70, 152, 148, 165, 64, 92, 74, 155, 68,
|
|
151, 147, 161, 66, 150, 146, 157, 145, 153, 149, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2,
|
|
36, 20, 121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34,
|
|
18, 119, 10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75,
|
|
124, 69, 106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17,
|
|
118, 9, 50, 82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122,
|
|
14, 60, 86, 161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67,
|
|
102, 84, 160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155,
|
|
145, 151, 147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8,
|
|
49, 82, 160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 58,
|
|
85, 161, 66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 54, 83,
|
|
160, 7, 46, 30, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89,
|
|
162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82,
|
|
160, 65, 94, 76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87,
|
|
161, 66, 99, 81, 142, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147,
|
|
160, 65, 150, 146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151,
|
|
147, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40,
|
|
24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 57, 85, 161, 66, 97, 79,
|
|
136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 53, 83, 160, 6, 45, 29, 130,
|
|
71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150, 146,
|
|
158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 51, 82, 160, 5, 43, 27, 128, 70,
|
|
110, 148, 165, 3, 39, 23, 122, 15, 104, 86, 161, 66, 98, 80, 140, 145,
|
|
153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78, 134, 72,
|
|
116, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150, 146,
|
|
159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 100, 82, 160, 4, 94, 76, 127, 70,
|
|
109, 148, 165, 2, 92, 74, 121, 68, 103, 85, 161, 66, 97, 79, 138, 145,
|
|
153, 149, 0, 1, 91, 73, 119, 67, 101, 83, 160, 65, 95, 77, 132, 71, 114,
|
|
148, 166, 64, 93, 75, 126, 69, 108, 90, 162, 145, 150, 146, 158, 145,
|
|
154, 0, 0, 0, 91, 73, 118, 67, 100, 82, 160, 65, 94, 76, 156, 70, 152,
|
|
148, 165, 64, 92, 74, 155, 68, 151, 147, 161, 66, 150, 146, 157, 145,
|
|
153, 149, 0, 64, 91, 73, 155, 67, 151, 147, 160, 65, 150, 146, 156, 145,
|
|
152, 148, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32,
|
|
16, 118, 8, 48, 82, 160, 4, 40, 24, 127, 70, 109, 148, 165, 2, 36, 20,
|
|
121, 12, 56, 85, 161, 66, 97, 79, 136, 145, 153, 149, 0, 1, 34, 18, 119,
|
|
10, 52, 83, 160, 6, 44, 28, 130, 71, 112, 148, 166, 64, 93, 75, 124, 69,
|
|
106, 88, 162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 50,
|
|
82, 160, 5, 42, 26, 128, 70, 110, 148, 165, 3, 38, 22, 122, 14, 104, 86,
|
|
161, 66, 98, 80, 139, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84,
|
|
160, 65, 96, 78, 133, 72, 115, 148, 167, 64, 150, 146, 155, 145, 151,
|
|
147, 163, 145, 150, 146, 159, 0, 0, 0, 0, 0, 32, 16, 118, 8, 49, 82,
|
|
160, 4, 41, 25, 127, 70, 109, 148, 165, 2, 37, 21, 121, 13, 103, 85,
|
|
161, 66, 97, 79, 137, 145, 153, 149, 0, 1, 35, 19, 119, 11, 101, 83,
|
|
160, 7, 95, 77, 131, 71, 113, 148, 166, 64, 93, 75, 125, 69, 107, 89,
|
|
162, 145, 150, 146, 158, 145, 154, 0, 0, 0, 91, 73, 118, 67, 100, 82,
|
|
160, 65, 94, 76, 129, 70, 111, 148, 165, 64, 92, 74, 123, 68, 105, 87,
|
|
161, 66, 99, 81, 157, 145, 153, 149, 0, 64, 91, 73, 155, 67, 151, 147,
|
|
160, 65, 150, 146, 156, 145, 152, 148, 168, 64, 150, 146, 155, 145, 151,
|
|
147, 164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 16, 118, 8, 48, 82, 160, 4, 40,
|
|
24, 127, 70, 109, 148, 165, 2, 36, 20, 121, 12, 103, 85, 161, 66, 97,
|
|
79, 136, 145, 153, 149, 0, 1, 34, 18, 119, 10, 101, 83, 160, 6, 95, 77,
|
|
130, 71, 112, 148, 166, 64, 93, 75, 124, 69, 106, 88, 162, 145, 150,
|
|
146, 158, 145, 154, 0, 0, 0, 33, 17, 118, 9, 100, 82, 160, 5, 94, 76,
|
|
128, 70, 110, 148, 165, 3, 92, 74, 122, 68, 104, 86, 161, 66, 98, 80,
|
|
157, 145, 153, 149, 0, 64, 91, 73, 120, 67, 102, 84, 160, 65, 96, 78,
|
|
156, 72, 152, 148, 167, 64, 150, 146, 155, 145, 151, 147, 163, 145, 150,
|
|
146, 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };
|
|
|
|
/*
 * Decode one VByte/LEB128-style varint from `in`.
 *
 * Each byte contributes its low 7 bits, least-significant group first;
 * a set high bit means another byte follows.  At most five bytes are
 * read, which is enough for a 32-bit value (the fifth byte's payload
 * lands in bits 28..31 and is accepted unconditionally).
 *
 * The decoded value is stored through `out`.
 * Returns the number of input bytes consumed (1..5).
 */
static int read_int(const uint8_t* in, uint32_t* out) {
    uint32_t value = 0;
    int shift = 0;
    int pos = 0;

    for (;;) {
        value |= (uint32_t)(in[pos] & 0x7F) << shift;
        /* Stop on a byte without the continuation bit, or after the
         * fifth byte regardless — a 32-bit value never needs more. */
        if (in[pos] < 128 || pos == 4) {
            *out = value;
            return pos + 1;
        }
        pos++;
        shift += 7;
    }
}
|
|
|
|
/*
 * Decode one varint (same wire format as read_int) and apply it as a
 * delta on top of the running value `*prev`.
 *
 * The raw decoded value is added to `*prev`; the updated running total
 * is stored through both `*prev` and `*out`.  At most five input bytes
 * are consumed; the fifth byte is accepted unconditionally.
 *
 * Returns the number of input bytes consumed (1..5).
 */
static int read_int_delta(const uint8_t* in, uint32_t* out, uint32_t* prev) {
    uint32_t delta = 0;
    int shift = 0;

    for (int pos = 0;; pos++) {
        delta |= (uint32_t)(in[pos] & 0x7F) << shift;
        /* Terminate on a clear continuation bit, or after byte 5. */
        if (in[pos] < 128 || pos == 4) {
            *prev += delta;
            *out = *prev;
            return pos + 1;
        }
        shift += 7;
    }
}
|
|
|
|
static const uint8_t bytes_consumed[] = { 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6,
|
|
7, 5, 6, 7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8,
|
|
9, 7, 5, 8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2,
|
|
7, 0, 0, 6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9,
|
|
10, 8, 6, 6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8,
|
|
9, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7,
|
|
9, 6, 5, 7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 10, 7, 6, 6, 7, 7, 9, 2,
|
|
6, 6, 0, 7, 9, 9, 7, 9, 10, 7, 5, 9, 10, 10, 9, 7, 9, 5, 7, 4, 7, 7, 9,
|
|
7, 9, 9, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9,
|
|
6, 9, 5, 6, 4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 10, 2, 6, 6, 0, 4, 5, 5, 4,
|
|
5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0,
|
|
2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7,
|
|
8, 10, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 10, 7, 5, 8, 10, 10,
|
|
8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8,
|
|
6, 8, 10, 6, 5, 8, 10, 10, 8, 6, 8, 5, 6, 8, 10, 10, 8, 10, 11, 8, 6, 6,
|
|
8, 8, 10, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 10, 8, 10, 5, 8,
|
|
4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5,
|
|
7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6, 7, 6, 7, 7, 6, 6, 7, 7, 10, 2, 6, 6, 0,
|
|
7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 10, 7, 10, 5, 7, 4, 7, 7, 10, 7, 10,
|
|
10, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5,
|
|
5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3, 6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4,
|
|
4, 5, 5, 3, 3, 5, 2, 5, 5, 10, 4, 3, 3, 4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0,
|
|
0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 9,
|
|
7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 9, 7, 5, 8, 9, 9, 8, 7, 8,
|
|
5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 9,
|
|
6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9, 11, 8, 6, 6, 8, 8, 9, 2, 6,
|
|
6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8, 9, 5, 8, 4, 3, 3, 4, 2, 4,
|
|
4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 9, 6, 5, 7, 9, 9, 7, 6, 7,
|
|
5, 6, 7, 9, 9, 7, 9, 11, 7, 6, 6, 7, 7, 9, 2, 6, 6, 0, 7, 9, 9, 7, 9,
|
|
11, 7, 5, 9, 11, 11, 9, 7, 9, 5, 7, 4, 7, 7, 9, 7, 9, 9, 7, 2, 3, 3, 7,
|
|
2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9, 6, 9, 5, 6, 4, 6, 6, 9,
|
|
6, 9, 9, 6, 6, 9, 9, 11, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5,
|
|
2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6,
|
|
7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 7, 7, 6, 6, 7, 7, 8,
|
|
2, 6, 6, 0, 7, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 8, 7, 8, 5, 7, 4, 7, 7, 8,
|
|
7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 8,
|
|
6, 8, 5, 6, 8, 6, 6, 8, 6, 8, 8, 6, 6, 8, 8, 11, 2, 6, 6, 0, 4, 5, 5, 8,
|
|
5, 8, 8, 5, 5, 8, 8, 11, 8, 11, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3,
|
|
8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6,
|
|
7, 6, 7, 7, 6, 6, 7, 7, 6, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7,
|
|
5, 7, 5, 5, 7, 4, 7, 7, 4, 7, 4, 4, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5,
|
|
6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3,
|
|
6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 0, 4, 3, 3,
|
|
4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8,
|
|
7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8,
|
|
7, 8, 9, 7, 5, 8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3,
|
|
7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9,
|
|
8, 9, 10, 8, 6, 6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8,
|
|
9, 8, 9, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7,
|
|
6, 7, 9, 6, 5, 7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 10, 7, 6, 6, 7, 7,
|
|
9, 2, 6, 6, 0, 7, 9, 9, 7, 9, 10, 7, 5, 9, 10, 10, 9, 7, 9, 5, 7, 4, 7,
|
|
7, 9, 7, 9, 9, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6,
|
|
6, 9, 6, 9, 5, 6, 4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 10, 2, 6, 6, 0, 4, 5,
|
|
5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3,
|
|
3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8,
|
|
8, 7, 8, 10, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 10, 7, 5, 8,
|
|
10, 10, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0,
|
|
6, 8, 8, 6, 8, 10, 6, 5, 8, 10, 10, 8, 6, 8, 5, 6, 8, 10, 10, 8, 10, 12,
|
|
8, 6, 6, 8, 8, 10, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 10, 8,
|
|
10, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7,
|
|
6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6, 7, 6, 7, 7, 6, 6, 7, 7, 10, 2,
|
|
6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 10, 7, 10, 5, 7, 4, 7, 7, 10,
|
|
7, 10, 10, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6,
|
|
5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3, 6, 2, 6, 6, 0, 4, 5, 5,
|
|
4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 10, 4, 3, 3, 4, 2, 4, 4, 0, 2, 3, 3,
|
|
0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8,
|
|
7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 9, 7, 5, 8, 9, 9,
|
|
8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8,
|
|
6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9, 8, 8, 6, 6, 8, 8,
|
|
9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8, 9, 5, 8, 4, 3, 3,
|
|
4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 9, 6, 5, 7, 9, 9,
|
|
7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 7, 7, 6, 6, 7, 7, 9, 2, 6, 6, 0, 7, 9, 9,
|
|
7, 9, 7, 7, 5, 9, 7, 7, 9, 7, 9, 5, 7, 4, 7, 7, 9, 7, 9, 9, 7, 2, 3, 3,
|
|
7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9, 6, 9, 5, 6, 4, 6, 6,
|
|
9, 6, 9, 9, 6, 6, 9, 9, 12, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3,
|
|
5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7,
|
|
6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 7, 7, 6, 6, 7, 7,
|
|
8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 8, 7, 8, 5, 7, 4, 7, 7,
|
|
8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6,
|
|
8, 6, 8, 5, 6, 8, 6, 6, 8, 6, 8, 8, 6, 6, 8, 8, 6, 2, 6, 6, 0, 4, 5, 5,
|
|
8, 5, 8, 8, 5, 5, 8, 8, 5, 8, 5, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3,
|
|
8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6,
|
|
7, 6, 7, 7, 6, 6, 7, 7, 6, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7,
|
|
5, 7, 5, 5, 7, 4, 7, 7, 4, 7, 4, 4, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5,
|
|
6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3,
|
|
6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 0, 4, 3, 3,
|
|
4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8,
|
|
7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8,
|
|
7, 8, 9, 7, 5, 8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3,
|
|
7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9,
|
|
8, 9, 10, 8, 6, 6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8,
|
|
9, 8, 9, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7,
|
|
6, 7, 9, 6, 5, 7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 10, 7, 6, 6, 7, 7,
|
|
9, 2, 6, 6, 0, 7, 9, 9, 7, 9, 10, 7, 5, 9, 10, 10, 9, 7, 9, 5, 7, 4, 7,
|
|
7, 9, 7, 9, 9, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6,
|
|
6, 9, 6, 9, 5, 6, 4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 10, 2, 6, 6, 0, 4, 5,
|
|
5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3,
|
|
3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8,
|
|
8, 7, 8, 10, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 10, 7, 5, 8,
|
|
10, 10, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0,
|
|
6, 8, 8, 6, 8, 10, 6, 5, 8, 10, 10, 8, 6, 8, 5, 6, 8, 10, 10, 8, 10, 11,
|
|
8, 6, 6, 8, 8, 10, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 10, 8,
|
|
10, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7,
|
|
6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6, 7, 6, 7, 7, 6, 6, 7, 7, 10, 2,
|
|
6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 10, 7, 10, 5, 7, 4, 7, 7, 10,
|
|
7, 10, 10, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6,
|
|
5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3, 6, 2, 6, 6, 0, 4, 5, 5,
|
|
4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 10, 4, 3, 3, 4, 2, 4, 4, 0, 2, 3, 3,
|
|
0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8,
|
|
7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 9, 7, 5, 8, 9, 9,
|
|
8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8,
|
|
6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9, 11, 8, 6, 6, 8, 8,
|
|
9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8, 9, 5, 8, 4, 3, 3,
|
|
4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 9, 6, 5, 7, 9, 9,
|
|
7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 11, 7, 6, 6, 7, 7, 9, 2, 6, 6, 0, 7, 9, 9,
|
|
7, 9, 11, 7, 5, 9, 11, 11, 9, 7, 9, 5, 7, 4, 7, 7, 9, 7, 9, 9, 7, 2, 3,
|
|
3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9, 6, 9, 5, 6, 4, 6,
|
|
6, 9, 6, 9, 9, 6, 6, 9, 9, 11, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3,
|
|
3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7,
|
|
7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 7, 7, 6, 6, 7,
|
|
7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 8, 7, 8, 5, 7, 4, 7,
|
|
7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 6, 6, 5, 8, 6,
|
|
6, 8, 6, 8, 5, 6, 8, 6, 6, 8, 6, 8, 8, 6, 6, 8, 8, 11, 2, 6, 6, 0, 4, 5,
|
|
5, 8, 5, 8, 8, 5, 5, 8, 8, 11, 8, 11, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2,
|
|
3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7,
|
|
6, 6, 7, 6, 7, 7, 6, 6, 7, 7, 6, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5,
|
|
7, 7, 5, 7, 5, 5, 7, 4, 7, 7, 4, 7, 4, 4, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6,
|
|
5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6,
|
|
3, 3, 6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 0, 4,
|
|
3, 3, 4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7,
|
|
8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7,
|
|
8, 8, 7, 8, 9, 7, 5, 8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2,
|
|
3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8,
|
|
9, 9, 8, 9, 10, 8, 6, 6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5,
|
|
8, 8, 9, 8, 9, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6,
|
|
7, 7, 6, 7, 9, 6, 5, 7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 10, 7, 6, 6,
|
|
7, 7, 9, 2, 6, 6, 0, 7, 9, 9, 7, 9, 10, 7, 5, 9, 10, 10, 9, 7, 9, 5, 7,
|
|
4, 7, 7, 9, 7, 9, 9, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5,
|
|
5, 6, 6, 9, 6, 9, 5, 6, 4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 10, 2, 6, 6, 0,
|
|
4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9,
|
|
2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6,
|
|
7, 8, 8, 7, 8, 10, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 10, 7,
|
|
5, 8, 10, 10, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7,
|
|
0, 0, 6, 8, 8, 6, 8, 10, 6, 5, 8, 10, 10, 8, 6, 8, 5, 6, 8, 10, 10, 8,
|
|
10, 8, 8, 6, 6, 8, 8, 10, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8,
|
|
10, 8, 10, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7,
|
|
7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6, 7, 6, 6, 7, 6, 7, 7, 6, 6, 7,
|
|
7, 10, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5, 5, 7, 7, 10, 7, 10, 5, 7, 4,
|
|
7, 7, 10, 7, 10, 10, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5,
|
|
5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6, 6, 3, 3, 6, 2, 6, 6, 0,
|
|
4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 10, 4, 3, 3, 4, 2, 4, 4, 0,
|
|
2, 3, 3, 0, 2, 0, 0, 0, 6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6,
|
|
7, 8, 8, 7, 8, 9, 7, 6, 6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 9, 7, 5,
|
|
8, 9, 9, 8, 7, 8, 5, 7, 4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0,
|
|
6, 8, 8, 6, 8, 9, 6, 5, 8, 9, 9, 8, 6, 8, 5, 6, 8, 9, 9, 8, 9, 8, 8, 6,
|
|
6, 8, 8, 9, 2, 6, 6, 0, 4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 9, 8, 9, 5, 8,
|
|
4, 3, 3, 4, 2, 4, 4, 8, 2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 9, 6, 5,
|
|
7, 9, 9, 7, 6, 7, 5, 6, 7, 9, 9, 7, 9, 7, 7, 6, 6, 7, 7, 9, 2, 6, 6, 0,
|
|
7, 9, 9, 7, 9, 7, 7, 5, 9, 7, 7, 9, 7, 9, 5, 7, 4, 7, 7, 9, 7, 9, 9, 7,
|
|
2, 3, 3, 7, 2, 7, 0, 0, 6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 9, 6, 9, 5, 6,
|
|
4, 6, 6, 9, 6, 9, 9, 6, 6, 9, 9, 6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5,
|
|
5, 3, 3, 5, 2, 5, 5, 9, 4, 3, 3, 4, 2, 4, 4, 9, 2, 3, 3, 0, 2, 0, 0, 0,
|
|
6, 7, 7, 6, 7, 8, 6, 5, 7, 8, 8, 7, 6, 7, 5, 6, 7, 8, 8, 7, 8, 7, 7, 6,
|
|
6, 7, 7, 8, 2, 6, 6, 0, 7, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 8, 7, 8, 5, 7,
|
|
4, 7, 7, 8, 7, 8, 8, 7, 2, 3, 3, 7, 2, 7, 0, 0, 6, 8, 8, 6, 8, 6, 6, 5,
|
|
8, 6, 6, 8, 6, 8, 5, 6, 8, 6, 6, 8, 6, 8, 8, 6, 6, 8, 8, 6, 2, 6, 6, 0,
|
|
4, 5, 5, 8, 5, 8, 8, 5, 5, 8, 8, 5, 8, 5, 5, 8, 4, 3, 3, 4, 2, 4, 4, 8,
|
|
2, 3, 3, 8, 2, 0, 0, 0, 6, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 7, 6, 7, 5, 6,
|
|
7, 6, 6, 7, 6, 7, 7, 6, 6, 7, 7, 6, 2, 6, 6, 0, 7, 5, 5, 7, 5, 7, 7, 5,
|
|
5, 7, 7, 5, 7, 5, 5, 7, 4, 7, 7, 4, 7, 4, 4, 7, 2, 3, 3, 7, 2, 7, 0, 0,
|
|
6, 5, 5, 6, 5, 6, 6, 5, 5, 6, 6, 5, 6, 5, 5, 6, 4, 6, 6, 4, 6, 4, 4, 6,
|
|
6, 3, 3, 6, 2, 6, 6, 0, 4, 5, 5, 4, 5, 4, 4, 5, 5, 3, 3, 5, 2, 5, 5, 0,
|
|
4, 3, 3, 4, 2, 4, 4, 0, 2, 3, 3, 0, 2, 0, 0, 0, };
|
|
|
|
/*
 * Pair of per-control-byte decode parameters: a shuffle-vector index
 * (into the `vectors` array) and the count of input bytes that pattern
 * consumes.  NOTE(review): presumably this merges the information in
 * vec_lookup and bytes_consumed into one cache-friendly entry, filled
 * during simdvbyteinit() — confirm against the (not fully visible)
 * initialization code.
 */
typedef struct index_bytes_consumed {
    uint8_t index;           /* index into the `vectors` shuffle table */
    uint8_t bytes_consumed;  /* input bytes consumed by this pattern */
} index_bytes_consumed;
|
|
|
|
/* Fused copy of vec_lookup[] and bytes_consumed[] (one table hit per decoded
 * group instead of two).  Filled in by simdvbyteinit(). */
static index_bytes_consumed combined_lookup[sizeof(bytes_consumed)];

/* The 170 shuffle-control vectors used by the decode kernels; initialized by
 * simdvbyteinit().  Lanes set to -1 make _mm_shuffle_epi8 emit a zero byte. */
static __m128i vectors[170];
|
|
|
|
/* One-time initialization of the decoder's lookup state.
 *
 * Builds the 170 _mm_shuffle_epi8 control vectors used by the group decoders
 * (a -1 lane makes pshufb write a zero byte), then fuses vec_lookup[] and
 * bytes_consumed[] into combined_lookup[] so each decoded group costs a
 * single table load.  Must be called before any of the decode entry points.
 *
 * Layout (matches the index tests in masked_vbyte_read_group):
 *   vectors[0..63]    — 6-integer groups (each int is 1 or 2 bytes)
 *   vectors[64..144]  — 4-integer groups (each int is 1 to 3 bytes)
 *   vectors[145..169] — 2-integer groups (longer varints) */
void simdvbyteinit(void) {
	vectors[0] = _mm_setr_epi8(0, -1, 4, -1, 1, -1, 5, -1, 2, -1, -1, -1, 3, -1, -1, -1);
	vectors[1] = _mm_setr_epi8(0, -1, 4, -1, 1, -1, 5, 6, 2, -1, -1, -1, 3, -1, -1, -1);
	vectors[2] = _mm_setr_epi8(0, -1, 4, 5, 1, -1, 6, -1, 2, -1, -1, -1, 3, -1, -1, -1);
	vectors[3] = _mm_setr_epi8(0, -1, 4, 5, 1, -1, 6, 7, 2, -1, -1, -1, 3, -1, -1, -1);
	vectors[4] = _mm_setr_epi8(0, -1, 5, -1, 1, -1, 6, -1, 2, -1, -1, -1, 3, 4, -1, -1);
	vectors[5] = _mm_setr_epi8(0, -1, 5, -1, 1, -1, 6, 7, 2, -1, -1, -1, 3, 4, -1, -1);
	vectors[6] = _mm_setr_epi8(0, -1, 5, 6, 1, -1, 7, -1, 2, -1, -1, -1, 3, 4, -1, -1);
	vectors[7] = _mm_setr_epi8(0, -1, 5, 6, 1, -1, 7, 8, 2, -1, -1, -1, 3, 4, -1, -1);
	vectors[8] = _mm_setr_epi8(0, -1, 5, -1, 1, -1, 6, -1, 2, 3, -1, -1, 4, -1, -1, -1);
	vectors[9] = _mm_setr_epi8(0, -1, 5, -1, 1, -1, 6, 7, 2, 3, -1, -1, 4, -1, -1, -1);
	vectors[10] = _mm_setr_epi8(0, -1, 5, 6, 1, -1, 7, -1, 2, 3, -1, -1, 4, -1, -1, -1);
	vectors[11] = _mm_setr_epi8(0, -1, 5, 6, 1, -1, 7, 8, 2, 3, -1, -1, 4, -1, -1, -1);
	vectors[12] = _mm_setr_epi8(0, -1, 6, -1, 1, -1, 7, -1, 2, 3, -1, -1, 4, 5, -1, -1);
	vectors[13] = _mm_setr_epi8(0, -1, 6, -1, 1, -1, 7, 8, 2, 3, -1, -1, 4, 5, -1, -1);
	vectors[14] = _mm_setr_epi8(0, -1, 6, 7, 1, -1, 8, -1, 2, 3, -1, -1, 4, 5, -1, -1);
	vectors[15] = _mm_setr_epi8(0, -1, 6, 7, 1, -1, 8, 9, 2, 3, -1, -1, 4, 5, -1, -1);
	vectors[16] = _mm_setr_epi8(0, -1, 5, -1, 1, 2, 6, -1, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[17] = _mm_setr_epi8(0, -1, 5, -1, 1, 2, 6, 7, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[18] = _mm_setr_epi8(0, -1, 5, 6, 1, 2, 7, -1, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[19] = _mm_setr_epi8(0, -1, 5, 6, 1, 2, 7, 8, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[20] = _mm_setr_epi8(0, -1, 6, -1, 1, 2, 7, -1, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[21] = _mm_setr_epi8(0, -1, 6, -1, 1, 2, 7, 8, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[22] = _mm_setr_epi8(0, -1, 6, 7, 1, 2, 8, -1, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[23] = _mm_setr_epi8(0, -1, 6, 7, 1, 2, 8, 9, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[24] = _mm_setr_epi8(0, -1, 6, -1, 1, 2, 7, -1, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[25] = _mm_setr_epi8(0, -1, 6, -1, 1, 2, 7, 8, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[26] = _mm_setr_epi8(0, -1, 6, 7, 1, 2, 8, -1, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[27] = _mm_setr_epi8(0, -1, 6, 7, 1, 2, 8, 9, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[28] = _mm_setr_epi8(0, -1, 7, -1, 1, 2, 8, -1, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[29] = _mm_setr_epi8(0, -1, 7, -1, 1, 2, 8, 9, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[30] = _mm_setr_epi8(0, -1, 7, 8, 1, 2, 9, -1, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[31] = _mm_setr_epi8(0, -1, 7, 8, 1, 2, 9, 10, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[32] = _mm_setr_epi8(0, 1, 5, -1, 2, -1, 6, -1, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[33] = _mm_setr_epi8(0, 1, 5, -1, 2, -1, 6, 7, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[34] = _mm_setr_epi8(0, 1, 5, 6, 2, -1, 7, -1, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[35] = _mm_setr_epi8(0, 1, 5, 6, 2, -1, 7, 8, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[36] = _mm_setr_epi8(0, 1, 6, -1, 2, -1, 7, -1, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[37] = _mm_setr_epi8(0, 1, 6, -1, 2, -1, 7, 8, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[38] = _mm_setr_epi8(0, 1, 6, 7, 2, -1, 8, -1, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[39] = _mm_setr_epi8(0, 1, 6, 7, 2, -1, 8, 9, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[40] = _mm_setr_epi8(0, 1, 6, -1, 2, -1, 7, -1, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[41] = _mm_setr_epi8(0, 1, 6, -1, 2, -1, 7, 8, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[42] = _mm_setr_epi8(0, 1, 6, 7, 2, -1, 8, -1, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[43] = _mm_setr_epi8(0, 1, 6, 7, 2, -1, 8, 9, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[44] = _mm_setr_epi8(0, 1, 7, -1, 2, -1, 8, -1, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[45] = _mm_setr_epi8(0, 1, 7, -1, 2, -1, 8, 9, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[46] = _mm_setr_epi8(0, 1, 7, 8, 2, -1, 9, -1, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[47] = _mm_setr_epi8(0, 1, 7, 8, 2, -1, 9, 10, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[48] = _mm_setr_epi8(0, 1, 6, -1, 2, 3, 7, -1, 4, -1, -1, -1, 5, -1, -1, -1);
	vectors[49] = _mm_setr_epi8(0, 1, 6, -1, 2, 3, 7, 8, 4, -1, -1, -1, 5, -1, -1, -1);
	vectors[50] = _mm_setr_epi8(0, 1, 6, 7, 2, 3, 8, -1, 4, -1, -1, -1, 5, -1, -1, -1);
	vectors[51] = _mm_setr_epi8(0, 1, 6, 7, 2, 3, 8, 9, 4, -1, -1, -1, 5, -1, -1, -1);
	vectors[52] = _mm_setr_epi8(0, 1, 7, -1, 2, 3, 8, -1, 4, -1, -1, -1, 5, 6, -1, -1);
	vectors[53] = _mm_setr_epi8(0, 1, 7, -1, 2, 3, 8, 9, 4, -1, -1, -1, 5, 6, -1, -1);
	vectors[54] = _mm_setr_epi8(0, 1, 7, 8, 2, 3, 9, -1, 4, -1, -1, -1, 5, 6, -1, -1);
	vectors[55] = _mm_setr_epi8(0, 1, 7, 8, 2, 3, 9, 10, 4, -1, -1, -1, 5, 6, -1, -1);
	vectors[56] = _mm_setr_epi8(0, 1, 7, -1, 2, 3, 8, -1, 4, 5, -1, -1, 6, -1, -1, -1);
	vectors[57] = _mm_setr_epi8(0, 1, 7, -1, 2, 3, 8, 9, 4, 5, -1, -1, 6, -1, -1, -1);
	vectors[58] = _mm_setr_epi8(0, 1, 7, 8, 2, 3, 9, -1, 4, 5, -1, -1, 6, -1, -1, -1);
	vectors[59] = _mm_setr_epi8(0, 1, 7, 8, 2, 3, 9, 10, 4, 5, -1, -1, 6, -1, -1, -1);
	vectors[60] = _mm_setr_epi8(0, 1, 8, -1, 2, 3, 9, -1, 4, 5, -1, -1, 6, 7, -1, -1);
	vectors[61] = _mm_setr_epi8(0, 1, 8, -1, 2, 3, 9, 10, 4, 5, -1, -1, 6, 7, -1, -1);
	vectors[62] = _mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, -1, 4, 5, -1, -1, 6, 7, -1, -1);
	vectors[63] = _mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, -1, -1, 6, 7, -1, -1);
	/* vectors[64..144]: four-integer groups, bytes gathered into 32-bit lanes. */
	vectors[64] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1);
	vectors[65] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1);
	vectors[66] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1);
	vectors[67] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1);
	vectors[68] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1);
	vectors[69] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1);
	vectors[70] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1);
	vectors[71] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1);
	vectors[72] = _mm_setr_epi8(0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1);
	vectors[73] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[74] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[75] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1);
	vectors[76] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[77] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[78] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1);
	vectors[79] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1);
	vectors[80] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1);
	vectors[81] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1);
	vectors[82] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1);
	vectors[83] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1);
	vectors[84] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1);
	vectors[85] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1);
	vectors[86] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1);
	vectors[87] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1);
	vectors[88] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1);
	vectors[89] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1);
	vectors[90] = _mm_setr_epi8(0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1);
	vectors[91] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1);
	vectors[92] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1);
	vectors[93] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1);
	vectors[94] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1);
	vectors[95] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1);
	vectors[96] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1);
	vectors[97] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1);
	vectors[98] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1);
	vectors[99] = _mm_setr_epi8(0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1);
	vectors[100] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1);
	vectors[101] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1);
	vectors[102] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1);
	vectors[103] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1);
	vectors[104] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1);
	vectors[105] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1);
	vectors[106] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1);
	vectors[107] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1);
	vectors[108] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1);
	vectors[109] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1);
	vectors[110] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1);
	vectors[111] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1);
	vectors[112] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1);
	vectors[113] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1);
	vectors[114] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1);
	vectors[115] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1);
	vectors[116] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1);
	vectors[117] = _mm_setr_epi8(0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1);
	vectors[118] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1);
	vectors[119] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1);
	vectors[120] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1);
	vectors[121] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1);
	vectors[122] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1);
	vectors[123] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1);
	vectors[124] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1);
	vectors[125] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1);
	vectors[126] = _mm_setr_epi8(0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1);
	vectors[127] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1);
	vectors[128] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1);
	vectors[129] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1);
	vectors[130] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1);
	vectors[131] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1);
	vectors[132] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1);
	vectors[133] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1);
	vectors[134] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1);
	vectors[135] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1);
	vectors[136] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1);
	vectors[137] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1);
	vectors[138] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1);
	vectors[139] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1);
	vectors[140] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1);
	vectors[141] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1);
	vectors[142] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1);
	vectors[143] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1);
	vectors[144] = _mm_setr_epi8(0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1);
	/* vectors[145..169]: two-integer groups (longer varints). */
	vectors[145] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 1);
	vectors[146] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, -1, -1, -1, -1, -1, 1);
	vectors[147] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, -1, -1, -1, 1);
	vectors[148] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4, -1, -1, 1);
	vectors[149] = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4, -1, 5, 1);
	vectors[150] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 2);
	vectors[151] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, 3, -1, -1, -1, -1, -1, -1, 2);
	vectors[152] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, -1, -1, -1, 2);
	vectors[153] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5, -1, -1, 2);
	vectors[154] = _mm_setr_epi8(1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5, -1, 6, 2);
	vectors[155] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 3);
	vectors[156] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, 4, -1, -1, -1, -1, -1, -1, 3);
	vectors[157] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, -1, -1, -1, 3);
	vectors[158] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, 6, -1, -1, 3);
	vectors[159] = _mm_setr_epi8(1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, 6, -1, 7, 3);
	vectors[160] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 4);
	vectors[161] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, 5, -1, -1, -1, -1, -1, -1, 4);
	vectors[162] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, -1, -1, -1, 4);
	vectors[163] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, 7, -1, -1, 4);
	vectors[164] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, 7, -1, 8, 4);
	vectors[165] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, -1, -1, -1, -1, -1, -1, -1, 5);
	vectors[166] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, 6, -1, -1, -1, -1, -1, -1, 5);
	vectors[167] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, -1, -1, -1, 5);
	vectors[168] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, 8, -1, -1, 5);
	vectors[169] = _mm_setr_epi8(1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, 8, -1, 9, 5);
	/* Fuse the two parallel tables so the hot path does one load, not two. */
	uint64_t i;
	for (i = 0; i < sizeof(bytes_consumed); i++) {
		index_bytes_consumed combined = { vec_lookup[i], bytes_consumed[i] };
		combined_lookup[i] = combined;
	}
}
|
|
|
|
/* Decode one group of varints from the 16 bytes at `in`, driven by `mask`:
 * bit i of `mask` is the high (continuation) bit of in[i], as produced by
 * _mm_movemask_epi8 over the input (see read_int_group).  Decoded 32-bit
 * values are written to `out`, the number produced is stored in *ints_read,
 * and the return value is the number of input bytes consumed.
 * NOTE(review): `out` must have room for *ints_read values (up to 16). */
static uint64_t masked_vbyte_read_group(const uint8_t* in, uint32_t* out,
		uint64_t mask, uint64_t* ints_read) {
	__m128i initial = _mm_lddqu_si128((const __m128i *) (in));
	__m128i * mout = (__m128i *) out;

	/* Fast path: no continuation bit among the first 16 bytes, so the block
	 * is 16 one-byte values.  Widen each byte to a 32-bit lane (all values
	 * are < 0x80, so the sign extension of cvtepi8 never triggers). */
	if (!(mask & 0xFFFF)) {
		__m128i result = _mm_cvtepi8_epi32(initial);
		_mm_storeu_si128(mout, result);
		initial = _mm_srli_si128(initial, 4);
		result = _mm_cvtepi8_epi32(initial);
		_mm_storeu_si128(mout + 1, result);
		initial = _mm_srli_si128(initial, 4);
		result = _mm_cvtepi8_epi32(initial);
		_mm_storeu_si128(mout + 2, result);
		initial = _mm_srli_si128(initial, 4);
		result = _mm_cvtepi8_epi32(initial);
		_mm_storeu_si128(mout + 3, result);
		*ints_read = 16;
		return 16;
	}

	/* Classify the group by its low 12 continuation bits; the fused table
	 * yields both the shuffle-vector index and the bytes consumed. */
	uint32_t low_12_bits = mask & 0xFFF;
	// combine index and bytes consumed into a single lookup
	index_bytes_consumed combined = combined_lookup[low_12_bits];
	uint64_t consumed = combined.bytes_consumed;
	uint8_t index = combined.index;

	__m128i shuffle_vector = vectors[index];

	/* Indices 0..63: six integers, each 1 or 2 bytes long. */
	if (index < 64) {
		*ints_read = 6;
		/* Gather each integer's bytes into its own 16-bit lane. */
		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x007F));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x7F00));
		/* Close the 1-bit gap left by stripping the continuation bit. */
		__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
		__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
		/* Spread the 16-bit results into 32-bit output lanes. */
		__m128i unpacked_result_a = _mm_and_si128(packed_result,
				_mm_set1_epi32(0x0000FFFF));
		_mm_storeu_si128(mout, unpacked_result_a);
		__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
		_mm_storel_epi64(mout+1, unpacked_result_b);
		//_mm_storeu_si128(mout + 1, unpacked_result_b); // maybe faster to write 16 bytes?
		return consumed;
	}
	/* Indices 64..144: four integers, each 1 to 3 bytes long, one per
	 * 32-bit lane. */
	if (index < 145) {

		*ints_read = 4;

		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x0000007F));
		__m128i middle_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x00007F00));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x007F0000));
		/* Each stripped continuation bit costs one position: shift the
		 * middle byte down 1 and the high byte down 2. */
		__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
		__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
		__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
		__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
		_mm_storeu_si128(mout, result);
		return consumed;
	}

	/* Indices 145..169: two longer varints, one per 64-bit half. */
	*ints_read = 2;

	/* Strip all continuation bits up front, then reassemble the 7-bit
	 * groups: the multiply realigns each payload byte within its 16-bit
	 * lane, and the shift/or folds the lanes together. */
	__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
	__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
	__m128i split_bytes = _mm_mullo_epi16(bytes_to_decode,
			_mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
	__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
	__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
	/* The lowest-order byte was shuffled to the top of each 64-bit half. */
	__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
	__m128i result_evens = _mm_or_si128(recombined, low_byte);
	/* Compact the even bytes into the two 32-bit results. */
	__m128i result = _mm_shuffle_epi8(result_evens,
			_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1,
					-1));
	_mm_storel_epi64(mout, result);
	//_mm_storeu_si128(mout, result); // maybe faster to write 16 bytes?

	return consumed;
}
|
|
|
|
/*
 * Inclusive prefix sum of the four 32-bit lanes of `curr`, seeded by the
 * top lane of `prev` (the running total from the previous group).
 * For curr = [A B C D] and prev = [. . . P] the result is
 * [P+A, P+A+B, P+A+B+C, P+A+B+C+D].
 */
__m128i PrefixSum(__m128i curr, __m128i prev) {
	const __m128i carry = _mm_shuffle_epi32(prev, 0xff); /* [P P P P] */
	__m128i step = _mm_slli_si128(curr, 4);              /* [- A B C] */
	__m128i pairs = _mm_add_epi32(curr, step);           /* [A AB BC CD] */
	step = _mm_slli_si128(pairs, 8);                     /* [- - A AB] */
	__m128i sums = _mm_add_epi32(pairs, step);           /* [A AB ABC ABCD] */
	return _mm_add_epi32(sums, carry);
}
|
|
|
|
// only the first two ints of curr are meaningful, rest is garbage to be ignored
|
|
/*
 * Prefix sum when only the low two lanes of `curr` carry real data (the
 * upper lanes are garbage).  Seeds with the top lane of `prev`; for
 * curr = [A B . .] and prev = [. . . P] the result is
 * [P+A, P+A+B, P+A+B, P+A+B], so the running total fills every upper lane.
 */
__m128i PrefixSum2ints(__m128i curr, __m128i prev) {
	const __m128i carry = _mm_shuffle_epi32(prev, 0xff); /* [P P P P] */
	__m128i step = _mm_slli_si128(curr, 4);              /* [- A B G] */
	__m128i partial = _mm_add_epi32(curr, step);         /* [A AB .. ..] */
	partial = _mm_shuffle_epi32(partial, 0x54);          /* [A AB AB AB] */
	return _mm_add_epi32(partial, carry);
}
|
|
|
|
/* Delta-decoding variant of masked_vbyte_read_group: the varints are gaps,
 * and each group is folded through PrefixSum/PrefixSum2ints so absolute
 * values are written to `out`.  *prev carries the running total between
 * calls (its top 32-bit lane holds the last emitted value).  Returns the
 * number of input bytes consumed and stores the count in *ints_read. */
static uint64_t masked_vbyte_read_group_delta(const uint8_t* in, uint32_t* out,
		uint64_t mask, uint64_t* ints_read, __m128i * prev) {
	__m128i initial = _mm_lddqu_si128((const __m128i *) (in));
	__m128i * mout = (__m128i *) out;

	/* Fast path: 16 one-byte gaps; widen 4 at a time and prefix-sum each
	 * quartet, threading the carry through *prev. */
	if (!(mask & 0xFFFF)) {
		__m128i result = _mm_cvtepi8_epi32(initial);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout, *prev);
		initial = _mm_srli_si128(initial, 4);
		result = _mm_cvtepi8_epi32(initial);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 1, *prev);
		initial = _mm_srli_si128(initial, 4);
		result = _mm_cvtepi8_epi32(initial);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 2, *prev);
		initial = _mm_srli_si128(initial, 4);
		result = _mm_cvtepi8_epi32(initial);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 3, *prev);
		*ints_read = 16;
		return 16;
	}

	/* Classify the group by its low 12 continuation bits. */
	uint32_t low_12_bits = mask & 0xFFF;
	// combine index and bytes consumed into a single lookup
	index_bytes_consumed combined = combined_lookup[low_12_bits];
	uint64_t consumed = combined.bytes_consumed;
	uint8_t index = combined.index;

	__m128i shuffle_vector = vectors[index];

	/* Indices 0..63: six gaps of 1 or 2 bytes each. */
	if (index < 64) {
		*ints_read = 6;
		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x007F));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x7F00));
		__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
		__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
		__m128i unpacked_result_a = _mm_and_si128(packed_result,
				_mm_set1_epi32(0x0000FFFF));
		/* First four gaps -> full prefix sum, last two -> 2-lane variant. */
		*prev = PrefixSum(unpacked_result_a, *prev);
		_mm_storeu_si128(mout, *prev);
		__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
		*prev = PrefixSum2ints(unpacked_result_b, *prev);
		_mm_storel_epi64(mout + 1, *prev);
		return consumed;
	}
	/* Indices 64..144: four gaps of 1 to 3 bytes, one per 32-bit lane. */
	if (index < 145) {

		*ints_read = 4;

		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x0000007F));
		__m128i middle_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x00007F00));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x007F0000));
		__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
		__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
		__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
		__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout, *prev);
		return consumed;
	}

	/* Indices 145..169: two longer gaps, one per 64-bit half. */
	*ints_read = 2;

	__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
	__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
	__m128i split_bytes = _mm_mullo_epi16(bytes_to_decode,
			_mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
	__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
	__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
	__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
	__m128i result_evens = _mm_or_si128(recombined, low_byte);
	__m128i result = _mm_shuffle_epi8(result_evens,
			_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1,
					-1));
	*prev = PrefixSum2ints(result, *prev);
	_mm_storel_epi64(mout, *prev);
	return consumed;
}
|
|
|
|
|
|
/* Self-masking variant of the group decoder: loads 16 bytes from `in`,
 * derives the continuation-bit mask itself with _mm_movemask_epi8, and
 * decodes one group into `out`.  Stores the number of integers produced in
 * *ints_read and returns the number of input bytes consumed. */
static int read_int_group(const uint8_t* in, uint32_t* out, int* ints_read) {

	__m128i initial = _mm_lddqu_si128((const __m128i *) in);
	__m128i * const mout = (__m128i *) out;

	/* One bit per byte: set iff that byte has its continuation bit set. */
	int mask = _mm_movemask_epi8(initial);
	/* Fast path: all 16 bytes are one-byte values. */
	if (mask == 0) {
		__m128i result;
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		_mm_storeu_si128(mout, result);
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		_mm_storeu_si128(mout + 1, result);
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		_mm_storeu_si128(mout + 2, result);
		result = _mm_cvtepi8_epi32(initial);
		_mm_storeu_si128(mout + 3, result);
		*ints_read = 16;
		return 16;
	}
	/* Classify by the low 12 mask bits via the fused lookup table. */
	int mask2 = mask & 0xFFF;
	index_bytes_consumed combined = combined_lookup[mask2];

	int index = combined.index;

	__m128i shuffle_vector = vectors[index];
	int consumed = combined.bytes_consumed;

	/* Indices 0..63: six integers of 1 or 2 bytes each. */
	if (index < 64) {
		*ints_read = 6;
		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x007F));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x7F00));
		/* Close the gap left by the stripped continuation bit. */
		__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
		__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
		__m128i unpacked_result_a = _mm_and_si128(packed_result,
				_mm_set1_epi32(0x0000FFFF));
		_mm_storeu_si128(mout, unpacked_result_a);
		__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
		_mm_storel_epi64(mout + 1, unpacked_result_b);
		return consumed;
	}
	/* Indices 64..144: four integers of 1 to 3 bytes each. */
	if (index < 145) {

		*ints_read = 4;

		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x0000007F));
		__m128i middle_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x00007F00));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x007F0000));
		__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
		__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
		__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
		__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
		_mm_storeu_si128(mout, result);
		return consumed;
	}

	/* Indices 145..169: two longer varints, one per 64-bit half. */
	*ints_read = 2;

	/* Strip continuation bits, realign the 7-bit payloads with a multiply,
	 * then fold and compact the even bytes into two 32-bit results. */
	__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
	__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
	__m128i split_bytes = _mm_mullo_epi16(bytes_to_decode,
			_mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
	__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
	__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
	__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
	__m128i result_evens = _mm_or_si128(recombined, low_byte);
	__m128i result = _mm_shuffle_epi8(result_evens,
			_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1,
					-1));

	_mm_storel_epi64(mout, result);
	return consumed;
}
|
|
|
|
|
|
// length : number of ints we want to decode
|
|
/* Decode `length` varint-encoded 32-bit integers from `in` into `out`.
 * Returns the number of input bytes consumed.
 *
 * Strategy: maintain a 64-bit window `sig` of continuation bits (bit i of
 * `sig` corresponds to in[consumed + i]), refilled by scanning the input
 * 48 bytes at a time, two scan steps ahead of decoding.  Groups are decoded
 * by masked_vbyte_read_group; the scalar read_int (defined elsewhere)
 * finishes the tail.
 * NOTE(review): the main loops read up to 96 bytes past `consumed`; the
 * caller presumably guarantees that slack exists — confirm against callers. */
size_t masked_vbyte_decode(const uint8_t* in, uint32_t* out,
		uint64_t length) {
	size_t consumed = 0; // number of bytes read
	uint64_t count = 0; // how many integers we have read so far

	uint64_t sig = 0;        /* window of continuation bits, LSB = next byte */
	int availablebytes = 0;  /* scanned-but-not-yet-decoded bytes in sig */
	if (96 < length) {
		size_t scanned = 0;  /* bytes whose continuation bits were gathered */


/* Gather the continuation bits of the next 48 input bytes into lowSig/highSig
 * (one 32-byte movemask plus one 16-byte movemask). */
#ifdef __AVX2__
		__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
		uint32_t lowSig = _mm256_movemask_epi8(low);
#else
		__m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
		uint32_t lowSig1 = _mm_movemask_epi8(low1);
		__m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
		uint32_t lowSig2 = _mm_movemask_epi8(low2);
		uint32_t lowSig = lowSig2 << 16;
		lowSig |= lowSig1;
#endif

		// excess verbosity to avoid problems with sign extension on conversions
		// better to think about what's happening and make it clearer
		__m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
		uint32_t highSig = _mm_movemask_epi8(high);
		uint64_t nextSig = highSig;
		nextSig <<= 32;
		nextSig |= lowSig;
		scanned += 48;

		while (count + 96 < length) { // 96 == 48 + 48 ahead for scanning
			uint64_t thisSig = nextSig;

/* Prefetch the continuation bits of the following 48 bytes. */
#ifdef __AVX2__
			low = _mm256_loadu_si256((__m256i *)(in + scanned));
			lowSig = _mm256_movemask_epi8(low);
#else
			low1 = _mm_loadu_si128((__m128i *) (in + scanned));
			lowSig1 = _mm_movemask_epi8(low1);
			low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
			lowSig2 = _mm_movemask_epi8(low2);
			lowSig = lowSig2 << 16;
			lowSig |= lowSig1;
#endif

			high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
			highSig = _mm_movemask_epi8(high);
			nextSig = highSig;
			nextSig <<= 32;
			nextSig |= lowSig;

			/* Splice thisSig into sig above the bits still pending decode. */
			uint64_t remaining = scanned - (consumed + 48);
			sig = (thisSig << remaining) | sig;

			uint64_t reload = scanned - 16;
			scanned += 48;

			// need to reload when less than 16 scanned bytes remain in sig
			while (consumed < reload) {
				uint64_t ints_read;
				uint64_t bytes = masked_vbyte_read_group(in + consumed,
						out + count, sig, &ints_read);
				sig >>= bytes;

				// seems like this might force the compiler to prioritize shifting sig >>= bytes
				if (sig == 0xFFFFFFFFFFFFFFFF)
					return 0; // fake check to force earliest evaluation

				consumed += bytes;
				count += ints_read;
			}
		}
		/* Fold the last prefetched 48-byte mask into sig before the tail. */
		sig = (nextSig << (scanned - consumed - 48)) | sig;
		availablebytes = scanned - consumed;
	}
	/* Tail: keep sig topped up 16/32 bytes at a time while enough integers
	 * remain, then decode group by group. */
	while (availablebytes + count < length) {
		if (availablebytes < 16) {
			if (availablebytes + count + 31 < length) {
#ifdef __AVX2__
				uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
				sig |= (newsigavx << availablebytes);
#else
				uint64_t newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				uint64_t newsig2 = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes + 16
										+ consumed)));
				sig |= (newsig << availablebytes)
						| (newsig2 << (availablebytes + 16));
#endif
				availablebytes += 32;
			} else if (availablebytes + count + 15 < length) {
				int newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				sig |= newsig << availablebytes;
				availablebytes += 16;
			} else {
				break;
			}
		}
		uint64_t ints_read;

		uint64_t eaten = masked_vbyte_read_group(in + consumed, out + count,
				sig, &ints_read);
		consumed += eaten;
		availablebytes -= eaten;
		sig >>= eaten;
		count += ints_read;
	}
	/* Scalar fallback for the last few integers. */
	for (; count < length; count++) {
		consumed += read_int(in + consumed, out + count);
	}
	return consumed;
}
|
|
|
|
|
|
// inputsize : number of input bytes we want to decode
|
|
// returns the number of written ints
|
|
size_t masked_vbyte_decode_fromcompressedsize(const uint8_t* in, uint32_t* out,
		size_t inputsize) {
	size_t consumed = 0; // number of bytes read
	uint32_t * initout = out; // kept so we can return how many ints were written

	// sig caches the continuation (high) bits of the next `availablebytes`
	// input bytes: bit i of sig == high bit of in[consumed + i].
	uint64_t sig = 0;
	int availablebytes = 0;
	if (96 < inputsize) {
		// Fast path: scan continuation bits 48 bytes at a time, staying
		// 48 bytes ahead of the decoder.
		size_t scanned = 0; // bytes whose high bits have been gathered so far

#ifdef __AVX2__
		__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
		uint32_t lowSig = _mm256_movemask_epi8(low);
#else
		__m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
		uint32_t lowSig1 = _mm_movemask_epi8(low1);
		__m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
		uint32_t lowSig2 = _mm_movemask_epi8(low2);
		uint32_t lowSig = lowSig2 << 16;
		lowSig |= lowSig1;
#endif

		// excess verbosity to avoid problems with sign extension on conversions
		// better to think about what's happening and make it clearer
		__m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
		uint32_t highSig = _mm_movemask_epi8(high);
		uint64_t nextSig = highSig;
		nextSig <<= 32;
		nextSig |= lowSig;
		scanned += 48;

		while (scanned + 48 <= inputsize) { // 96 == 48 + 48 ahead for scanning
			uint64_t thisSig = nextSig;

#ifdef __AVX2__
			low = _mm256_loadu_si256((__m256i *)(in + scanned));
			lowSig = _mm256_movemask_epi8(low);
#else
			low1 = _mm_loadu_si128((__m128i *) (in + scanned));
			lowSig1 = _mm_movemask_epi8(low1);
			low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
			lowSig2 = _mm_movemask_epi8(low2);
			lowSig = lowSig2 << 16;
			lowSig |= lowSig1;
#endif

			high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
			highSig = _mm_movemask_epi8(high);
			nextSig = highSig;
			nextSig <<= 32;
			nextSig |= lowSig;

			// splice the previously scanned 48 bits above whatever is
			// still unconsumed in sig
			uint64_t remaining = scanned - (consumed + 48);
			sig = (thisSig << remaining) | sig;

			uint64_t reload = scanned - 16;
			scanned += 48;

			// need to reload when less than 16 scanned bytes remain in sig
			while (consumed < reload) {
				uint64_t ints_read;
				uint64_t bytes = masked_vbyte_read_group(in + consumed,
						out, sig, &ints_read);
				sig >>= bytes;

				// seems like this might force the compiler to prioritize shifting sig >>= bytes
				if (sig == 0xFFFFFFFFFFFFFFFF)
					return 0; // fake check to force earliest evaluation

				consumed += bytes;
				out += ints_read;
			}
		}
		// carry over the last scanned bits into sig for the slower loop below
		sig = (nextSig << (scanned - consumed - 48)) | sig;
		availablebytes = scanned - consumed;
	}
	// Medium path: refill sig 32 (or 16) bytes at a time for as long as a
	// full SIMD load would stay inside the input buffer.
	while (1) {
		if (availablebytes < 16) {
			if (availablebytes + consumed + 31 < inputsize) {
#ifdef __AVX2__
				uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
				sig |= (newsigavx << availablebytes);
#else
				uint64_t newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				uint64_t newsig2 = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes + 16
										+ consumed)));
				sig |= (newsig << availablebytes)
						| (newsig2 << (availablebytes + 16));
#endif
				availablebytes += 32;
			} else if(availablebytes + consumed + 15 < inputsize ) {
				int newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				sig |= newsig << availablebytes;
				availablebytes += 16;
			} else {
				break; // cannot load 16 more bytes safely: fall back to scalar
			}
		}
		uint64_t ints_read;
		uint64_t bytes = masked_vbyte_read_group(in + consumed, out,
				sig, &ints_read);
		consumed += bytes;
		availablebytes -= bytes;
		sig >>= bytes;
		out += ints_read;
	}
	// Scalar tail: decode the remaining bytes one varint at a time.
	while (consumed < inputsize) {
		unsigned int shift = 0;
		uint32_t v;
		for (v = 0; consumed < inputsize; shift += 7) {
			uint8_t c = in[consumed++];
			if ((c & 128) == 0) { // high bit clear: final byte of this varint
				out[0] = v + (c << shift);
				++out;
				break;
			} else {
				v += (c & 127) << shift;
			}
		}
	}
	return out - initout;
}
|
|
|
|
|
|
/* Decode `length` varint-encoded 32-bit integers from `in` into `out`.
 * Groups are used while at least 16 outputs remain; the tail is decoded
 * one integer at a time. Returns the number of input bytes consumed. */
size_t read_ints(const uint8_t* in, uint32_t* out, int length) {
	size_t pos = 0;  /* bytes consumed so far */
	int done = 0;    /* integers decoded so far */

	/* grouped fast path */
	while (done + 15 < length) {
		int group_count;
		pos += read_int_group(in + pos, out + done, &group_count);
		done += group_count;
	}

	/* scalar tail */
	while (done < length) {
		pos += read_int(in + pos, out + done);
		++done;
	}
	return pos;
}
|
|
|
|
// Decode one SIMD group of delta-compressed varints from `in` into `out`.
// *ints_read receives the number of integers produced (16, 6, 4 or 2);
// *prev carries the running prefix-sum state between calls. Returns the
// number of input bytes consumed.
// NOTE(review): the 6- and 2-int branches store a full 16 bytes of output,
// so `out` must have room beyond *ints_read entries — confirm with callers.
static int read_int_group_delta(const uint8_t* in, uint32_t* out,
		int* ints_read, __m128i * prev) {

	__m128i initial = _mm_lddqu_si128((const __m128i *) in);
	__m128i * const mout = (__m128i *) out;

	// movemask gathers the continuation (high) bit of each of the 16 bytes
	int mask = _mm_movemask_epi8(initial);
	if (mask == 0) {
		// All 16 bytes are single-byte varints: sign/zero-extend four bytes
		// at a time to 32 bits, prefix-sum, and store four results per step.
		__m128i result;
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout, *prev);
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 1, *prev);
		result = _mm_cvtepi8_epi32(initial);
		initial = _mm_srli_si128(initial, 4);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 2, *prev);
		result = _mm_cvtepi8_epi32(initial);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout + 3, *prev);
		*ints_read = 16;
		return 16;
	}
	// Only the low 12 continuation bits select the decode pattern.
	int mask2 = mask & 0xFFF;
	index_bytes_consumed combined = combined_lookup[mask2];

	int index = combined.index;

	__m128i shuffle_vector = vectors[index];
	int consumed = combined.bytes_consumed;

	if (index < 64) {
		// Pattern: six values packed into 16-bit lanes (two 7-bit payloads
		// per lane).
		*ints_read = 6;
		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x007F));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi16(0x7F00));
		// close the 1-bit gap left by the stripped continuation bit
		__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
		__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
		__m128i unpacked_result_a = _mm_and_si128(packed_result,
				_mm_set1_epi32(0x0000FFFF));
		*prev = PrefixSum(unpacked_result_a, *prev);
		_mm_storeu_si128(mout, *prev);
		__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
		*prev = PrefixSum2ints(unpacked_result_b, *prev);
		_mm_storeu_si128(mout + 1, *prev);
		return consumed;
	}
	if (index < 145) {
		// Pattern: four values, up to three 7-bit payload bytes per
		// 32-bit lane.
		*ints_read = 4;

		__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
		__m128i low_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x0000007F));
		__m128i middle_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x00007F00));
		__m128i high_bytes = _mm_and_si128(bytes_to_decode,
				_mm_set1_epi32(0x007F0000));
		// squeeze out the continuation-bit gaps (1 bit per payload byte)
		__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
		__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
		__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
		__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
		*prev = PrefixSum(result, *prev);
		_mm_storeu_si128(mout, *prev);

		return consumed;
	}

	// Fallback pattern: two wide values, one per 64-bit half.
	*ints_read = 2;

	// strip all continuation bits up front, then shuffle payload bytes
	// into position
	__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
	__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
	// multiply shifts each 16-bit lane left by a lane-specific amount
	__m128i split_bytes = _mm_mullo_epi16(bytes_to_decode,
			_mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
	__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
	__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
	__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
	__m128i result_evens = _mm_or_si128(recombined, low_byte);
	// compact the even bytes of each half into two 32-bit results
	__m128i result = _mm_shuffle_epi8(result_evens,
			_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1,
					-1));
	*prev = PrefixSum2ints(result, *prev);
	_mm_storeu_si128(mout, *prev);
	return consumed;
}
|
|
|
|
|
|
// len_signed : number of ints we want to decode
|
|
// length : number of ints we want to decode
// prev   : base value the first decoded delta is added to
// Decodes `length` delta-compressed varints from `in` into `out`, keeping
// the running previous value vectorized in mprev. Returns the number of
// input bytes consumed.
// Fix: removed a stray `if (availablebytes < 16) break;` that preceded the
// refill block below with the identical condition, which made the 16/32-byte
// SIMD refill path unreachable and forced the scalar tail loop to decode
// everything past the first scan window.
size_t masked_vbyte_decode_delta(const uint8_t* in, uint32_t* out,
		uint64_t length, uint32_t prev) {
	//uint64_t length = (uint64_t) len_signed; // number of ints we want to decode
	size_t consumed = 0; // number of bytes read
	__m128i mprev = _mm_set1_epi32(prev); // running prefix-sum state
	uint64_t count = 0; // how many integers we have read so far

	// sig caches the continuation bits of the next `availablebytes` bytes
	uint64_t sig = 0;
	int availablebytes = 0;
	if (96 < length) {
		// Fast path: gather continuation bits 48 bytes at a time, staying
		// one 48-byte window ahead of the decoder.
		size_t scanned = 0;

#ifdef __AVX2__
		__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
		uint32_t lowSig = _mm256_movemask_epi8(low);
#else
		__m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
		uint32_t lowSig1 = _mm_movemask_epi8(low1);
		__m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
		uint32_t lowSig2 = _mm_movemask_epi8(low2);
		uint32_t lowSig = lowSig2 << 16;
		lowSig |= lowSig1;
#endif

		// excess verbosity to avoid problems with sign extension on conversions
		// better to think about what's happening and make it clearer
		__m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
		uint32_t highSig = _mm_movemask_epi8(high);
		uint64_t nextSig = highSig;
		nextSig <<= 32;
		nextSig |= lowSig;
		scanned += 48;

		while (count + 96 < length) { // 96 == 48 + 48 ahead for scanning
			uint64_t thisSig = nextSig;

#ifdef __AVX2__
			low = _mm256_loadu_si256((__m256i *)(in + scanned));
			lowSig = _mm256_movemask_epi8(low);
#else
			low1 = _mm_loadu_si128((__m128i *) (in + scanned));
			lowSig1 = _mm_movemask_epi8(low1);
			low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
			lowSig2 = _mm_movemask_epi8(low2);
			lowSig = lowSig2 << 16;
			lowSig |= lowSig1;
#endif

			high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
			highSig = _mm_movemask_epi8(high);
			nextSig = highSig;
			nextSig <<= 32;
			nextSig |= lowSig;

			// splice the previous window's bits above what is left in sig
			uint64_t remaining = scanned - (consumed + 48);
			sig = (thisSig << remaining) | sig;

			uint64_t reload = scanned - 16;
			scanned += 48;

			// need to reload when less than 16 scanned bytes remain in sig
			while (consumed < reload) {
				uint64_t ints_read;
				uint64_t bytes = masked_vbyte_read_group_delta(in + consumed,
						out + count, sig, &ints_read, &mprev);
				sig >>= bytes;

				// seems like this might force the compiler to prioritize shifting sig >>= bytes
				if (sig == 0xFFFFFFFFFFFFFFFF)
					return 0; // fake check to force earliest evaluation

				consumed += bytes;
				count += ints_read;
			}
		}
		sig = (nextSig << (scanned - consumed - 48)) | sig;
		availablebytes = scanned - consumed;
	}
	// Medium path: refill sig 32 (or 16) bytes at a time while a full SIMD
	// load stays in bounds, decoding one group per iteration.
	while (availablebytes + count < length) {
		if (availablebytes < 16) {
			if (availablebytes + count + 31 < length) {
#ifdef __AVX2__
				uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
				sig |= (newsigavx << availablebytes);
#else
				uint64_t newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				uint64_t newsig2 = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes + 16
										+ consumed)));
				sig |= (newsig << availablebytes)
						| (newsig2 << (availablebytes + 16));
#endif
				availablebytes += 32;
			} else if (availablebytes + count + 15 < length) {
				int newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				sig |= newsig << availablebytes;
				availablebytes += 16;
			} else {
				break; // not enough headroom for a SIMD load: scalar tail
			}
		}
		uint64_t ints_read;
		uint64_t eaten = masked_vbyte_read_group_delta(in + consumed, out + count,
				sig, &ints_read, &mprev);
		consumed += eaten;
		availablebytes -= eaten;
		sig >>= eaten;
		count += ints_read;
	}
	// extract the last decoded value for the scalar tail
	prev = _mm_extract_epi32(mprev, 3);
	for (; count < length; count++) {
		consumed += read_int_delta(in + consumed, out + count, &prev);
	}
	return consumed;
}
|
|
|
|
/* Decode `length` delta-compressed varints from `in` into `out`, starting
 * from base value `prev`. Groups are used while at least 16 outputs remain;
 * the tail is decoded scalar. Returns the number of input bytes consumed. */
size_t read_ints_delta(const uint8_t* in, uint32_t* out, int length,
		uint32_t prev) {
	__m128i running = _mm_set1_epi32(prev); /* vectorized prefix-sum state */
	size_t pos = 0;  /* bytes consumed so far */
	int done = 0;    /* integers decoded so far */

	/* grouped fast path */
	while (done + 15 < length) {
		int group_count;
		pos += read_int_group_delta(in + pos, out + done, &group_count,
				&running);
		done += group_count;
	}

	/* hand the last decoded value back to the scalar decoder */
	prev = _mm_extract_epi32(running, 3);
	while (done < length) {
		pos += read_int_delta(in + pos, out + done, &prev);
		++done;
	}
	return pos;
}
|
|
|
|
|
|
|
|
// inputsize : number of input bytes we want to decode
|
|
// returns the number of written ints
|
|
size_t masked_vbyte_decode_fromcompressedsize_delta(const uint8_t* in, uint32_t* out,
		size_t inputsize, uint32_t prev) {
	size_t consumed = 0; // number of bytes read
	uint32_t * initout = out; // kept so we can return how many ints were written
	__m128i mprev = _mm_set1_epi32(prev); // running prefix-sum state
	// sig caches the continuation bits of the next `availablebytes` bytes
	uint64_t sig = 0;
	int availablebytes = 0;
	if (96 < inputsize) {
		// Fast path: gather continuation bits 48 bytes at a time, staying
		// one 48-byte window ahead of the decoder.
		size_t scanned = 0;

#ifdef __AVX2__
		__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
		uint32_t lowSig = _mm256_movemask_epi8(low);
#else
		__m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
		uint32_t lowSig1 = _mm_movemask_epi8(low1);
		__m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
		uint32_t lowSig2 = _mm_movemask_epi8(low2);
		uint32_t lowSig = lowSig2 << 16;
		lowSig |= lowSig1;
#endif

		// excess verbosity to avoid problems with sign extension on conversions
		// better to think about what's happening and make it clearer
		__m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
		uint32_t highSig = _mm_movemask_epi8(high);
		uint64_t nextSig = highSig;
		nextSig <<= 32;
		nextSig |= lowSig;
		scanned += 48;

		while (scanned + 48 <= inputsize) { // 96 == 48 + 48 ahead for scanning
			uint64_t thisSig = nextSig;

#ifdef __AVX2__
			low = _mm256_loadu_si256((__m256i *)(in + scanned));
			lowSig = _mm256_movemask_epi8(low);
#else
			low1 = _mm_loadu_si128((__m128i *) (in + scanned));
			lowSig1 = _mm_movemask_epi8(low1);
			low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
			lowSig2 = _mm_movemask_epi8(low2);
			lowSig = lowSig2 << 16;
			lowSig |= lowSig1;
#endif

			high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
			highSig = _mm_movemask_epi8(high);
			nextSig = highSig;
			nextSig <<= 32;
			nextSig |= lowSig;

			// splice the previous window's bits above what is left in sig
			uint64_t remaining = scanned - (consumed + 48);
			sig = (thisSig << remaining) | sig;

			uint64_t reload = scanned - 16;
			scanned += 48;

			// need to reload when less than 16 scanned bytes remain in sig
			while (consumed < reload) {
				uint64_t ints_read;
				uint64_t bytes = masked_vbyte_read_group_delta(in + consumed,
						out, sig, &ints_read, &mprev);
				sig >>= bytes;

				// seems like this might force the compiler to prioritize shifting sig >>= bytes
				if (sig == 0xFFFFFFFFFFFFFFFF)
					return 0; // fake check to force earliest evaluation

				consumed += bytes;
				out += ints_read;
			}
		}
		// carry the last scanned bits into sig for the slower loop below
		sig = (nextSig << (scanned - consumed - 48)) | sig;
		availablebytes = scanned - consumed;
	}
	// Medium path: refill sig 32 (or 16) bytes at a time while a full SIMD
	// load stays inside the input buffer.
	while (1) {
		if (availablebytes < 16) {
			if (availablebytes + consumed + 31 < inputsize) {
#ifdef __AVX2__
				uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
				sig |= (newsigavx << availablebytes);
#else
				uint64_t newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				uint64_t newsig2 = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes + 16
										+ consumed)));
				sig |= (newsig << availablebytes)
						| (newsig2 << (availablebytes + 16));
#endif
				availablebytes += 32;
			} else if(availablebytes + consumed + 15 < inputsize ) {
				int newsig = _mm_movemask_epi8(
						_mm_lddqu_si128(
								(const __m128i *) (in + availablebytes
										+ consumed)));
				sig |= newsig << availablebytes;
				availablebytes += 16;
			} else {
				break; // cannot load 16 more bytes safely: fall back to scalar
			}
		}
		uint64_t ints_read;
		uint64_t bytes = masked_vbyte_read_group_delta(in + consumed, out,
				sig, &ints_read, &mprev);
		consumed += bytes;
		availablebytes -= bytes;
		sig >>= bytes;
		out += ints_read;
	}
	// extract the last decoded value for the scalar tail
	prev = _mm_extract_epi32(mprev, 3);
	// Scalar tail: decode, add to the running value, store the prefix sum.
	while (consumed < inputsize) {
		unsigned int shift = 0; uint32_t v;
		for (v = 0; consumed < inputsize; shift += 7) {
			uint8_t c = in[consumed++];
			if ((c & 128) == 0) { // high bit clear: final byte of this varint
				uint32_t delta = v + (c << shift);
				prev += delta;
				*out++ = prev;
				break;
			} else {
				v += (c & 127) << shift;
			}
		}
	}
	return out - initout;
}
|
|
|
|
|