size_t GetSizeOfHeadData(bool heapPageData) { if (heapPageData) { return SizeOfHeapPageHeaderData; } else { return SizeOfPageHeaderData; } } // maybe some itemid is not valid uint16 HeapPageCalcRealRowCnt (char *buf) { HeapPageHeaderData *page = (HeapPageHeaderData *)buf; uint16 cnt = 0; uint16 i; uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); for (i = 0; i < row_cnt; i++) { if (ItemIdIsNormal(GET_ITEMID_BY_IDX(buf, i))) { cnt++; } } return cnt; } void DecompressDeconvertRows(char *buf, char *aux_buf, int16 *real_order, uint16 max_row_len, uint16 real_row_cnt) { errno_t ret; HeapPageHeaderData *page = (HeapPageHeaderData *)buf; uint16 row_cnt = real_row_cnt; uint32 total_size = page->pd_special - page->pd_upper; char *copy_begin = buf + page->pd_upper; char *row; uint16 i, j, k, cur, up, row_size; ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); securec_check(ret, "", ""); k = 0; for (i = 0; i < max_row_len; i++) { for (j = 0; j < row_cnt; j++) { up = (j == (row_cnt - 1)) ? page->pd_special : GET_ITEMID_BY_IDX(buf, (real_order[j + 1]))->lp_off; cur = GET_ITEMID_BY_IDX(buf, (real_order[j]))->lp_off; row_size = up - cur; row = aux_buf + cur; if (i < row_size) { row[i] = copy_begin[k++]; // this part is reshaped } } } if (k != total_size) { printf("ERROR!!! pg_deconvert_rows error...!!!\n"); ASSERT(0); return; } // cp aux_buf to page_buf ret = memcpy_sp(copy_begin, total_size, aux_buf + page->pd_upper, total_size); securec_check(ret, "", ""); return ; } // 1: as tuple_offset order, that means asc order. // 2: store all itemid's idx. // 3:maybe some itemid is not in order. 
void CompressConvertItemRealOrder(char *buf, int16 *real_order, uint16 real_row_cnt) { HeapPageHeaderData *page = (HeapPageHeaderData *)buf; uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); ItemIdData *begin = (ItemIdData *)(buf + GetPageHeaderSize(page)); int16 *link_order = real_order + real_row_cnt; int16 i, head, curr, prev; int16 end = -1; // invalid index head = end; // very likely to seems that itemids stored by desc order, and ignore invalid itemid for (i = 0; i < row_cnt; i++) { if (!ItemIdIsNormal(begin + i)) { continue; } if (head == end) { // set the head idx, insert the first link_order[i] = end; head = i; continue; } if ((begin + i)->lp_off < (begin + head)->lp_off) { link_order[i] = head; // update the head idx head = i; continue; } prev = head; curr = link_order[head]; while ((curr != end) && ((begin + i)->lp_off > (begin + curr)->lp_off)) { prev = curr; curr = link_order[curr]; } link_order[prev] = i; link_order[i] = curr; } // arrange the link to array curr = head; for (i = 0; i < real_row_cnt; i++) { real_order[i] = curr; curr = link_order[curr]; } if (curr != end) { printf("ERROR!!! pre_convert_real_order error...!!!\n"); ASSERT(0); return; } } int DecompressPage(const char* src, char* dst, uint8 algorithm) { if (PageIs8BXidHeapVersion(src)) { return TemplateDecompressPage(src, dst, algorithm); } else { return TemplateDecompressPage(src, dst, algorithm); } } void cprs_diff_deconvert_rows(char *buf, uint32 offset, uint16 min_row_len, uint16 real_row_cnt) { uint16 row_cnt = real_row_cnt; uint32 common_size = min_row_len; uint8 *copy_begin = (uint8 *)(buf + offset); uint16 i, j; for (i = 0; i < common_size; i++) { for (j = 1; j < row_cnt; j++) { copy_begin[i * row_cnt + j] += copy_begin[i * row_cnt + (j - 1)]; } } return ; } // to find all row size are diffs in MIN_DIFF_SIZE byts. 
bool CompressConvertCheck(char *buf, int16 **real_order, uint16 *max_row_len, uint16 *min_row_len, uint16 *real_row_cnt) { HeapPageHeaderData *page = (HeapPageHeaderData *)buf; uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); int16 i, row_size; ItemIdData *ptr = NULL; uint16 up = page->pd_special; uint16 min_size = GS_INVALID_ID16; uint16 max_size = 0; errno_t ret; if (page->pd_lower < GetPageHeaderSize(page) || (page->pd_lower > page->pd_upper)) { return false; } uint16 normal_row_cnt = HeapPageCalcRealRowCnt(buf); if (normal_row_cnt < MIN_CONVERT_CNT) { // no need convert return false; } // to store the real tuple order. /* --------------------------|-------------------------- xxxxxxxxxxxxxxxxxxxxxxxxxx|xxxxxxxxxxxxxxxxxxxxxxxxxx --------------------------|-------------------------- */ // the first part is real array order, and the second part is link. *real_order = (int16 *)malloc(sizeof(uint16) * row_cnt * 2); if (*real_order == NULL) { printf("zfunc compress file"); return false; } ret = memset_sp(*real_order, sizeof(uint16) * row_cnt * 2, 0, sizeof(uint16) * row_cnt * 2); securec_check(ret, "", ""); // order the ItemIds by tuple_offset order. CompressConvertItemRealOrder(buf, *real_order, normal_row_cnt); // do the check, to check all size of tuples. for (i = normal_row_cnt - 1; i >= 0; i--) { ptr = GET_ITEMID_BY_IDX(buf, ((*real_order)[i])); row_size = up - ptr->lp_off; if (row_size < MIN_CONVERT_CNT * 2) { return false; } min_size = (row_size < min_size) ? row_size : min_size; max_size = (row_size > max_size) ? row_size : max_size; if ((max_size - min_size) > MIN_DIFF_SIZE) { // no need convert return false; } up = ptr->lp_off; } // get the min row common size. 
*max_row_len = max_size; *min_row_len = min_size; *real_row_cnt = normal_row_cnt; return true; } void DecompressDeconvertItemIds(char *buf, char *aux_buf) { errno_t ret; HeapPageHeaderData *page = (HeapPageHeaderData *)buf; uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); uint32 total_size = row_cnt * sizeof(ItemIdData); char *copy_begin = buf + GetPageHeaderSize(page); uint16 i, j, k; // clear aux_buf ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); securec_check(ret, "", ""); k = 0; for (i = 0; i < sizeof(ItemIdData); i++) { for (j = 0; j < row_cnt; j++) { aux_buf[j * sizeof(ItemIdData) + i] = copy_begin[k++]; } } // cp aux_buf to page_buf ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size); securec_check(ret, "", ""); return ; } void DecompressDeconvertOnePage(char *buf, char *aux_buf, bool diff_convert) { uint16 max_row_len = 0; uint16 min_row_len = 0; int16 *real_order = NULL; // itemids are not in order sometimes. we must find the real uint16 real_row_cnt = 0; if (diff_convert) { cprs_diff_deconvert_rows(buf, GetPageHeaderSize(buf), sizeof(ItemIdData), (((HeapPageHeaderData *)buf)->pd_lower - GetPageHeaderSize(buf)) / sizeof(ItemIdData)); } // =======firstly, arrange the itemids. 
DecompressDeconvertItemIds(buf, aux_buf); if (!CompressConvertCheck(buf, &real_order, &max_row_len, &min_row_len, &real_row_cnt)) { if (real_order != NULL) { free(real_order); } ASSERT(0); return ; } // =======and last, the tuples if (diff_convert) { cprs_diff_deconvert_rows(buf, ((HeapPageHeaderData *)buf)->pd_upper, min_row_len, real_row_cnt); } DecompressDeconvertRows(buf, aux_buf, real_order, max_row_len, real_row_cnt); if (real_order != NULL) { free(real_order); } return ; } void DecompressPageDeconvert(char *src, bool diff_convert) { char *aux_buf = NULL; errno_t rc; aux_buf = (char *)malloc(BLCKSZ); if (aux_buf == NULL) { // add log return; } rc = memset_s(aux_buf, BLCKSZ, 0, BLCKSZ); securec_check(rc, "", ""); // do convert DecompressDeconvertOnePage(src, aux_buf, diff_convert); if (aux_buf != NULL) { free(aux_buf); } } /** * DecompressPage() -- Decompress one compressed page. * return size of decompressed page which should be BLCKSZ or * -1 for decompress error * -2 for unrecognized compression algorithm * * note:The size of dst must be greater than or equal to BLCKSZ. 
*/ template int TemplateDecompressPage(const char* src, char* dst, uint8 algorithm) { int decompressed_size; char* data; uint32 size; bool byte_convert, diff_convert; size_t sizeOfPageHeaderData = GetSizeOfHeadData(heapPageData); int rc = memcpy_s(dst, sizeOfPageHeaderData, src, sizeOfPageHeaderData); securec_check(rc, "", ""); if (heapPageData) { data = ((HeapPageCompressData*) src)->data; size = ((HeapPageCompressData*) src)->size; byte_convert = ((HeapPageCompressData*) src)->byte_convert; diff_convert = ((HeapPageCompressData*) src)->diff_convert; } else { data = ((PageCompressData*) src)->data; size = ((PageCompressData*) src)->size; byte_convert = ((PageCompressData*) src)->byte_convert; diff_convert = ((PageCompressData*) src)->diff_convert; } switch (algorithm) { case COMPRESS_ALGORITHM_PGLZ: decompressed_size = lz_decompress( data, size, dst + sizeOfPageHeaderData, BLCKSZ - sizeOfPageHeaderData, false); break; case COMPRESS_ALGORITHM_ZSTD: decompressed_size = ZSTD_decompress(dst + sizeOfPageHeaderData, BLCKSZ - sizeOfPageHeaderData, data, size); if (ZSTD_isError(decompressed_size)) { return -1; } break; default: return COMPRESS_UNSUPPORTED_ERROR; break; } if (byte_convert) { // deconvert dst DecompressPageDeconvert(dst, diff_convert); } return sizeOfPageHeaderData + decompressed_size; } // pg_lz /* ---------- * pg_lzcompress.c - * * This is an implementation of LZ compression for PostgreSQL. * It uses a simple history table and generates 2-3 byte tags * capable of backward copy information for 3-273 bytes with * a max offset of 4095. * * Entry routines: * * bool * pglz_compress(const char *source, int32 slen, PGLZ_Header *dest, * const PGLZ_Strategy *strategy); * * source is the input data to be compressed. * * slen is the length of the input data. * * dest is the output area for the compressed result. * It must be at least as big as PGLZ_MAX_OUTPUT(slen). * * strategy is a pointer to some information controlling * the compression algorithm. 
If NULL, the compiled * in default strategy is used. * * The return value is TRUE if compression succeeded, * FALSE if not; in the latter case the contents of dest * are undefined. * * void * pglz_decompress(const PGLZ_Header *source, char *dest) * * source is the compressed input. * * dest is the area where the uncompressed data will be * written to. It is the callers responsibility to * provide enough space. The required amount can be * obtained with the macro PGLZ_RAW_SIZE(source). * * The data is written to buff exactly as it was handed * to pglz_compress(). No terminating zero byte is added. * * The decompression algorithm and internal data format: * * PGLZ_Header is defined as * * typedef struct PGLZ_Header { * int32 vl_len_; * int32 rawsize; * } * * The header is followed by the compressed data itself. * * The data representation is easiest explained by describing * the process of decompression. * * If VARSIZE(x) == rawsize + sizeof(PGLZ_Header), then the data * is stored uncompressed as plain bytes. Thus, the decompressor * simply copies rawsize bytes from the location after the * header to the destination. * * Otherwise the first byte after the header tells what to do * the next 8 times. We call this the control byte. * * An unset bit in the control byte means, that one uncompressed * byte follows, which is copied from input to output. * * A set bit in the control byte means, that a tag of 2-3 bytes * follows. A tag contains information to copy some bytes, that * are already in the output buffer, to the current location in * the output. Let's call the three tag bytes T1, T2 and T3. The * position of the data to copy is coded as an offset from the * actual output position. * * The offset is in the upper nibble of T1 and in T2. * The length is in the lower nibble of T1. 
 *
 *          So the 16 bits of a 2 byte tag are coded as
 *
 *              7---T1--0  7---T2--0
 *              OOOO LLLL  OOOO OOOO
 *
 *          This limits the offset to 1-4095 (12 bits) and the length
 *          to 3-18 (4 bits) because 3 is always added to it. To emit
 *          a tag of 2 bytes with a length of 2 only saves one control
 *          bit. But we lose one byte in the possible length of a tag.
 *
 *          In the actual implementation, the 2 byte tag's length is
 *          limited to 3-17, because the value 0xF in the length nibble
 *          has special meaning. It means, that the next following
 *          byte (T3) has to be added to the length value of 18. That
 *          makes total limits of 1-4095 for offset and 3-273 for length.
 *
 *          Now that we have successfully decoded a tag, we simply copy
 *          the output that occurred <offset> bytes back to the current
 *          output location in the specified <length>. Thus, a
 *          sequence of 200 spaces (think about bpchar fields) could be
 *          coded in 4 bytes. One literal space and a three byte tag to
 *          copy 199 bytes with a -1 offset. Whow - that's a compression
 *          rate of 98%! Well, the implementation needs to save the
 *          original data size too, so we need another 4 bytes for it
 *          and end up with a total compression rate of 96%, what's still
 *          worth a Whow.
 *
 *      The compression algorithm
 *
 *          The following uses numbers used in the default strategy.
 *
 *          The compressor works best for attributes of a size between
 *          1K and 1M. For smaller items there's not that much chance of
 *          redundancy in the character sequence (except for large areas
 *          of identical bytes like trailing spaces) and for bigger ones
 *          our 4K maximum look-back distance is too small.
 *
 *          The compressor creates a table for 8192 lists of positions.
 *          For each input position (except the last 3), a hash key is
 *          built from the 4 next input bytes and the position remembered
 *          in the appropriate list. Thus, the table points to linked
 *          lists of likely to be at least in the first 4 characters
 *          matching strings. This is done on the fly while the input
 *          is compressed into the output area.
 *          Table entries are only
 *          kept for the last 4096 input positions, since we cannot use
 *          back-pointers larger than that anyway.
 *
 *          For each byte in the input, its hash key (built from this
 *          byte and the next 3) is used to find the appropriate list
 *          in the table. The lists remember the positions of all bytes
 *          that had the same hash key in the past in increasing backward
 *          offset order. Now for all entries in the used lists, the
 *          match length is computed by comparing the characters from the
 *          entry's position with the characters from the actual input
 *          position.
 *
 *          The compressor starts with a so called "good_match" of 128.
 *          It is a "prefer speed against compression ratio" optimizer.
 *          So if the first entry looked at already has 128 or more
 *          matching characters, the lookup stops and that position is
 *          used for the next tag in the output.
 *
 *          For each subsequent entry in the history list, the "good_match"
 *          is lowered by 10%. So the compressor will be more happy with
 *          short matches the farther it has to go back in the history.
 *          Another "speed against ratio" preference characteristic of
 *          the algorithm.
 *
 *          Thus there are 3 stop conditions for the lookup of matches:
 *
 *              - a match >= good_match is found
 *              - there are no more history entries to look at
 *              - the next history entry is already too far back
 *                to be coded into a tag.
 *
 *          Finally the match algorithm checks that at least a match
 *          of 3 or more bytes has been found, because that's the smallest
 *          amount of copy information to code into a tag. If so, a tag
 *          is emitted and all the input bytes covered by that are just
 *          scanned for the history add's, otherwise a literal character
 *          is emitted and only its history entry added.
 *
 *      Acknowledgements:
 *
 *          Many thanks to Adisak Pochanayon, whose article about SLZ
 *          inspired me to write the PostgreSQL compression this way.
* * Jan Wieck * * Copyright (c) 1999-2012, PostgreSQL Global Development Group * * src/backend/utils/adt/pg_lzcompress.c * ---------- */ #include "postgres.h" #include "knl/knl_variable.h" #include #include "utils/pg_lzcompress.h" /* ---------- * The provided standard strategies * ---------- */ static const PGLZ_Strategy strategy_default_data = { 32, /* Data chunks less than 32 bytes are not * compressed */ INT_MAX, /* No upper limit on what we'll try to * compress */ 25, /* Require 25% compression rate, or not worth * it */ 1024, /* Give up if no compression in the first 1KB */ 128, /* Stop history lookup if a match of 128 bytes * is found */ 10 /* Lower good match size by 10% at every loop * iteration */ }; const PGLZ_Strategy* const PGLZ_strategy_default = &strategy_default_data; static const PGLZ_Strategy strategy_always_data = { 0, /* Chunks of any size are compressed */ INT_MAX, 0, /* It's enough to save one single byte */ INT_MAX, /* Never give up early */ 128, /* Stop history lookup if a match of 128 bytes * is found */ 6 /* Look harder for a good match */ }; const PGLZ_Strategy* const PGLZ_strategy_always = &strategy_always_data; /* ---------- * pglz_hist_idx - * * Computes the history table slot for the lookup by the next 4 * characters in the input. * * NB: because we use the next 4 characters, we are not guaranteed to * find 3-character matches; they very possibly will be in the wrong * hash list. This seems an acceptable tradeoff for spreading out the * hash keys more. * ---------- */ #define pglz_hist_idx(_s, _e) \ (((((_e) - (_s)) < 4) ? (int)(_s)[0] \ : (((unsigned char)((_s)[0]) << 9) ^ ((unsigned char)((_s)[1]) << 6) ^ \ ((unsigned char)((_s)[2]) << 3) ^ (unsigned char)((_s)[3]))) & \ (PGLZ_HISTORY_MASK)) /* ---------- * pglz_hist_add - * * Adds a new entry to the history table. * * If _recycle is true, then we are recycling a previously used entry, * and must first delink it from its old hashcode's linked list. 
*
 * NOTE: beware of multiple evaluations of macro's arguments, and note that
 * _hn and _recycle are modified in the macro.
 *
 * The entry at (_he)[_hn] is pushed onto the front of the list for the hash
 * slot of (_s); _hn then advances and wraps to 0 at PGLZ_HISTORY_SIZE, at
 * which point _recycle is switched to true.
 * ----------
 */
#define pglz_hist_add(_hs, _he, _hn, _recycle, _s, _e) \
    do { \
        int __hindex = pglz_hist_idx((_s), (_e)); \
        PGLZ_HistEntry** __myhsp = &(_hs)[__hindex]; \
        PGLZ_HistEntry* __myhe = &(_he)[_hn]; \
        if (_recycle) { \
            if (__myhe->prev == NULL) \
                (_hs)[__myhe->hindex] = __myhe->next; \
            else \
                __myhe->prev->next = __myhe->next; \
            if (__myhe->next != NULL) \
                __myhe->next->prev = __myhe->prev; \
        } \
        __myhe->next = *__myhsp; \
        __myhe->prev = NULL; \
        __myhe->hindex = __hindex; \
        __myhe->pos = (_s); \
        if (*__myhsp != NULL) \
            (*__myhsp)->prev = __myhe; \
        *__myhsp = __myhe; \
        if (++(_hn) >= PGLZ_HISTORY_SIZE) { \
            (_hn) = 0; \
            (_recycle) = true; \
        } \
    } while (0)

/* ----------
 * pglz_out_ctrl -
 *
 *      Outputs the last and allocates a new control byte if needed.
 *
 * "Needed" means the control bit mask __ctrl has become 0 in its low 8
 * bits: the accumulated bits __ctrlb are stored through __ctrlp, the next
 * output byte is reserved as the new control byte, and the accumulator and
 * mask are reset to 0 and 1.  __ctrlp, __ctrlb, __ctrl and __buf are all
 * modified.
 * ----------
 */
#define pglz_out_ctrl(__ctrlp, __ctrlb, __ctrl, __buf) \
    do { \
        if ((((unsigned char)(__ctrl)) & 0xff) == 0) { \
            *(__ctrlp) = __ctrlb; \
            __ctrlp = (__buf)++; \
            __ctrlb = 0; \
            __ctrl = 1; \
        } \
    } while (0)

/* ----------
 * pglz_out_literal -
 *
 *      Outputs a literal byte to the destination buffer including the
 *      appropriate control bit.
 *
 * The control bit for a literal stays unset; only the mask is shifted.
 * ----------
 */
#define pglz_out_literal(_ctrlp, _ctrlb, _ctrl, _buf, _byte) \
    do { \
        pglz_out_ctrl(_ctrlp, _ctrlb, _ctrl, _buf); \
        *(_buf)++ = (unsigned char)(_byte); \
        (_ctrl) <<= 1; \
    } while (0)

/* ----------
 * pglz_out_tag -
 *
 *      Outputs a backward reference tag of 2-4 bytes (depending on
 *      offset and length) to the destination buffer including the
 *      appropriate control bit.
* ---------- */ #define pglz_out_tag(_ctrlp, _ctrlb, _ctrl, _buf, _len, _off) \ do { \ pglz_out_ctrl(_ctrlp, _ctrlb, _ctrl, _buf); \ (_ctrlb) |= (_ctrl); \ (_ctrl) <<= 1; \ if ((_len) > 17) { \ (_buf)[0] = (unsigned char)((((uint32)(_off)&0xf00) >> 4) | 0x0f); \ (_buf)[1] = (unsigned char)(((uint32)(_off)&0xff)); \ (_buf)[2] = (unsigned char)((_len)-18); \ (_buf) += 3; \ } else { \ (_buf)[0] = (unsigned char)((((uint32)(_off)&0xf00) >> 4) | ((uint32)(_len)-3)); \ (_buf)[1] = (unsigned char)((uint32)(_off)&0xff); \ (_buf) += 2; \ } \ } while (0) #define HIST_START_LEN (sizeof(PGLZ_HistEntry*) * PGLZ_HISTORY_LISTS) #define HIST_ENTRIES_LEN (sizeof(PGLZ_HistEntry) * PGLZ_HISTORY_SIZE) #define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */ static PGLZ_HistEntry* hist_start[PGLZ_MAX_HISTORY_LISTS]; static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1]; /* ---------- * pglz_find_match - * * Lookup the history table if the actual input stream matches * another sequence of characters, starting somewhere earlier * in the input buffer. * ---------- */ static inline int pglz_find_match( PGLZ_HistEntry** hstart, const char* input, const char* end, int* lenp, int* offp, int good_match, int good_drop) { PGLZ_HistEntry* hent = NULL; int32 len = 0; int32 off = 0; /* * Traverse the linked history list until a good enough match is found. */ hent = hstart[pglz_hist_idx(input, end)]; while (hent != NULL) { const char* ip = input; const char* hp = hent->pos; int32 thisoff; int32 thislen; /* * Stop if the offset does not fit into our tag anymore. */ thisoff = ip - hp; if (thisoff >= 0x0fff) break; /* * Determine length of match. A better match must be larger than the * best so far. And if we already have a match of 16 or more bytes, * it's worth the call overhead to use memcmp() to check if this match * is equal for the same size. After that we must fallback to * character by character comparison to know the exact position where * the diff occurred. 
*/ thislen = 0; if (len >= 16) { if (memcmp(ip, hp, len) == 0) { thislen = len; ip += len; hp += len; while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) { thislen++; ip++; hp++; } } } else { while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) { thislen++; ip++; hp++; } } /* * Remember this match as the best (if it is) */ if (thislen > len) { len = thislen; off = thisoff; } /* * Advance to the next history entry */ hent = hent->next; /* * Be happy with lesser good matches the more entries we visited. But * no point in doing calculation if we're at end of list. */ if (hent != NULL) { if (len >= good_match) break; good_match -= (good_match * good_drop) / 100; } } /* * Return match information only if it results at least in one byte * reduction. */ if (len > 2) { *lenp = len; *offp = off; return 1; } return 0; } /* ---------- * lz_compress - * * Compresses source into dest using strategy. Returns the number of * bytes written in buffer dest, or -1 if compression fails. * ---------- */ int32 lz_compress(const char* source, int32 slen, char* dest) { unsigned char* bp = (unsigned char*) dest; unsigned char* bstart = bp; int hist_next = 0; bool hist_recycle = false; const char* dp = source; const char* dend = source + slen; unsigned char ctrl_dummy = 0; unsigned char* ctrlp = &ctrl_dummy; unsigned char ctrlb = 0; unsigned char ctrl = 0; bool found_match = false; int32 match_len; int32 match_off; int32 good_match; int32 good_drop; int32 result_size; int32 result_max; int32 need_rate; errno_t rc; const PGLZ_Strategy* strategy = PGLZ_strategy_always; /* * Our fallback strategy is the default. */ if (strategy == NULL) { strategy = PGLZ_strategy_default; } /* * If the strategy forbids compression (at all or if source chunk size out * of range), fail. */ if (strategy->match_size_good <= 0 || slen < strategy->min_input_size || slen > strategy->max_input_size) { return -1; } /* * Limit the match parameters to the supported range. 
*/ good_match = strategy->match_size_good; if (good_match > PGLZ_MAX_MATCH) { good_match = PGLZ_MAX_MATCH; } else if (good_match < 17) { good_match = 17; } good_drop = strategy->match_size_drop; if (good_drop < 0) { good_drop = 0; } else if (good_drop > 100) { good_drop = 100; } need_rate = strategy->min_comp_rate; if (need_rate < 0) { need_rate = 0; } else if (need_rate > 99) { need_rate = 99; } /* * Compute the maximum result size allowed by the strategy, namely the * input size minus the minimum wanted compression rate. This had better * be <= slen, else we might overrun the provided output buffer. */ if (slen > (INT_MAX / 100)) { /* Approximate to avoid overflow */ result_max = (slen / 100) * (100 - need_rate); } else { result_max = (slen * (100 - need_rate)) / 100; } /* * Initialize the history lists to empty. We do not need to zero the * hist_entries[] array; its entries are initialized as they are used. */ rc = memset_s(hist_start, HIST_START_LEN, 0, HIST_START_LEN); securec_check(rc, "\0", "\0"); /* * Compress the source directly into the output buffer. */ while (dp < dend) { /* * If we already exceeded the maximum result size, fail. * * We check once per loop; since the loop body could emit as many as 4 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better * allow 4 slop bytes. */ if (bp - bstart >= result_max) { return -1; } /* * If we've emitted more than first_success_by bytes without finding * anything compressible at all, fail. This lets us fall out * reasonably quickly when looking at incompressible input (such as * pre-compressed data). */ if (!found_match && bp - bstart >= strategy->first_success_by) { return -1; } /* * Try to find a match in the history */ if (pglz_find_match(hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) { /* * Create the tag and add history entries for all matched * characters. 
*/ pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); while (match_len--) { pglz_hist_add( hist_start, hist_entries, hist_next, hist_recycle, dp, dend); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } found_match = true; } else { /* * No match found. Copy one literal byte. */ pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); pglz_hist_add( hist_start, hist_entries, hist_next, hist_recycle, dp, dend); dp++; /* Do not do this ++ in the line above! */ /* The macro would do it four times - Jan. */ } } /* * Write out the last control byte and check that we haven't overrun the * output size allowed by the strategy. */ *ctrlp = ctrlb; result_size = bp - bstart; if (result_size >= result_max) { return -1; } /* success */ return result_size; } /* ---------- * pglz_decompress - * * Decompresses source into dest. Returns the number of bytes * decompressed in the destination buffer, and *optionally* * checks that both the source and dest buffers have been * fully read and written to, respectively. * ---------- */ int32 lz_decompress(const char* source, int32 slen, char* dest, int32 rawsize, bool check_complete) { const unsigned char* sp; const unsigned char* srcend; unsigned char* dp; unsigned char* destend; errno_t rc = 0; sp = (const unsigned char*) source; srcend = ((const unsigned char*) source) + slen; dp = (unsigned char*) dest; destend = dp + rawsize; while (sp < srcend && dp < destend) { /* * Read one control byte and process the next 8 items (or as many as * remain in the compressed input). */ unsigned char ctrl = *sp++; int ctrlc; for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++) { if (ctrl & 1) { /* * Set control bit means we must read a match tag. The match * is coded with two bytes. First byte uses lower nibble to * code length - 3. Higher nibble contains upper 4 bits of the * offset. The next following byte contains the lower 8 bits * of the offset. 
If the length is coded as 18, another * extension tag byte tells how much longer the match really * was (0-255). */ int32 len; int32 off; len = (sp[0] & 0x0f) + 3; off = ((sp[0] & 0xf0) << 4) | sp[1]; sp += 2; if (len == 18) { len += *sp++; } /* * Now we copy the bytes specified by the tag from OUTPUT to * OUTPUT (copy len bytes from dp - off to dp). The copied * areas could overlap, to preven possible uncertainty, we * copy only non-overlapping regions. */ len = Min(len, destend - dp); while (off < len) { /*--------- * When offset is smaller than length - source and * destination regions overlap. memmove() is resolving * this overlap in an incompatible way with pglz. Thus we * resort to memcpy()-ing non-overlapping regions. * * Consider input: 112341234123412341234 * At byte 5 here ^ we have match with length 16 and * offset 4. 11234M(len=16, off=4) * We are decoding first period of match and rewrite match * 112341234M(len=12, off=8) * * The same match is now at position 9, it points to the * same start byte of output, but from another position: * the offset is doubled. * * We iterate through this offset growth until we can * proceed to usual memcpy(). If we would try to decode * the match at byte 5 (len=16, off=4) by memmove() we * would issue memmove(5, 1, 16) which would produce * 112341234XXXXXXXXXXXX, where series of X is 12 * undefined bytes, that were at bytes [5:17]. * --------- */ errno_t rc = memcpy_s(dp, off + 1, dp - off, off); securec_check(rc, "", ""); len -= off; dp += off; off += off; } rc = memcpy_s(dp, len + 1, dp - off, len); securec_check(rc, "", ""); dp += len; } else { /* * An unset control bit means LITERAL BYTE. So we just copy * one from INPUT to OUTPUT. */ *dp++ = *sp++; } /* * Advance the control bit */ ctrl >>= 1; } } /* * Check we decompressed the right amount. If we are slicing, then we * won't necessarily be at the end of the source or dest buffers when we * hit a stop, so we don't test them. 
*/ if (check_complete && (dp != destend || sp != srcend)) { return -1; } /* * That's it. */ return (char*) dp - dest; } int CompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option) { if (PageIs8BXidHeapVersion(src)) { return TemplateCompressPage(src, dst, dst_size, option); } else { return TemplateCompressPage(src, dst, dst_size, option); } } void CompressConvertRows(char *buf, char *aux_buf, int16 *real_order, uint16 max_row_len, uint16 real_row_cnt) { errno_t ret; HeapPageHeaderData *page = (HeapPageHeaderData *)buf; uint16 row_cnt = real_row_cnt; uint32 total_size = page->pd_special - page->pd_upper; char *copy_begin = buf + page->pd_upper; char *row; uint16 i, j, k, cur, up, row_size; ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); securec_check(ret, "", ""); k = 0; for (i = 0; i < max_row_len; i++) { for (j = 0; j < row_cnt; j++) { up = (j == (row_cnt - 1)) ? page->pd_special : GET_ITEMID_BY_IDX(buf, (real_order[j + 1]))->lp_off; cur = GET_ITEMID_BY_IDX(buf, (real_order[j]))->lp_off; row_size = up - cur; row = buf + cur; if (i < row_size) { aux_buf[k++] = row[i]; // this part is reshaped } } } if (k != total_size) { printf("ERROR!!! 
convert_rows_2 error...!!!\n"); ASSERT(0); return; } // cp aux_buf to page_buf ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size); securec_check(ret, "", ""); return ; } void CompressConvertItemIds(char *buf, char *aux_buf) { errno_t ret; HeapPageHeaderData *page = (HeapPageHeaderData *)buf; uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData); uint32 total_size = row_cnt * sizeof(ItemIdData); char *copy_begin = buf + GetPageHeaderSize(page); uint16 i, j, k; // clear aux_buf ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); securec_check(ret, "", ""); k = 0; for (i = 0; i < row_cnt; i++) { for (j = 0; j < sizeof(ItemIdData); j++) { aux_buf[j * row_cnt + i] = copy_begin[k++]; } } // cp aux_buf to page_buf ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size); securec_check(ret, "", ""); return ; } void cprs_diff_convert_rows(char *buf, uint32 offset,uint16 min_row_len, uint16 real_row_cnt) { uint16 row_cnt = real_row_cnt; uint32 common_size = min_row_len; uint8 *copy_begin = (uint8 *)(buf + offset); uint16 i, j; for (i = 0; i < common_size; i++) { for (j = row_cnt - 1; j > 0; j--) { copy_begin[i * row_cnt + j] -= copy_begin[i * row_cnt + (j - 1)]; } } return ; } bool CompressConvertOnePage(char *buf, char *aux_buf, bool diff_convert) { uint16 max_row_len = 0; uint16 min_row_len = 0; int16 *real_order = NULL; // itemids are not in order sometimes. 
we must find the real uint16 real_row_cnt = 0; if (!CompressConvertCheck(buf, &real_order, &max_row_len, &min_row_len, &real_row_cnt)) { if (real_order != NULL) { free(real_order); } return false; } CompressConvertRows(buf, aux_buf, real_order, max_row_len, real_row_cnt); CompressConvertItemIds(buf, aux_buf); if (diff_convert) { cprs_diff_convert_rows(buf, ((HeapPageHeaderData *)buf)->pd_upper, min_row_len, real_row_cnt); cprs_diff_convert_rows(buf, GetPageHeaderSize(buf), sizeof(ItemIdData), (((HeapPageHeaderData *)buf)->pd_lower - GetPageHeaderSize(buf)) / sizeof(ItemIdData)); } if (real_order != NULL) { free(real_order); } return true; } void CompressPagePrepareConvert(char *src, bool diff_convert, bool *real_ByteConvert) { char *aux_buf = NULL; errno_t rc; aux_buf = (char *)malloc(BLCKSZ); if (aux_buf == NULL) { // add log return; } rc = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ); securec_check(rc, "", ""); // do convert *real_ByteConvert = false; if (CompressConvertOnePage(src, aux_buf, diff_convert)) { *real_ByteConvert = true; } if (aux_buf != NULL) { free(aux_buf); } } /** * CompressPage() -- Compress one page. * * Only the parts other than the page header will be compressed. The * compressed data is rounded by chunck_size, The insufficient part is * filled with zero. Compression needs to be able to save at least one * chunk of space, otherwise it fail. * This function returen the size of compressed data or * -1 for compression fail * COMPRESS_UNSUPPORTED_ERROR for unrecognized compression algorithm */ template int TemplateCompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option) { int compressed_size; int8 level = option.compressLevelSymbol ? 
option.compressLevel : -option.compressLevel; size_t sizeOfHeaderData = GetSizeOfHeadData(heapPageData); char *src_copy = NULL; bool real_ByteConvert = false; errno_t rc; char* data; if (option.byteConvert) { // copy and maybe change it src_copy = (char *)malloc(BLCKSZ); if (src_copy == NULL) { // add log return -1; } rc = memcpy_s(src_copy, BLCKSZ, src, BLCKSZ); securec_check(rc, "", ""); CompressPagePrepareConvert(src_copy, option.diffConvert, &real_ByteConvert); /* preprocess convert src */ } if (heapPageData) { data = ((HeapPageCompressData*)dst)->data; } else { data = ((PageCompressData*)dst)->data; } switch (option.compressAlgorithm) { case COMPRESS_ALGORITHM_PGLZ: if (real_ByteConvert) { compressed_size = lz_compress(src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data); } else { compressed_size = lz_compress(src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data); } break; case COMPRESS_ALGORITHM_ZSTD: { if (level == 0 || level < MIN_ZSTD_COMPRESSION_LEVEL || level > MAX_ZSTD_COMPRESSION_LEVEL) { level = DEFAULT_ZSTD_COMPRESSION_LEVEL; } if (real_ByteConvert) { compressed_size = ZSTD_compress(data, dst_size, src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, level); } else { compressed_size = ZSTD_compress(data, dst_size, src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, level); } if (ZSTD_isError(compressed_size)) { if (src_copy != NULL) { free(src_copy); } return -1; } break; } default: if (src_copy != NULL) { free(src_copy); } return COMPRESS_UNSUPPORTED_ERROR; } if (compressed_size < 0) { if (src_copy != NULL) { free(src_copy); } return -1; } if (heapPageData) { HeapPageCompressData* pcdptr = ((HeapPageCompressData*)dst); rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData); securec_check(rc, "", ""); pcdptr->size = compressed_size; pcdptr->byte_convert = real_ByteConvert; pcdptr->diff_convert = option.diffConvert; } else { PageCompressData* pcdptr = ((PageCompressData*)dst); rc = memcpy_s(pcdptr->page_header, 
sizeOfHeaderData, src, sizeOfHeaderData); securec_check(rc, "", ""); pcdptr->size = compressed_size; pcdptr->byte_convert = real_ByteConvert; pcdptr->diff_convert = option.diffConvert; } if (src_copy != NULL) { free(src_copy); } return SIZE_OF_PAGE_COMPRESS_DATA_HEADER_DATA(heapPageData) + compressed_size; } /** * CompressPageBufferBound() * -- Get the destination buffer boundary to compress one page. * Return needed destination buffer size for compress one page or * -1 for unrecognized compression algorithm */ int CompressPageBufferBound(const char* page, uint8 algorithm) { switch (algorithm) { case COMPRESS_ALGORITHM_PGLZ: return BLCKSZ + 4; case COMPRESS_ALGORITHM_ZSTD: return ZSTD_compressBound(BLCKSZ - GetPageHeaderSize(page)); default: return -1; } }