Files
doris/be/src/exec/decompressor.h
sduzh 6fedf5881b [CodeFormat] Clang-format cpp sources (#4965)
Clang-format all c++ source files.
2020-11-28 18:36:49 +08:00

242 lines
7.9 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <bzlib.h>
#include <lz4/lz4frame.h>
#include <zlib.h>
#ifdef DORIS_WITH_LZO
#include <lzo/lzo1x.h>
#include <lzo/lzoconf.h>
#endif
#include "common/status.h"
namespace doris {
// Compression formats that can be requested from Decompressor::create_decompressor().
// Kept as an unscoped enum; enumerators take the implicit values 0..5 in declaration order.
enum CompressType {
    UNCOMPRESSED, // no compression
    GZIP,         // presumably served by GzipDecompressor (see its is_deflate flag)
    DEFLATE,      // presumably served by GzipDecompressor (see its is_deflate flag)
    BZIP2,        // used by Bzip2Decompressor
    LZ4FRAME,     // used by Lz4FrameDecompressor
    LZOP          // used by LzopDecompressor (only built with DORIS_WITH_LZO)
};
// Abstract base class of the streaming decompressors declared in this header.
// The concrete classes keep their constructors private and befriend this class,
// so instances are meant to be obtained through create_decompressor().
class Decompressor {
public:
    virtual ~Decompressor();

    // Implemented by each derived class.
    //
    // input(in):               buf where decompression begins
    // input_len(in):           max length of input buf
    // input_bytes_read(out):   bytes consumed by the decompressor
    // output(out):             buf where decompressed data is saved
    // output_max_len(in):      max length of output buf
    // decompressed_len(out):   decompressed data size in output buf
    // stream_end(out):         true if the end of the stream is reached,
    //                          or an entire block was decompressed normally
    // more_input_bytes(out):   decompressor needs more bytes to consume
    // more_output_bytes(out):  decompressor needs more space to save decompressed data
    //
    // input and output buf should be allocated and released outside
    virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read,
                              uint8_t* output, size_t output_max_len, size_t* decompressed_len,
                              bool* stream_end, size_t* more_input_bytes,
                              size_t* more_output_bytes) = 0;

    // Factory entry point: the decompressor for `type` is handed back through
    // `decompressor`. NOTE(review): ownership of the returned object and the
    // behavior for unsupported types are defined in the .cpp, not visible here.
    static Status create_decompressor(CompressType type, Decompressor** decompressor);

    // Returns a string describing this decompressor; each derived class overrides it.
    virtual std::string debug_info();

    // Compression type this instance was constructed with.
    // const-qualified so it can be queried through const references/pointers.
    CompressType get_type() const { return _ctype; }

protected:
    // Format-specific setup, implemented by each derived class.
    // Presumably invoked by create_decompressor() after construction - confirm in the .cpp.
    virtual Status init() = 0;

    // explicit: a CompressType must never convert implicitly into a Decompressor.
    explicit Decompressor(CompressType ctype) : _ctype(ctype) {}

    CompressType _ctype;
};
class GzipDecompressor : public Decompressor {
public:
virtual ~GzipDecompressor();
virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read,
uint8_t* output, size_t output_max_len, size_t* decompressed_len,
bool* stream_end, size_t* more_input_bytes,
size_t* more_output_bytes) override;
virtual std::string debug_info() override;
private:
friend class Decompressor;
GzipDecompressor(bool is_deflate);
virtual Status init() override;
private:
bool _is_deflate;
z_stream _z_strm;
// These are magic numbers from zlib.h. Not clear why they are not defined there.
const static int WINDOW_BITS = 15; // Maximum window size
const static int DETECT_CODEC = 32; // Determine if this is libz or gzip from header.
};
class Bzip2Decompressor : public Decompressor {
public:
virtual ~Bzip2Decompressor();
virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read,
uint8_t* output, size_t output_max_len, size_t* decompressed_len,
bool* stream_end, size_t* more_input_bytes,
size_t* more_output_bytes) override;
virtual std::string debug_info() override;
private:
friend class Decompressor;
Bzip2Decompressor() : Decompressor(CompressType::BZIP2) {}
virtual Status init() override;
private:
bz_stream _bz_strm;
};
class Lz4FrameDecompressor : public Decompressor {
public:
virtual ~Lz4FrameDecompressor();
virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read,
uint8_t* output, size_t output_max_len, size_t* decompressed_len,
bool* stream_end, size_t* more_input_bytes,
size_t* more_output_bytes) override;
virtual std::string debug_info() override;
private:
friend class Decompressor;
Lz4FrameDecompressor() : Decompressor(CompressType::LZ4FRAME) {}
virtual Status init() override;
size_t get_block_size(const LZ4F_frameInfo_t* info);
private:
LZ4F_dctx* _dctx;
size_t _expect_dec_buf_size;
const static unsigned DORIS_LZ4F_VERSION;
};
#ifdef DORIS_WITH_LZO
class LzopDecompressor : public Decompressor {
public:
virtual ~LzopDecompressor();
virtual Status decompress(uint8_t* input, size_t input_len, size_t* input_bytes_read,
uint8_t* output, size_t output_max_len, size_t* decompressed_len,
bool* stream_end, size_t* more_input_bytes,
size_t* more_output_bytes) override;
virtual std::string debug_info() override;
private:
friend class Decompressor;
LzopDecompressor()
: Decompressor(CompressType::LZOP), _header_info({0}), _is_header_loaded(false) {}
virtual Status init() override;
private:
enum LzoChecksum { CHECK_NONE, CHECK_CRC32, CHECK_ADLER };
private:
inline uint8_t* get_uint8(uint8_t* ptr, uint8_t* value) {
*value = *ptr;
return ptr + sizeof(uint8_t);
}
inline uint8_t* get_uint16(uint8_t* ptr, uint16_t* value) {
*value = *ptr << 8 | *(ptr + 1);
return ptr + sizeof(uint16_t);
}
inline uint8_t* get_uint32(uint8_t* ptr, uint32_t* value) {
*value = (*ptr << 24) | (*(ptr + 1) << 16) | (*(ptr + 2) << 8) | *(ptr + 3);
return ptr + sizeof(uint32_t);
}
inline LzoChecksum header_type(int flags) {
return (flags & F_H_CRC32) ? CHECK_CRC32 : CHECK_ADLER;
}
inline LzoChecksum input_type(int flags) {
return (flags & F_CRC32_C) ? CHECK_CRC32 : (flags & F_ADLER32_C) ? CHECK_ADLER : CHECK_NONE;
}
inline LzoChecksum output_type(int flags) {
return (flags & F_CRC32_D) ? CHECK_CRC32 : (flags & F_ADLER32_D) ? CHECK_ADLER : CHECK_NONE;
}
Status parse_header_info(uint8_t* input, size_t input_len, size_t* input_bytes_read,
size_t* more_bytes_needed);
Status checksum(LzoChecksum type, const std::string& source, uint32_t expected, uint8_t* ptr,
size_t len);
private:
// lzop header info
struct HeaderInfo {
uint16_t version;
uint16_t lib_version;
uint16_t version_needed;
uint8_t method;
std::string filename;
uint32_t header_size;
LzoChecksum header_checksum_type;
LzoChecksum input_checksum_type;
LzoChecksum output_checksum_type;
};
struct HeaderInfo _header_info;
// true if header is decompressed and loaded
bool _is_header_loaded;
private:
const static uint8_t LZOP_MAGIC[9];
const static uint64_t LZOP_VERSION;
const static uint64_t MIN_LZO_VERSION;
const static uint32_t MIN_HEADER_SIZE;
const static uint32_t LZO_MAX_BLOCK_SIZE;
const static uint32_t CRC32_INIT_VALUE;
const static uint32_t ADLER32_INIT_VALUE;
const static uint64_t F_H_CRC32;
const static uint64_t F_MASK;
const static uint64_t F_OS_MASK;
const static uint64_t F_CS_MASK;
const static uint64_t F_RESERVED;
const static uint64_t F_MULTIPART;
const static uint64_t F_H_FILTER;
const static uint64_t F_H_EXTRA_FIELD;
const static uint64_t F_CRC32_C;
const static uint64_t F_ADLER32_C;
const static uint64_t F_CRC32_D;
const static uint64_t F_ADLER32_D;
};
#endif // DORIS_WITH_LZO
} // namespace doris