Author: x
Date: 2023-06-07 13:58:24 +02:00
Parent: 9aeaf67834
Commit: ab4b902fdb
366 changed files with 83832 additions and 0 deletions


@@ -0,0 +1,7 @@
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"


@@ -0,0 +1,4 @@
[flake8]
filename=*.pyx,*.pxd
select=E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
show_source=True


@@ -0,0 +1,3 @@
[flake8]
ignore=E501,E203,W503,E266
show_source=True


@@ -0,0 +1,10 @@
#!/bin/bash
# Download and install HDF5 $HDF5_VERSION from source for building wheels.
HDF5_VERSION=$1
curl https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-${HDF5_VERSION%.*}/hdf5-$HDF5_VERSION/src/hdf5-$HDF5_VERSION.tar.gz -O -s
tar -xzf hdf5-$HDF5_VERSION.tar.gz
cd hdf5-$HDF5_VERSION
./configure --prefix=/usr/local
make -j 2
make install
cd ..


@@ -0,0 +1,32 @@
name: bitshuffle-ci-build
on:
pull_request:
branches:
- master
push:
branches:
- master
jobs:
lint-code:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install pip dependencies
run: |
pip install black flake8
- name: Run flake8
run: |
flake8 --config $GITHUB_WORKSPACE/.github/workflows/flake8_python.cfg bitshuffle tests
flake8 --config $GITHUB_WORKSPACE/.github/workflows/flake8_cython.cfg bitshuffle tests
- name: Check code with black
run: black --check .


@@ -0,0 +1,58 @@
name: bitshuffle-ci-build
on:
pull_request:
branches:
- master
push:
branches:
- master
jobs:
run-tests:
strategy:
matrix:
python-version: ["3.6", "3.7", "3.10"]
os: [ubuntu-latest, macos-latest]
exclude:
- os: macos-latest
python-version: "3.6"
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- name: Install apt dependencies
if: ${{ matrix.os == 'ubuntu-latest' }}
run: |
sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config
- name: Install homebrew dependencies
if: ${{ matrix.os == 'macos-latest' }}
run: |
brew install hdf5 pkg-config
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install h5py
if: ${{ matrix.os == 'macos-latest' }}
run: |
pip install h5py
- name: Install pip dependencies
run: |
pip install Cython
pip install -r requirements.txt
pip install pytest
# Pull in ZSTD repo
git submodule update --init
          # Install the plugin to an arbitrary directory to check the install script.
python setup.py install --h5plugin --h5plugin-dir ~/hdf5/lib --zstd
- name: Run tests
run: pytest -v .


@@ -0,0 +1,104 @@
name: Build bitshuffle wheels and upload to PyPI
on:
workflow_dispatch:
release:
types:
- published
jobs:
build_wheels:
name: Build wheels on ${{ matrix.os }} and hdf5-${{ matrix.hdf5 }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
hdf5: ["1.10.7"]
steps:
# Checkout bitshuffle
- uses: actions/checkout@v3
# Build wheels for linux and x86 platforms
- name: Build wheels
uses: pypa/cibuildwheel@v2.11.2
with:
output-dir: ./wheelhouse-hdf5-${{ matrix.hdf5}}
env:
CIBW_SKIP: "pp* *musllinux* cp311-macosx*"
CIBW_ARCHS: "x86_64"
CIBW_BEFORE_ALL: |
chmod +x .github/workflows/install_hdf5.sh
.github/workflows/install_hdf5.sh ${{ matrix.hdf5 }}
git submodule update --init
# Only build Haswell wheels on x86 for compatibility
CIBW_ENVIRONMENT: >
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
CPATH=/usr/local/include
ENABLE_ZSTD=1
BITSHUFFLE_ARCH=haswell
CIBW_TEST_REQUIRES: pytest
# Install different version of HDF5 for unit tests to ensure the
# wheels are independent of HDF5 installation
# CIBW_BEFORE_TEST: |
# chmod +x .github/workflows/install_hdf5.sh
# .github/workflows/install_hdf5.sh 1.8.11
      # Run unit tests but disable test_h5plugin.py
CIBW_TEST_COMMAND: pytest {package}/tests
      # The GitHub macOS runners don't support AVX2 instructions, so the tests would fail with SIGILL; skip them
CIBW_TEST_SKIP: "*macosx*"
# Package wheels and host on CI
- uses: actions/upload-artifact@v3
with:
path: ./wheelhouse-hdf5-${{ matrix.hdf5 }}/*.whl
build_sdist:
name: Build source distribution
strategy:
matrix:
python-version: ["3.8"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install apt dependencies
run: |
sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install pip dependencies
run: |
pip install -r requirements.txt
- name: Build sdist
run: python setup.py sdist
- uses: actions/upload-artifact@v3
with:
path: dist/*.tar.gz
# Upload to PyPI
upload_pypi:
needs: [build_wheels, build_sdist]
runs-on: ubuntu-latest
# Upload to PyPI on every tag
# if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags')
# Alternatively, to publish when a GitHub Release is created, use the following rule:
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- uses: actions/download-artifact@v3
with:
name: artifact
path: dist
- uses: pypa/gh-action-pypi-publish@v1.5.1
with:
user: __token__
password: ${{ secrets.pypi_password }}
# To test: repository_url: https://test.pypi.org/legacy/


@@ -0,0 +1,79 @@
## C
# Object files
*.o
*.ko
*.obj
*.elf
# Libraries
*.lib
*.a
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
## Python
*.py[cod]
# C extensions
*.so
# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
__pycache__
# Installer logs
pip-log.txt
# Unit test / coverage reports
.coverage
.tox
nosetests.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Documentation builds
doc/_build
doc/generated
## Editor files and backups.
*.swp
*.swo
# Generated files
bitshuffle/ext.c
bitshuffle/h5.c
# IntelliJ
.idea


@@ -0,0 +1,3 @@
[submodule "zstd"]
path = zstd
url = https://github.com/facebook/zstd


@@ -0,0 +1,21 @@
Bitshuffle - Filter for improving compression of typed binary data.
Copyright (c) 2014 Kiyoshi Masui (kiyo@physics.ubc.ca)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


@@ -0,0 +1,10 @@
recursive-include src *.h *.c
recursive-include bitshuffle *.pyx
recursive-include lz4 *.h *.c
recursive-include lzf *.h *.c
include setup.cfg.example
include LICENSE
include README.rst
include requirements.txt
exclude setup.cfg


@@ -0,0 +1,282 @@
==========
Bitshuffle
==========
Filter for improving compression of typed binary data.
Bitshuffle is an algorithm that rearranges typed binary data to improve
compression, as well as a python/C package that implements this algorithm
within the Numpy framework.

The library can be used alongside HDF5 to compress and decompress datasets and
is integrated through the `dynamically loaded filters`_ framework. Bitshuffle
is HDF5 filter number ``32008``.

Algorithmically, Bitshuffle is closely related to HDF5's `Shuffle filter`_
except it operates at the bit level instead of the byte level. Arranging a
typed data array into a matrix with the elements as the rows and the bits
within the elements as the columns, Bitshuffle "transposes" the matrix, such
that all the least-significant bits end up in one row, the next bits in the
next row, and so on. This transpose is performed within blocks of data roughly
8 kB long [1]_.
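
As a toy illustration (ignoring the 8 kB blocking and the exact bit ordering
of the optimized implementation, and far slower), the transpose can be
written with Numpy's bit routines::

    import numpy as np

    def bit_transpose(arr):
        # Row k of the result gathers bit-plane k from every element.
        n, k = arr.size, arr.dtype.itemsize
        bits = np.unpackbits(arr.view(np.uint8).reshape(n, k), axis=1)
        return np.packbits(bits.T)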
This does not in itself compress data, only rearranges it for more efficient
compression. To perform the actual compression you will need a compression
library. Bitshuffle has been designed to be well matched to Marc Lehmann's
LZF_ as well as LZ4_ and ZSTD_. Note that because Bitshuffle modifies the data at the bit
level, sophisticated entropy-reducing compression libraries such as GZIP and
BZIP are unlikely to achieve significantly better compression than simpler and
faster duplicate-string-elimination algorithms such as LZF, LZ4 and ZSTD. Bitshuffle
thus includes routines (and HDF5 filter options) to apply LZ4 and ZSTD compression to
each block after shuffling [2]_.
The Bitshuffle algorithm relies on neighbouring elements of a dataset being
highly correlated to improve data compression. Any correlations that span at
least 24 elements of the dataset may be exploited to improve compression.
Bitshuffle was designed with performance in mind. On most machines the
time required for Bitshuffle+LZ4 is insignificant compared to the time required
to read or write the compressed data to disk. Because it is able to exploit the
SSE and AVX instruction sets present on modern Intel and AMD processors, on
these machines compression is only marginally slower than an out-of-cache
memory copy. On modern x86 processors you can expect Bitshuffle to have a
throughput of roughly 1 byte per clock cycle, and on the Haswell generation of
Intel processors (2013) and later, you can expect up to 2 bytes per clock
cycle. In addition, Bitshuffle is parallelized using OpenMP.
As a bonus, Bitshuffle ships with a dynamically loaded version of
`h5py`'s LZF compression filter, such that the filter can be transparently
used outside of python and in command line utilities such as ``h5dump``.
.. [1] Chosen to fit comfortably within L1 cache as well as to be well matched
   to the window of the LZF compression library.
.. [2] Compared to bitshuffling the full dataset and then compressing it in
   one pass, compressing each block right after it is shuffled has the
   tremendous advantage that the block is already in the L1 cache.
.. _`dynamically loaded filters`: http://www.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf
.. _`Shuffle filter`: http://www.hdfgroup.org/HDF5/doc_resource/H5Shuffle_Perf.pdf
.. _LZF: http://oldhome.schmorp.de/marc/liblzf.html
.. _LZ4: https://code.google.com/p/lz4/
.. _ZSTD: https://github.com/facebook/zstd
Applications
------------
Bitshuffle might be right for your application if:
- You need to compress typed binary data.
- Your data is arranged such that adjacent elements over the fastest varying
index of your dataset are similar (highly correlated).
- A special case of the previous point is if you are only exercising a subset
  of the bits in your data-type, as is often true of integer data (see the
  example below).
- You need both high compression ratios and high performance.
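
As a quick illustration of the integer case above (sizes are indicative
only), integers that exercise only a few low bits compress dramatically once
bitshuffled::

    import numpy as np
    import bitshuffle

    # 64-bit integers that only ever use their low 4 bits.
    data = np.random.randint(0, 16, size=1000000).astype(np.uint64)

    compressed = bitshuffle.compress_lz4(data)
    print(data.nbytes, "->", compressed.size, "bytes")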
Comparing Bitshuffle to other compression algorithms and HDF5 filters:
- Bitshuffle is less general than many other compression algorithms.
To achieve good compression ratios, consecutive elements of your data must
be highly correlated.
- For the right datasets, Bitshuffle is one of the few compression
algorithms that promises both high throughput and high compression ratios.
- Bitshuffle should have roughly the same throughput as Shuffle, but
may obtain higher compression ratios.
- The MAFISC_ filter actually includes something similar to Bitshuffle as one
  of its prefilters. However, MAFISC's emphasis is on obtaining high
  compression ratios at all costs, sacrificing throughput.
.. _MAFISC: http://wr.informatik.uni-hamburg.de/research/projects/icomex/mafisc
Installation for Python
-----------------------
In most cases bitshuffle can be installed with `pip`::

    pip install bitshuffle

On Linux and macOS x86_64 platforms binary wheels are available; on other
platforms a source build will be performed. The binary wheels are built with
AVX2 support and will only run on processors that support these instructions
(most processors from 2015 onwards, i.e. Intel Haswell, AMD Excavator and
later). On an unsupported processor these builds of bitshuffle will crash with
`SIGILL`. To run on unsupported x86_64 processors, or to target newer
instructions such as AVX512, you should perform a build from source. This can
be forced by giving pip the `--no-binary=bitshuffle` option.
Source installation requires python 2.7+ or 3.3+, HDF5 1.8.4 or later, HDF5
for python (h5py), Numpy and Cython. Bitshuffle is linked against HDF5. Using
the dynamically loaded HDF5 filter requires HDF5 1.8.11 or later.
For total control, bitshuffle can be built using `python setup.py`. If ZSTD
support is to be enabled, the ZSTD repo needs to be pulled into bitshuffle
before installation with::

    git submodule update --init

To build and install bitshuffle::

    python setup.py install [--h5plugin [--h5plugin-dir=spam] --zstd]
To get finer control of installation options, including whether to compile
with OpenMP multi-threading and which microarchitecture to target, copy
``setup.cfg.example`` to ``setup.cfg`` and edit the values therein.

If using the dynamically loaded HDF5 filter (which gives you access to the
Bitshuffle and LZF filters outside of python), set the environment variable
``HDF5_PLUGIN_PATH`` to the value of ``--h5plugin-dir`` or use HDF5's default
search location of ``/usr/local/hdf5/lib/plugin``. ZSTD support is enabled
with ``--zstd``.
If you get an error about missing source files when building the extensions,
try upgrading setuptools. There is a weird bug where setuptools prior to 0.7
doesn't work properly with Cython in some cases.
.. _source: http://docs.h5py.org/en/latest/build.html#source-installation
Usage from Python
-----------------
The `bitshuffle` module contains routines for shuffling and unshuffling
Numpy arrays.
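
A minimal round trip looks like this (any C-contiguous Numpy array works)::

    import numpy as np
    import bitshuffle

    data = np.arange(100, dtype=np.uint32)

    # block_size=0 lets the library choose a block size.
    shuffled = bitshuffle.bitshuffle(data)
    restored = bitshuffle.bitunshuffle(shuffled)
    assert np.array_equal(data, restored)

    # Shuffle and compress in one step; the shape and dtype are needed
    # to undo it.
    compressed = bitshuffle.compress_lz4(data)
    restored = bitshuffle.decompress_lz4(compressed, data.shape, data.dtype)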
If installed with the dynamically loaded filter plugins, Bitshuffle can be
used in conjunction with HDF5 both inside and outside of python, in the same
way as any other filter, simply by specifying the filter number ``32008``.
Otherwise the filter will be available only within python and only after
importing `bitshuffle.h5`. Reading Bitshuffle-encoded datasets is transparent.
The filter can be added to new datasets either through the `h5py` low level
interface or through the convenience functions provided in
`bitshuffle.h5`. See the docstrings and unit tests for examples. For `h5py`
version 2.5.0 and later Bitshuffle can be added to new datasets through the
high level interface, as in the example below.
The compression algorithm can be configured using the `filter_opts` argument
of `bitshuffle.h5.create_dataset()`. LZ4 is chosen with
`(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)` and ZSTD with
`(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)`. See `test_h5filter.py` for an
example.
Example h5py
------------
::
import h5py
import numpy
import bitshuffle.h5
print(h5py.__version__) # >= '2.5.0'
f = h5py.File(filename, "w")
    # block_size = 0 lets Bitshuffle choose its value
block_size = 0
dataset = f.create_dataset(
"data",
(100, 100, 100),
compression=bitshuffle.h5.H5FILTER,
compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
dtype='float32',
)
# create some random data
array = numpy.random.rand(100, 100, 100)
array = array.astype('float32')
dataset[:] = array
f.close()
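
If Bitshuffle was built with ZSTD support (``--zstd``), only the dataset
creation changes; the compression level of 5 below is purely illustrative::

    dataset = f.create_dataset(
        "data",
        (100, 100, 100),
        compression=bitshuffle.h5.H5FILTER,
        compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_ZSTD, 5),
        dtype='float32',
    )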
Usage from C
------------
If you wish to use Bitshuffle in your C program and would prefer not to use the
HDF5 dynamically loaded filter, the C library in the ``src/`` directory is
self-contained and complete.
Usage from Java
---------------
Bitshuffle can also be used from Java: the routines for shuffling and
unshuffling have been ported into `snappy-java`_. To use them, add the
following dependency to your pom.xml::
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<version>1.1.3-M1</version>
</dependency>
First, import org.xerial.snappy.BitShuffle in your Java code::
import org.xerial.snappy.BitShuffle;
Then, use them like this::
int[] data = new int[] {1, 3, 34, 43, 34};
byte[] shuffledData = BitShuffle.bitShuffle(data);
int[] result = BitShuffle.bitUnShuffleIntArray(shuffledData);
.. _`snappy-java`: https://github.com/xerial/snappy-java
Rust HDF5 plugin
----------------
If you wish to open HDF5 files compressed with bitshuffle in your Rust program, there is a `Rust binding`_ for it.
In your Cargo.toml::
[dependencies]
...
hdf5-bitshuffle = "0.9"
...
To register the plugin in your code::
use hdf5_bitshuffle::register_bitshuffle_plugin;
fn main() {
register_bitshuffle_plugin();
}
.. _`Rust binding`: https://docs.rs/hdf5-bitshuffle/latest/hdf5_bitshuffle/
Anaconda
--------
The conda package can be built via::
conda build conda-recipe
For Best Results
----------------
Here are a few tips to help you get the most out of Bitshuffle:
- For multi-dimensional datasets, order your data such that the fastest
  varying dimension is the one over which your data is most correlated
  (i.e. over which values change the least), or fake this using chunks;
  a sketch follows this list.
- To achieve the highest throughput, use a data type that is 64 *bytes* or
smaller. If you have a very large compound data type, consider adding a
dimension to your datasets instead.
- To make full use of the SSE2 instruction set, use a data type whose size
is a multiple of 2 bytes. For the AVX2 instruction set, use a data type whose
size is a multiple of 4 bytes.
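
The first point can make a large difference. A sketch with synthetic data
(sizes and values are arbitrary) that compresses the same array in both
memory layouts::

    import numpy as np
    import bitshuffle

    # 16 channels of a slowly varying signal, 65536 samples each.
    t = np.linspace(0, 100, 65536)
    data = np.outer(np.arange(1, 17), 1000 * np.sin(t)).astype(np.int32)

    # Correlation runs along the time axis, so keep it fastest varying.
    good = bitshuffle.compress_lz4(np.ascontiguousarray(data))
    bad = bitshuffle.compress_lz4(np.ascontiguousarray(data.T))
    print(good.size, "vs", bad.size, "bytes")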
Citing Bitshuffle
-----------------
Bitshuffle was initially described in
http://dx.doi.org/10.1016/j.ascom.2015.07.002, pre-print available at
http://arxiv.org/abs/1503.00638.


@@ -0,0 +1,57 @@
# flake8: noqa
"""
Filter for improving compression of typed binary data.
Functions
=========
using_NEON
using_SSE2
using_AVX2
using_AVX512
bitshuffle
bitunshuffle
compress_lz4
decompress_lz4
compress_zstd
decompress_zstd
"""
from __future__ import absolute_import
from bitshuffle.ext import (
__version__,
__zstd__,
bitshuffle,
bitunshuffle,
using_NEON,
using_SSE2,
using_AVX2,
using_AVX512,
compress_lz4,
decompress_lz4,
)
# Import ZSTD API if enabled
zstd_api = []
if __zstd__:
from bitshuffle.ext import (
compress_zstd,
decompress_zstd,
)
zstd_api += ["compress_zstd", "decompress_zstd"]
__all__ = [
"__version__",
"bitshuffle",
"bitunshuffle",
"using_NEON",
"using_SSE2",
"using_AVX2",
"using_AVX512",
"compress_lz4",
"decompress_lz4",
] + zstd_api


@@ -0,0 +1,598 @@
"""
Wrappers for public and private bitshuffle routines
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
cimport numpy as np
cimport cython
np.import_array()
# Repeat each calculation this many times. For timing.
cdef int REPEATC = 1
# cdef int REPEATC = 32
REPEAT = REPEATC
cdef extern from b"bitshuffle.h":
int bshuf_using_NEON()
int bshuf_using_SSE2()
int bshuf_using_AVX2()
int bshuf_using_AVX512()
int bshuf_bitshuffle(void *A, void *B, int size, int elem_size,
int block_size) nogil
int bshuf_bitunshuffle(void *A, void *B, int size, int elem_size,
int block_size) nogil
int bshuf_compress_lz4_bound(int size, int elem_size, int block_size)
int bshuf_compress_lz4(void *A, void *B, int size, int elem_size,
int block_size) nogil
int bshuf_decompress_lz4(void *A, void *B, int size, int elem_size,
int block_size) nogil
IF ZSTD_SUPPORT:
int bshuf_compress_zstd_bound(int size, int elem_size, int block_size)
int bshuf_compress_zstd(void *A, void *B, int size, int elem_size,
int block_size, const int comp_lvl) nogil
int bshuf_decompress_zstd(void *A, void *B, int size, int elem_size,
int block_size) nogil
int BSHUF_VERSION_MAJOR
int BSHUF_VERSION_MINOR
int BSHUF_VERSION_POINT
__version__ = "%d.%d.%d" % (BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR,
BSHUF_VERSION_POINT)
IF ZSTD_SUPPORT:
__zstd__ = True
ELSE:
__zstd__ = False
# Prototypes from bitshuffle.c
cdef extern int bshuf_copy(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_elem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_elem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_elem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_byte_AVX512(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bitrow_eight(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_AVX512(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_bitrow_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_bitrow_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_bitrow_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_byte_bitrow_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_shuffle_bit_eightelem_AVX512(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_SSE(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_NEON(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_AVX(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_AVX512(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem_scal(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_trans_bit_elem(void *A, void *B, int size, int elem_size)
cdef extern int bshuf_untrans_bit_elem(void *A, void *B, int size, int elem_size)
ctypedef int (*Cfptr) (void *A, void *B, int size, int elem_size)
def using_NEON():
"""Whether compiled using Arm NEON instructions."""
if bshuf_using_NEON():
return True
else:
return False
def using_SSE2():
"""Whether compiled using SSE2 instructions."""
if bshuf_using_SSE2():
return True
else:
return False
def using_AVX2():
"""Whether compiled using AVX2 instructions."""
if bshuf_using_AVX2():
return True
else:
return False
def using_AVX512():
"""Whether compiled using AVX512 instructions."""
if bshuf_using_AVX512():
return True
else:
return False
def _setup_arr(arr):
shape = tuple(arr.shape)
if not arr.flags['C_CONTIGUOUS']:
msg = "Input array must be C-contiguous."
raise ValueError(msg)
size = arr.size
dtype = arr.dtype
itemsize = dtype.itemsize
out = np.empty(shape, dtype=dtype)
return out, size, itemsize
@cython.boundscheck(False)
@cython.wraparound(False)
cdef _wrap_C_fun(Cfptr fun, np.ndarray arr):
"""Wrap a C function with standard call signature."""
cdef int ii, size, itemsize, count=0
cdef np.ndarray out
out, size, itemsize = _setup_arr(arr)
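    # View input and output as flat byte buffers so their raw pointers can
    # be handed to the C routine.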
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
arr_flat = arr.view(np.uint8).ravel()
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
out_flat = out.view(np.uint8).ravel()
cdef void* arr_ptr = <void*> &arr_flat[0]
cdef void* out_ptr = <void*> &out_flat[0]
for ii in range(REPEATC):
count = fun(arr_ptr, out_ptr, size, itemsize)
if count < 0:
msg = "Failed. Error code %d."
excp = RuntimeError(msg % count, count)
raise excp
return out
def copy(np.ndarray arr not None):
"""Copies the data.
For testing and profiling purposes.
"""
return _wrap_C_fun(&bshuf_copy, arr)
def trans_byte_elem_scal(np.ndarray arr not None):
"""Transpose bytes within words but not bits.
"""
return _wrap_C_fun(&bshuf_trans_byte_elem_scal, arr)
def trans_byte_elem_SSE(np.ndarray arr not None):
"""Transpose bytes within array elements.
"""
return _wrap_C_fun(&bshuf_trans_byte_elem_SSE, arr)
def trans_byte_elem_NEON(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_byte_elem_NEON, arr)
def trans_bit_byte_scal(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_byte_scal, arr)
def trans_bit_byte_SSE(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_byte_SSE, arr)
def trans_bit_byte_NEON(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_byte_NEON, arr)
def trans_bit_byte_AVX(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_byte_AVX, arr)
def trans_bit_byte_AVX512(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_byte_AVX512, arr)
def trans_bitrow_eight(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bitrow_eight, arr)
def trans_bit_elem_AVX512(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_elem_AVX512, arr)
def trans_bit_elem_AVX(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_elem_AVX, arr)
def trans_bit_elem_scal(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_elem_scal, arr)
def trans_bit_elem_SSE(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_elem_SSE, arr)
def trans_bit_elem_NEON(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_elem_NEON, arr)
def trans_byte_bitrow_SSE(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_byte_bitrow_SSE, arr)
def trans_byte_bitrow_NEON(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_byte_bitrow_NEON, arr)
def trans_byte_bitrow_AVX(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_byte_bitrow_AVX, arr)
def trans_byte_bitrow_scal(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_byte_bitrow_scal, arr)
def shuffle_bit_eightelem_scal(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_scal, arr)
def shuffle_bit_eightelem_SSE(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_SSE, arr)
def shuffle_bit_eightelem_NEON(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_NEON, arr)
def shuffle_bit_eightelem_AVX(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX, arr)
def shuffle_bit_eightelem_AVX512(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX512, arr)
def untrans_bit_elem_SSE(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_untrans_bit_elem_SSE, arr)
def untrans_bit_elem_NEON(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_untrans_bit_elem_NEON, arr)
def untrans_bit_elem_AVX(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX, arr)
def untrans_bit_elem_AVX512(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX512, arr)
def untrans_bit_elem_scal(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_untrans_bit_elem_scal, arr)
def trans_bit_elem(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_trans_bit_elem, arr)
def untrans_bit_elem(np.ndarray arr not None):
return _wrap_C_fun(&bshuf_untrans_bit_elem, arr)
@cython.boundscheck(False)
@cython.wraparound(False)
def bitshuffle(np.ndarray arr not None, int block_size=0):
"""Bitshuffle an array.
Output array is the same shape and data type as input array but underlying
buffer has been bitshuffled.
Parameters
----------
arr : numpy array
        Data to be processed.
block_size : positive integer
Block size in number of elements. By default, block size is chosen
automatically.
Returns
-------
out : numpy array
Array with the same shape as input but underlying data has been
bitshuffled.
"""
cdef int ii, size, itemsize, count=0
cdef np.ndarray out
out, size, itemsize = _setup_arr(arr)
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
arr_flat = arr.view(np.uint8).ravel()
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
out_flat = out.view(np.uint8).ravel()
cdef void* arr_ptr = <void*> &arr_flat[0]
cdef void* out_ptr = <void*> &out_flat[0]
with nogil:
for ii in range(REPEATC):
count = bshuf_bitshuffle(arr_ptr, out_ptr, size, itemsize, block_size)
if count < 0:
msg = "Failed. Error code %d."
excp = RuntimeError(msg % count, count)
raise excp
return out
@cython.boundscheck(False)
@cython.wraparound(False)
def bitunshuffle(np.ndarray arr not None, int block_size=0):
"""Bitshuffle an array.
Output array is the same shape and data type as input array but underlying
buffer has been un-bitshuffled.
Parameters
----------
arr : numpy array
        Data to be processed.
block_size : positive integer
Block size in number of elements. Must match value used for shuffling.
Returns
-------
out : numpy array
Array with the same shape as input but underlying data has been
un-bitshuffled.
"""
cdef int ii, size, itemsize, count=0
cdef np.ndarray out
out, size, itemsize = _setup_arr(arr)
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
arr_flat = arr.view(np.uint8).ravel()
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
out_flat = out.view(np.uint8).ravel()
cdef void* arr_ptr = <void*> &arr_flat[0]
cdef void* out_ptr = <void*> &out_flat[0]
with nogil:
for ii in range(REPEATC):
count = bshuf_bitunshuffle(arr_ptr, out_ptr, size, itemsize, block_size)
if count < 0:
msg = "Failed. Error code %d."
excp = RuntimeError(msg % count, count)
raise excp
return out
@cython.boundscheck(False)
@cython.wraparound(False)
def compress_lz4(np.ndarray arr not None, int block_size=0):
"""Bitshuffle then compress an array using LZ4.
Parameters
----------
arr : numpy array
        Data to be processed.
block_size : positive integer
Block size in number of elements. By default, block size is chosen
automatically.
Returns
-------
out : array with np.uint8 data type
Buffer holding compressed data.
"""
cdef int ii, size, itemsize, count=0
shape = (arr.shape[i] for i in range(arr.ndim))
if not arr.flags['C_CONTIGUOUS']:
msg = "Input array must be C-contiguous."
raise ValueError(msg)
size = arr.size
dtype = arr.dtype
itemsize = dtype.itemsize
max_out_size = bshuf_compress_lz4_bound(size, itemsize, block_size)
cdef np.ndarray out
out = np.empty(max_out_size, dtype=np.uint8)
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
arr_flat = arr.view(np.uint8).ravel()
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
out_flat = out.view(np.uint8).ravel()
cdef void* arr_ptr = <void*> &arr_flat[0]
cdef void* out_ptr = <void*> &out_flat[0]
with nogil:
for ii in range(REPEATC):
count = bshuf_compress_lz4(arr_ptr, out_ptr, size, itemsize, block_size)
if count < 0:
msg = "Failed. Error code %d."
excp = RuntimeError(msg % count, count)
raise excp
return out[:count]
@cython.boundscheck(False)
@cython.wraparound(False)
def decompress_lz4(np.ndarray arr not None, shape, dtype, int block_size=0):
"""Decompress a buffer using LZ4 then bitunshuffle it yielding an array.
Parameters
----------
arr : numpy array
Input data to be decompressed.
shape : tuple of integers
Shape of the output (decompressed array). Must match the shape of the
original data array before compression.
dtype : numpy dtype
Datatype of the output array. Must match the data type of the original
data array before compression.
block_size : positive integer
Block size in number of elements. Must match value used for
compression.
Returns
-------
out : numpy array with shape *shape* and data type *dtype*
Decompressed data.
"""
cdef int ii, size, itemsize, count=0
if not arr.flags['C_CONTIGUOUS']:
msg = "Input array must be C-contiguous."
raise ValueError(msg)
size = np.prod(shape)
itemsize = dtype.itemsize
cdef np.ndarray out
out = np.empty(tuple(shape), dtype=dtype)
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
arr_flat = arr.view(np.uint8).ravel()
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
out_flat = out.view(np.uint8).ravel()
cdef void* arr_ptr = <void*> &arr_flat[0]
cdef void* out_ptr = <void*> &out_flat[0]
with nogil:
for ii in range(REPEATC):
count = bshuf_decompress_lz4(arr_ptr, out_ptr, size, itemsize,
block_size)
if count < 0:
msg = "Failed. Error code %d."
excp = RuntimeError(msg % count, count)
raise excp
if count != arr.size:
msg = "Decompressed different number of bytes than input buffer size."
msg += "Input buffer %d, decompressed %d." % (arr.size, count)
raise RuntimeError(msg, count)
return out
IF ZSTD_SUPPORT:
@cython.boundscheck(False)
@cython.wraparound(False)
def compress_zstd(np.ndarray arr not None, int block_size=0, int comp_lvl=1):
"""Bitshuffle then compress an array using ZSTD.
Parameters
----------
arr : numpy array
Data to be processed.
block_size : positive integer
Block size in number of elements. By default, block size is chosen
automatically.
comp_lvl : positive integer
Compression level applied by ZSTD
Returns
-------
out : array with np.uint8 data type
Buffer holding compressed data.
"""
cdef int ii, size, itemsize, count=0
shape = (arr.shape[i] for i in range(arr.ndim))
if not arr.flags['C_CONTIGUOUS']:
msg = "Input array must be C-contiguous."
raise ValueError(msg)
size = arr.size
dtype = arr.dtype
itemsize = dtype.itemsize
max_out_size = bshuf_compress_zstd_bound(size, itemsize, block_size)
cdef np.ndarray out
out = np.empty(max_out_size, dtype=np.uint8)
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
arr_flat = arr.view(np.uint8).ravel()
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
out_flat = out.view(np.uint8).ravel()
cdef void* arr_ptr = <void*> &arr_flat[0]
cdef void* out_ptr = <void*> &out_flat[0]
with nogil:
for ii in range(REPEATC):
count = bshuf_compress_zstd(arr_ptr, out_ptr, size, itemsize, block_size, comp_lvl)
if count < 0:
msg = "Failed. Error code %d."
excp = RuntimeError(msg % count, count)
raise excp
return out[:count]
@cython.boundscheck(False)
@cython.wraparound(False)
def decompress_zstd(np.ndarray arr not None, shape, dtype, int block_size=0):
"""Decompress a buffer using ZSTD then bitunshuffle it yielding an array.
Parameters
----------
arr : numpy array
Input data to be decompressed.
shape : tuple of integers
Shape of the output (decompressed array). Must match the shape of the
original data array before compression.
dtype : numpy dtype
Datatype of the output array. Must match the data type of the original
data array before compression.
block_size : positive integer
Block size in number of elements. Must match value used for
compression.
Returns
-------
out : numpy array with shape *shape* and data type *dtype*
Decompressed data.
"""
cdef int ii, size, itemsize, count=0
if not arr.flags['C_CONTIGUOUS']:
msg = "Input array must be C-contiguous."
raise ValueError(msg)
size = np.prod(shape)
itemsize = dtype.itemsize
cdef np.ndarray out
out = np.empty(tuple(shape), dtype=dtype)
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
arr_flat = arr.view(np.uint8).ravel()
cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
out_flat = out.view(np.uint8).ravel()
cdef void* arr_ptr = <void*> &arr_flat[0]
cdef void* out_ptr = <void*> &out_flat[0]
with nogil:
for ii in range(REPEATC):
count = bshuf_decompress_zstd(arr_ptr, out_ptr, size, itemsize,
block_size)
if count < 0:
msg = "Failed. Error code %d."
excp = RuntimeError(msg % count, count)
raise excp
if count != arr.size:
msg = "Decompressed different number of bytes than input buffer size."
msg += "Input buffer %d, decompressed %d." % (arr.size, count)
raise RuntimeError(msg, count)
return out


@@ -0,0 +1,235 @@
"""
HDF5 support for Bitshuffle.
To read a dataset that uses the Bitshuffle filter using h5py, simply import
this module (unless you have installed the Bitshuffle dynamically loaded
filter, in which case importing this module is unnecessary).
To create a new dataset that includes the Bitshuffle filter, use one of the
convenience functions provided.
Constants
=========
H5FILTER : The Bitshuffle HDF5 filter integer identifier.
H5_COMPRESS_LZ4 : Filter option flag for LZ4 compression.
H5_COMPRESS_ZSTD : Filter option flag for ZSTD compression.
Functions
=========
create_dataset
create_bitshuffle_lzf_dataset
create_bitshuffle_compressed_dataset
Examples
========
>>> import numpy as np
>>> import h5py
>>> import bitshuffle.h5
>>> shape = (123, 456)
>>> chunks = (10, 456)
>>> dtype = np.float64
>>> f = h5py.File("tmp_test.h5", "w")
>>> bitshuffle.h5.create_bitshuffle_compressed_dataset(
f, "some_data", shape, dtype, chunks)
>>> f["some_data"][:] = 42
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import sys
import numpy
import h5py
from h5py import h5d, h5fd, h5s, h5t, h5p, h5z, defs, filters
cimport cython
cdef extern from b"bshuf_h5filter.h":
int bshuf_register_h5filter()
int BSHUF_H5FILTER
int BSHUF_H5_COMPRESS_LZ4
int BSHUF_H5_COMPRESS_ZSTD
cdef extern int init_filter(const char* libname)
cdef int LZF_FILTER = 32000
H5FILTER = BSHUF_H5FILTER
H5_COMPRESS_LZ4 = BSHUF_H5_COMPRESS_LZ4
H5_COMPRESS_ZSTD = BSHUF_H5_COMPRESS_ZSTD
# Init HDF5 dynamic loading with HDF5 library used by h5py
if not sys.platform.startswith('win'):
if sys.version_info[0] >= 3:
libs = [bytes(h5d.__file__, encoding='utf-8'),
bytes(h5fd.__file__, encoding='utf-8'),
bytes(h5s.__file__, encoding='utf-8'),
bytes(h5t.__file__, encoding='utf-8'),
bytes(h5p.__file__, encoding='utf-8'),
bytes(h5z.__file__, encoding='utf-8'),
bytes(defs.__file__, encoding='utf-8')]
else:
libs = [h5d.__file__, h5fd.__file__, h5s.__file__, h5t.__file__,
h5p.__file__, h5z.__file__, defs.__file__]
# Ensure all symbols are loaded
success = -1
for lib in libs:
success = init_filter(lib)
if success == 0:
break
if success == -1:
raise RuntimeError("Failed to load all HDF5 symbols using these libs: {}".format(libs))
def register_h5_filter():
ret = bshuf_register_h5filter()
if ret < 0:
raise RuntimeError("Failed to register bitshuffle HDF5 filter.", ret)
register_h5_filter()
def create_dataset(parent, name, shape, dtype, chunks=None, maxshape=None,
fillvalue=None, track_times=None,
filter_pipeline=(), filter_flags=None, filter_opts=None):
"""Create a dataset with an arbitrary filter pipeline.
Return a new low-level dataset identifier.
    Much of this code is copied from h5py, but it could not be reused directly
    because the relevant low-level API is unstable.
"""
if hasattr(filter_pipeline, "__getitem__"):
filter_pipeline = list(filter_pipeline)
else:
filter_pipeline = [filter_pipeline]
filter_flags = [filter_flags]
filter_opts = [filter_opts]
nfilters = len(filter_pipeline)
if filter_flags is None:
filter_flags = [None] * nfilters
if filter_opts is None:
filter_opts = [None] * nfilters
if not len(filter_flags) == nfilters or not len(filter_opts) == nfilters:
msg = "Supplied incompatible number of filters, flags, and options."
raise ValueError(msg)
shape = tuple(shape)
tmp_shape = maxshape if maxshape is not None else shape
    # Validate chunk shape (guard the comparison so *chunks* may be None)
    if isinstance(chunks, tuple):
        chunks_larger = any(
            chunk > dim for dim, chunk in zip(tmp_shape, chunks)
            if dim is not None
        )
        if chunks_larger:
            errmsg = ("Chunk shape must not be greater than data shape in any "
                      "dimension. {} is not compatible with {}".format(chunks, shape))
            raise ValueError(errmsg)
if isinstance(dtype, h5py.Datatype):
# Named types are used as-is
tid = dtype.id
dtype = tid.dtype # Following code needs this
else:
# Validate dtype
dtype = numpy.dtype(dtype)
tid = h5t.py_create(dtype, logical=1)
if shape == ():
if any((chunks, filter_pipeline)):
raise TypeError("Scalar datasets don't support chunk/filter options")
if maxshape and maxshape != ():
raise TypeError("Scalar datasets cannot be extended")
return h5p.create(h5p.DATASET_CREATE)
def rq_tuple(tpl, name):
"""Check if chunks/maxshape match dataset rank"""
if tpl in (None, True):
return
try:
tpl = tuple(tpl)
except TypeError:
raise TypeError('"%s" argument must be None or a sequence object' % name)
if len(tpl) != len(shape):
raise ValueError('"%s" must have same rank as dataset shape' % name)
rq_tuple(chunks, 'chunks')
rq_tuple(maxshape, 'maxshape')
if (chunks is True) or (chunks is None and filter_pipeline):
chunks = filters.guess_chunk(shape, maxshape, dtype.itemsize)
if maxshape is True:
maxshape = (None,)*len(shape)
dcpl = h5p.create(h5p.DATASET_CREATE)
if chunks is not None:
dcpl.set_chunk(chunks)
dcpl.set_fill_time(h5d.FILL_TIME_ALLOC) # prevent resize glitch
if fillvalue is not None:
fillvalue = numpy.array(fillvalue)
dcpl.set_fill_value(fillvalue)
if track_times in (True, False):
dcpl.set_obj_track_times(track_times)
elif track_times is not None:
raise TypeError("track_times must be either True or False")
for ii in range(nfilters):
this_filter = filter_pipeline[ii]
this_flags = filter_flags[ii]
this_opts = filter_opts[ii]
if this_flags is None:
this_flags = 0
if this_opts is None:
this_opts = ()
dcpl.set_filter(this_filter, this_flags, this_opts)
if maxshape is not None:
maxshape = tuple(m if m is not None else h5s.UNLIMITED
for m in maxshape)
sid = h5s.create_simple(shape, maxshape)
dset_id = h5d.create(parent.id, name, tid, sid, dcpl=dcpl)
return dset_id
def create_bitshuffle_lzf_dataset(parent, name, shape, dtype, chunks=None,
maxshape=None, fillvalue=None,
track_times=None):
"""Create dataset with a filter pipeline including bitshuffle and LZF"""
filter_pipeline = [H5FILTER, LZF_FILTER]
dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks,
filter_pipeline=filter_pipeline, maxshape=maxshape,
fillvalue=fillvalue, track_times=track_times)
return dset_id
def create_bitshuffle_compressed_dataset(parent, name, shape, dtype,
chunks=None, maxshape=None,
fillvalue=None, track_times=None):
"""Create dataset with bitshuffle+internal LZ4 compression."""
filter_pipeline = [H5FILTER, ]
filter_opts = [(0, H5_COMPRESS_LZ4)]
dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks,
filter_pipeline=filter_pipeline,
filter_opts=filter_opts, maxshape=maxshape,
fillvalue=fillvalue, track_times=track_times)
return dset_id


@@ -0,0 +1,3 @@
SET CONDA_HOME=%PREFIX%
"%PYTHON%" setup.py install
if errorlevel 1 exit 1


@@ -0,0 +1,2 @@
export CONDA_HOME=$PREFIX
$PYTHON setup.py install  # install the package using the conda-provided Python


@@ -0,0 +1,27 @@
package:
name: bitshuffle
version: 0.2.1
source:
# git_url: https://github.com/kiyo-masui/bitshuffle.git
# git_rev: 0.2.1
path: ..
patches:
- setup.py.patch
requirements:
build:
- python
- setuptools
- cython
- numpy
- h5py
- hdf5
run:
- python
- numpy
- h5py
- cython
about:
home: https://github.com/kiyo-masui/bitshuffle/blob/master/setup.py
summary: "bitshuffle library."


@@ -0,0 +1,13 @@
--- setup.py 2016-01-19 16:56:12.954563000 +0100
+++ xxx.py 2016-01-19 16:56:00.817087000 +0100
@@ -40,8 +40,8 @@
# Copied from h5py.
# TODO, figure out what the canonacal way to do this should be.
-INCLUDE_DIRS = []
-LIBRARY_DIRS = []
+INCLUDE_DIRS = [os.environ['CONDA_HOME'] + '/include']
+LIBRARY_DIRS = [os.environ['CONDA_HOME'] + '/lib']
if sys.platform == 'darwin':
# putting here both macports and homebrew paths will generate
# "ld: warning: dir not found" at the linking phase


@@ -0,0 +1,24 @@
LZ4 Library
Copyright (c) 2011-2016, Yann Collet
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -0,0 +1,21 @@
LZ4 - Library Files
================================
The __lib__ directory contains several files, but you don't necessarily need them all.
To integrate fast LZ4 compression/decompression into your program, you basically just need "**lz4.c**" and "**lz4.h**".
For more compression at the cost of compression speed (while preserving decompression speed), use **lz4hc** on top of regular lz4. `lz4hc` only provides compression functions. It also needs `lz4` to compile properly.
If you want to produce files or data streams compatible with the `lz4` command line utility, use **lz4frame**. This library encapsulates lz4-compressed blocks into the [official interoperable frame format]. In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**, which provides an error detection algorithm.
(_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace. This is what `liblz4` does, to avoid symbol duplication in case a user program would link to several libraries containing xxhash symbols.)
A more complex "lz4frame_static.h" is also provided, although its usage is not recommended. It contains definitions which are not guaranteed to remain stable within future versions. Use for static linking ***only***.
The other files are not source code. There are :
- LICENSE : contains the BSD license text
- Makefile : script to compile or install lz4 library (static or dynamic)
- liblz4.pc.in : for pkg-config (make install)
[official interoperable frame format]: ../lz4_Frame_format.md

File diff suppressed because it is too large


@@ -0,0 +1,774 @@
/*
* LZ4 - Fast LZ compression algorithm
* Header File
* Copyright (C) 2011-present, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- LZ4 homepage : http://www.lz4.org
- LZ4 source repository : https://github.com/lz4/lz4
*/
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef LZ4_H_2983827168210
#define LZ4_H_2983827168210
/* --- Dependency --- */
#include <stddef.h> /* size_t */
/**
Introduction
LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s
per core, scalable with multi-core CPUs. It features an extremely fast decoder,
with speed in multiple GB/s per core, typically reaching RAM speed limits on
multi-core systems.
The LZ4 compression library provides in-memory compression and decompression functions.
It gives full buffer control to user.
Compression can be done in:
- a single step (described as Simple Functions)
- a single step, reusing a context (described in Advanced Functions)
- unbounded multiple steps (described as Streaming compression)
lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
Decompressing such a compressed block requires additional metadata.
Exact metadata depends on exact decompression function.
For the typical case of LZ4_decompress_safe(),
metadata includes block's compressed size, and maximum bound of decompressed size.
Each application is free to encode and pass such metadata in whichever way it wants.
lz4.h only handles blocks; it cannot generate Frames.
Blocks are different from Frames (doc/lz4_Frame_format.md).
Frames bundle both blocks and metadata in a specified manner.
Embedding metadata is required for compressed data to be self-contained and portable.
Frame format is delivered through a companion API, declared in lz4frame.h.
The `lz4` CLI can only manage frames.
*/
/*^***************************************************************
* Export parameters
*****************************************************************/
/*
* LZ4_DLL_EXPORT :
* Enable exporting of functions when building a Windows DLL
* LZ4LIB_VISIBILITY :
* Control library symbols visibility.
*/
#ifndef LZ4LIB_VISIBILITY
# if defined(__GNUC__) && (__GNUC__ >= 4)
# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
# else
# define LZ4LIB_VISIBILITY
# endif
#endif
#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define LZ4LIB_API LZ4LIB_VISIBILITY
#endif
/*------ Version ------*/
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
#define LZ4_QUOTE(str) #str
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */
LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */
/*-************************************
* Tuning parameter
**************************************/
/*!
* LZ4_MEMORY_USAGE :
* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
* Increasing memory usage improves compression ratio.
* Reduced memory usage may improve speed, thanks to better cache locality.
* Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
*/
#ifndef LZ4_MEMORY_USAGE
# define LZ4_MEMORY_USAGE 14
#endif
/*-************************************
* Simple Functions
**************************************/
/*! LZ4_compress_default() :
* Compresses 'srcSize' bytes from buffer 'src'
* into already allocated 'dst' buffer of size 'dstCapacity'.
* Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
* It also runs faster, so it's a recommended setting.
* If the function cannot compress 'src' into a more limited 'dst' budget,
* compression stops *immediately*, and the function result is zero.
* In which case, 'dst' content is undefined (invalid).
* srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
* dstCapacity : size of buffer 'dst' (which must be already allocated)
* @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
* or 0 if compression fails
* Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
*/
LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
/*! LZ4_decompress_safe() :
* compressedSize : is the exact complete size of the compressed block.
* dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
* @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
* If destination buffer is not large enough, decoding will stop and output an error code (negative value).
* If the source stream is detected malformed, the function will stop decoding and return a negative result.
* Note 1 : This function is protected against malicious data packets :
it will never write outside 'dst' buffer, nor read outside 'source' buffer,
* even if the compressed block is maliciously modified to order the decoder to do these actions.
* In such case, the decoder stops immediately, and considers the compressed block malformed.
* Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
* The implementation is free to send / store / derive this information in whichever way is most beneficial.
* If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
*/
LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
/*-************************************
* Advanced Functions
**************************************/
#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
/*! LZ4_compressBound() :
Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
This function is primarily useful for memory allocation purposes (destination buffer size).
Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
inputSize : max supported value is LZ4_MAX_INPUT_SIZE
return : maximum output size in a "worst case" scenario
or 0, if input size is incorrect (too large or negative)
*/
LZ4LIB_API int LZ4_compressBound(int inputSize);
/*! LZ4_compress_fast() :
Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
An acceleration value of "1" is the same as regular LZ4_compress_default()
Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
*/
LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
/*! LZ4_compress_fast_extState() :
* Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
* Use LZ4_sizeofState() to know how much memory must be allocated,
* and allocate it on 8-bytes boundaries (using `malloc()` typically).
* Then, provide this buffer as `void* state` to compression function.
*/
LZ4LIB_API int LZ4_sizeofState(void);
LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
/*! LZ4_compress_destSize() :
* Reverse the logic : compresses as much data as possible from 'src' buffer
* into already allocated buffer 'dst', of size >= 'targetDestSize'.
* This function either compresses the entire 'src' content into 'dst' if it's large enough,
* or fill 'dst' buffer completely with as much data as possible from 'src'.
* note: acceleration parameter is fixed to "default".
*
* *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
* New value is necessarily <= input value.
* @return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
* or 0 if compression fails.
*
* Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
* the produced compressed content could, in specific circumstances,
* require to be decompressed into a destination buffer larger
* by at least 1 byte than the content to decompress.
* If an application uses `LZ4_compress_destSize()`,
* it's highly recommended to update liblz4 to v1.9.2 or better.
* If this can't be done or ensured,
* the receiving decompression function should provide
* a dstCapacity which is > decompressedSize, by at least 1 byte.
* See https://github.com/lz4/lz4/issues/859 for details
*/
LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
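/*! Fixed-budget sketch (illustrative addition, not part of the upstream header) :
 *  pack as much of 'src' as fits into one 4 KB page; on return, *consumed
 *  reports how many source bytes were actually taken. fill_page() is hypothetical.
 */
#if 0   /* example only */
#include "lz4.h"

static int fill_page(const char* src, int srcAvailable, char* page, int* consumed)
{
    *consumed = srcAvailable;   /* in : bytes available; out : bytes actually read */
    return LZ4_compress_destSize(src, page, consumed, 4096);
}
#endif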
/*! LZ4_decompress_safe_partial() :
* Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
* into destination buffer 'dst' of size 'dstCapacity'.
* Up to 'targetOutputSize' bytes will be decoded.
* The function stops decoding on reaching this objective.
* This can be useful to boost performance
* whenever only the beginning of a block is required.
*
* @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
* If source stream is detected malformed, function returns a negative result.
*
* Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
*
* Note 2 : targetOutputSize must be <= dstCapacity
*
* Note 3 : this function effectively stops decoding on reaching targetOutputSize,
* so dstCapacity is somewhat redundant.
* This is because, in older versions of this function,
* the decoding operation would still write complete sequences.
* Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
* it could write more bytes, though only up to dstCapacity.
* Some "margin" used to be required for this operation to work properly.
* Thankfully, this is no longer necessary.
* The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
*
* Note 4 : If srcSize is the exact size of the block,
* then targetOutputSize can be any value,
* including larger than the block's decompressed size.
* The function will, at most, generate the block's decompressed size.
*
* Note 5 : If srcSize is _larger_ than block's compressed size,
* then targetOutputSize **MUST** be <= block's decompressed size.
* Otherwise, *silent corruption will occur*.
*/
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
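/*! Partial-decode sketch (illustrative addition, not part of the upstream header) :
 *  decode only the first headerSize bytes of a block, e.g. to inspect an
 *  embedded record header. srcSize is the exact block size here, so
 *  targetOutputSize may be any value (Note 4). peek_header() is hypothetical.
 */
#if 0   /* example only */
#include "lz4.h"

static int peek_header(const char* block, int blockSize,
                       char* hdr, int headerSize, int hdrCapacity)
{
    /* requires headerSize <= hdrCapacity (Note 2) */
    return LZ4_decompress_safe_partial(block, hdr, blockSize, headerSize, hdrCapacity);
}
#endif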
/*-*********************************************
* Streaming Compression Functions
***********************************************/
typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
/*! LZ4_resetStream_fast() : v1.9.0+
* Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
* (e.g., LZ4_compress_fast_continue()).
*
* An LZ4_stream_t must be initialized once before usage.
* This is automatically done when created by LZ4_createStream().
* However, should the LZ4_stream_t be simply declared on the stack (for example),
* it's necessary to initialize it first, using LZ4_initStream().
*
* After init, start any new stream with LZ4_resetStream_fast().
* The same LZ4_stream_t can be re-used multiple times consecutively
* and compress multiple streams,
* provided that it starts each new stream with LZ4_resetStream_fast().
*
* LZ4_resetStream_fast() is much faster than LZ4_initStream(),
* but is not compatible with memory regions containing garbage data.
*
* Note: it's only useful to call LZ4_resetStream_fast()
* in the context of streaming compression.
* The *extState* functions perform their own resets.
* Invoking LZ4_resetStream_fast() beforehand is redundant, and even counterproductive.
*/
LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
/*! LZ4_loadDict() :
* Use this function to reference a static dictionary into LZ4_stream_t.
* The dictionary must remain available during compression.
* LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
* The same dictionary will have to be loaded on decompression side for successful decoding.
* Dictionaries are useful for better compression of small data (KB range).
* While LZ4 accepts any input as a dictionary,
* results are generally better when using Zstandard's Dictionary Builder.
* Loading a size of 0 is allowed, and is the same as a reset.
* @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
*/
LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
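/*! Dictionary sketch (illustrative addition, not part of the upstream header) :
 *  compress one message against a static dictionary. The same dictionary
 *  bytes must be supplied on the decoding side (see the usingDict sketch
 *  further below). compress_with_dict() is hypothetical.
 */
#if 0   /* example only */
#include "lz4.h"

static int compress_with_dict(const char* dict, int dictSize,
                              const char* src, int srcSize,
                              char* dst, int dstCapacity)
{
    LZ4_stream_t* const s = LZ4_createStream();
    int cSize = 0;
    if (s != NULL) {
        LZ4_loadDict(s, dict, dictSize);   /* dict must stay valid during compression */
        cSize = LZ4_compress_fast_continue(s, src, dst, srcSize, dstCapacity, 1);
        LZ4_freeStream(s);
    }
    return cSize;
}
#endif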
/*! LZ4_compress_fast_continue() :
* Compress 'src' content using data from previously compressed blocks, for better compression ratio.
* 'dst' buffer must be already allocated.
* If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
*
* @return : size of compressed block
* or 0 if there is an error (typically, cannot fit into 'dst').
*
* Note 1 : Each invocation of LZ4_compress_fast_continue() generates a new block.
* Each block has precise boundaries.
* Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
* It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
*
* Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at the same address in memory !
*
* Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
* Make sure that the buffers are separated by at least one byte.
* This construction ensures that each block only depends on the previous block.
*
* Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
*
* Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
*/
LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
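/*! Double-buffer sketch (illustrative addition, not part of the upstream header) :
 *  chunked file compression following Notes 2 and 3 above. Two 64 KB halves
 *  alternate, so the previous input block stays resident as history. The
 *  size-prefix framing and stream_file() are hypothetical, not an LZ4 format.
 */
#if 0   /* example only */
#include <stdio.h>
#include "lz4.h"

static void stream_file(FILE* in, FILE* out)
{
    static char inBuf[2][64 * 1024];
    static char cmpBuf[LZ4_COMPRESSBOUND(64 * 1024)];
    LZ4_stream_t* const stream = LZ4_createStream();
    int idx = 0;
    for (;;) {
        int n = (int)fread(inBuf[idx], 1, sizeof(inBuf[idx]), in);
        int c;
        if (n <= 0) break;
        c = LZ4_compress_fast_continue(stream, inBuf[idx], cmpBuf, n, (int)sizeof(cmpBuf), 1);
        if (c <= 0) break;               /* error : stream state is now invalid (Note 5) */
        fwrite(&c, sizeof(c), 1, out);   /* hypothetical block-size prefix framing */
        fwrite(cmpBuf, 1, (size_t)c, out);
        idx ^= 1;                        /* switch halves; previous block stays in memory */
    }
    LZ4_freeStream(stream);
}
#endif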
/*! LZ4_saveDict() :
* If the last 64KB of data cannot be guaranteed to remain available at its current memory location,
* save it into a safer place (char* safeBuffer).
* This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
* but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
* @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
*/
LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
/*-**********************************************
* Streaming Decompression Functions
* Bufferless synchronous API
************************************************/
typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */
/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
* creation / destruction of streaming decompression tracking context.
* A tracking context can be re-used multiple times.
*/
LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
/*! LZ4_setStreamDecode() :
* An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
* Use this function to start decompression of a new stream of blocks.
* A dictionary can optionally be set. Use NULL or size 0 for a reset order.
* Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
* @return : 1 if OK, 0 if error
*/
LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
/*! LZ4_decoderRingBufferSize() : v1.8.2+
* Note : in a ring buffer scenario (optional),
* blocks are presumed decompressed next to each other
* up to the moment there is not enough remaining space for the next block (remainingSize < maxBlockSize),
* at which stage decoding resumes from the beginning of the ring buffer.
* When setting up such a ring buffer for streaming decompression,
* this function provides the minimum size of the ring buffer
* required to be compatible with any source respecting the maxBlockSize condition.
* @return : minimum ring buffer size,
* or 0 if there is an error (invalid maxBlockSize).
*/
LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */
/*! LZ4_decompress_*_continue() :
* These decoding functions allow decompression of consecutive blocks in "streaming" mode.
* A block is an unsplittable entity, it must be presented entirely to a decompression function.
* Decompression functions only accept one block at a time.
* The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
* If less than 64KB of data has been decoded, all the data must be present.
*
* Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
* - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
* maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
* In which case, encoding and decoding buffers do not need to be synchronized.
* Actually, data can be produced by any source compliant with the LZ4 format specification, and respecting maxBlockSize.
* - Synchronized mode :
* Decompression buffer size is _exactly_ the same as compression buffer size,
* and follows exactly same update rule (block boundaries at same positions),
* and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
* _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
* - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
* In which case, encoding and decoding buffers do not need to be synchronized,
* and encoding ring buffer can have any size, including small ones ( < 64 KB).
*
* Whenever these conditions are not possible,
* save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
* then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
*/
LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity);
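/*! Decoding counterpart (illustrative addition, not part of the upstream header) :
 *  reads back the hypothetical size-prefixed blocks from the double-buffer
 *  sketch above, decoding alternately into two 64 KB halves so the previous
 *  decoded block remains available. destream_file() is hypothetical.
 */
#if 0   /* example only */
#include <stdio.h>
#include "lz4.h"

static void destream_file(FILE* in, FILE* out)
{
    static char decBuf[2][64 * 1024];
    static char cmpBuf[LZ4_COMPRESSBOUND(64 * 1024)];
    LZ4_streamDecode_t* const stream = LZ4_createStreamDecode();
    int idx = 0;
    int c;
    (void)LZ4_setStreamDecode(stream, NULL, 0);   /* start a new stream of blocks */
    while (fread(&c, sizeof(c), 1, in) == 1) {
        int d;
        if (c <= 0 || c > (int)sizeof(cmpBuf)) break;             /* corrupt framing */
        if (fread(cmpBuf, 1, (size_t)c, in) != (size_t)c) break;
        d = LZ4_decompress_safe_continue(stream, cmpBuf, decBuf[idx], c, (int)sizeof(decBuf[idx]));
        if (d < 0) break;                                         /* malformed block */
        fwrite(decBuf[idx], 1, (size_t)d, out);
        idx ^= 1;                        /* previous decoded block stays in memory */
    }
    LZ4_freeStreamDecode(stream);
}
#endif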
/*! LZ4_decompress_*_usingDict() :
* These decoding functions work the same as
* a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
* They are stand-alone, and don't need an LZ4_streamDecode_t structure.
* Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
* Performance tip : Decompression speed can be substantially increased
* when dst == dictStart + dictSize.
*/
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapacity, const char* dictStart, int dictSize);
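/*! usingDict sketch (illustrative addition, not part of the upstream header) :
 *  stand-alone decoding counterpart of the LZ4_loadDict() sketch above;
 *  same dictionary bytes, no LZ4_streamDecode_t needed. The helper name is
 *  hypothetical.
 */
#if 0   /* example only */
#include "lz4.h"

static int decompress_with_dict(const char* dict, int dictSize,
                                const char* src, int srcSize,
                                char* dst, int dstCapacity)
{
    return LZ4_decompress_safe_usingDict(src, dst, srcSize, dstCapacity, dict, dictSize);
}
#endif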
#endif /* LZ4_H_2983827168210 */
/*^*************************************
* !!!!!! STATIC LINKING ONLY !!!!!!
***************************************/
/*-****************************************************************************
* Experimental section
*
* Symbols declared in this section must be considered unstable. Their
* signatures or semantics may change, or they may be removed altogether in the
* future. They are therefore only safe to depend on when the caller is
* statically linked against the library.
*
* To protect against unsafe usage, not only are the declarations guarded,
* the definitions are hidden by default
* when building LZ4 as a shared/dynamic library.
*
* In order to access these declarations,
* define LZ4_STATIC_LINKING_ONLY in your application
* before including LZ4's headers.
*
* In order to make their implementations accessible dynamically, you must
* define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
******************************************************************************/
#ifdef LZ4_STATIC_LINKING_ONLY
#ifndef LZ4_STATIC_3504398509
#define LZ4_STATIC_3504398509
#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
#define LZ4LIB_STATIC_API LZ4LIB_API
#else
#define LZ4LIB_STATIC_API
#endif
/*! LZ4_compress_fast_extState_fastReset() :
* A variant of LZ4_compress_fast_extState().
*
* Using this variant avoids an expensive initialization step.
* It is only safe to call if the state buffer is known to be correctly initialized already
* (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
* From a high level, the difference is that
* this function initializes the provided state with a call to something like LZ4_resetStream_fast()
* while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
*/
LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
/*! LZ4_attach_dictionary() :
* This is an experimental API that allows
* efficient use of a static dictionary many times.
*
* Rather than re-loading the dictionary buffer into a working context before
* each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
* working LZ4_stream_t, this function introduces a no-copy setup mechanism,
* in which the working stream references the dictionary stream in-place.
*
* Several assumptions are made about the state of the dictionary stream.
* Currently, only streams which have been prepared by LZ4_loadDict() should
* be expected to work.
*
* Alternatively, the provided dictionaryStream may be NULL,
* in which case any existing dictionary stream is unset.
*
* If a dictionary is provided, it replaces any pre-existing stream history.
* The dictionary contents are the only history that can be referenced and
* logically immediately precede the data compressed in the first subsequent
* compression call.
*
* The dictionary will only remain attached to the working stream through the
* first compression call, at the end of which it is cleared. The dictionary
* stream (and source buffer) must remain in-place / accessible / unchanged
* through the completion of the first compression call on the stream.
*/
LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
/*! In-place compression and decompression
*
* It's possible to have input and output sharing the same buffer,
* for highly constrained memory environments.
* In both cases, it requires the input to lie at the end of the buffer,
* and decompression to start at the beginning of the buffer.
* Buffer size must feature some margin, hence be larger than final size.
*
* |<------------------------buffer--------------------------------->|
* |<-----------compressed data--------->|
* |<-----------decompressed size------------------>|
* |<----margin---->|
*
* This technique is more useful for decompression,
* since the decompressed size is typically larger,
* and the margin is small.
*
* In-place decompression will work inside any buffer
* whose size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
* This presumes that decompressedSize > compressedSize.
* Otherwise, it means compression actually expanded data,
* and it would be more efficient to store such data with a flag indicating it's not compressed.
* This can happen when data is not compressible (already compressed, or encrypted).
*
* For in-place compression, margin is larger, as it must be able to cope with both
* history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
* and data expansion, which can happen when input is not compressible.
* As a consequence, buffer size requirements are much higher,
* and memory savings offered by in-place compression are more limited.
*
* There are ways to limit this cost for compression :
* - Reduce history size, by modifying LZ4_DISTANCE_MAX.
* Note that it is a compile-time constant, so all compressions will apply this limit.
* Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
* so it's a reasonable trick when inputs are known to be small.
* - Require the compressor to deliver a "maximum compressed size".
* This is the `dstCapacity` parameter in `LZ4_compress*()`.
* When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
* in which case, the return code will be 0 (zero).
* The caller must be ready for these cases to happen,
* and typically design a backup scheme to send data uncompressed.
* The combination of both techniques can significantly reduce
* the amount of margin required for in-place compression.
*
* In-place compression can work in any buffer
* whose size is >= (maxCompressedSize)
* with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
* LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
* so it's possible to reduce memory requirements by playing with them.
*/
#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
#endif
#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
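/*! In-place sketch (illustrative addition, not part of the upstream header) :
 *  the compressed payload is copied to the end of a buffer sized with
 *  LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(), then decoded back into the start of
 *  the same buffer. decompress_inplace() is hypothetical; a real caller would
 *  consume the decoded bytes before freeing the buffer.
 */
#if 0   /* example only -- requires LZ4_STATIC_LINKING_ONLY before including lz4.h */
#include <stdlib.h>
#include <string.h>

static int decompress_inplace(const char* cmp, int cmpSize, int decompressedSize)
{
    const size_t bufSize = (size_t)LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize);
    char* const buf = (char*)malloc(bufSize);
    int r = -1;
    if (buf != NULL) {
        memcpy(buf + bufSize - (size_t)cmpSize, cmp, (size_t)cmpSize);  /* input at buffer end */
        r = LZ4_decompress_safe(buf + bufSize - (size_t)cmpSize, buf, cmpSize, decompressedSize);
        free(buf);   /* sketch only : decoded data at buf[0..r) is discarded here */
    }
    return r;        /* decompressed byte count, or negative on malformed input */
}
#endif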
#endif /* LZ4_STATIC_3504398509 */
#endif /* LZ4_STATIC_LINKING_ONLY */
#ifndef LZ4_H_98237428734687
#define LZ4_H_98237428734687
/*-************************************************************
* Private Definitions
**************************************************************
* Do not use these definitions directly.
* They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
* Accessing members will expose user code to API and/or ABI break in future versions of the library.
**************************************************************/
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
typedef int8_t LZ4_i8;
typedef uint8_t LZ4_byte;
typedef uint16_t LZ4_u16;
typedef uint32_t LZ4_u32;
#else
typedef signed char LZ4_i8;
typedef unsigned char LZ4_byte;
typedef unsigned short LZ4_u16;
typedef unsigned int LZ4_u32;
#endif
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
struct LZ4_stream_t_internal {
LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
LZ4_u32 currentOffset;
LZ4_u32 tableType;
const LZ4_byte* dictionary;
const LZ4_stream_t_internal* dictCtx;
LZ4_u32 dictSize;
};
typedef struct {
const LZ4_byte* externalDict;
size_t extDictSize;
const LZ4_byte* prefixEnd;
size_t prefixSize;
} LZ4_streamDecode_t_internal;
/*! LZ4_stream_t :
* Do not use the internal definitions below directly !
* Declare or allocate an LZ4_stream_t instead.
* LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
* The structure definition can be convenient for static allocation
* (on stack, or as part of larger structure).
* Init this structure with LZ4_initStream() before first use.
* note : only use this definition in association with static linking !
* this definition is not API/ABI safe, and may change in future versions.
*/
#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */
#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*))
union LZ4_stream_u {
void* table[LZ4_STREAMSIZE_VOIDP];
LZ4_stream_t_internal internal_donotuse;
}; /* previously typedef'd to LZ4_stream_t */
/*! LZ4_initStream() : v1.9.0+
* An LZ4_stream_t structure must be initialized at least once.
* This is automatically done when invoking LZ4_createStream(),
* but it's not when the structure is simply declared on the stack (for example).
*
* Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
* It can also initialize any arbitrary buffer of sufficient size,
* and will @return a pointer of proper type upon initialization.
*
* Note : initialization fails if size and alignment conditions are not respected.
* In which case, the function will @return NULL.
* Note2: An LZ4_stream_t structure guarantees correct alignment and size.
* Note3: Before v1.9.0, use LZ4_resetStream() instead
*/
LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
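/*! Stack-allocation sketch (illustrative addition, not part of the upstream header) :
 *  a stack-declared LZ4_stream_t contains garbage and must be initialized
 *  once with LZ4_initStream() before use. compress_on_stack() is hypothetical.
 */
#if 0   /* example only */
#include "lz4.h"

static int compress_on_stack(const char* src, int srcSize, char* dst, int dstCapacity)
{
    LZ4_stream_t state;   /* on stack : uninitialized memory */
    if (LZ4_initStream(&state, sizeof(state)) == NULL) return 0;  /* size/alignment check */
    return LZ4_compress_fast_continue(&state, src, dst, srcSize, dstCapacity, 1);
}
#endif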
/*! LZ4_streamDecode_t :
* information structure to track an LZ4 stream during decompression.
* init this structure using LZ4_setStreamDecode() before first use.
* note : only use in association with static linking !
* this definition is not API/ABI safe,
* and may change in a future version !
*/
#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ )
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
union LZ4_streamDecode_u {
unsigned long long table[LZ4_STREAMDECODESIZE_U64];
LZ4_streamDecode_t_internal internal_donotuse;
} ; /* previously typedef'd to LZ4_streamDecode_t */
/*-************************************
* Obsolete Functions
**************************************/
/*! Deprecation warnings
*
* Deprecated functions make the compiler generate a warning when invoked.
* This is meant to invite users to update their source code.
* Should deprecation warnings be a problem, it is generally possible to disable them,
* typically with -Wno-deprecated-declarations for gcc
* or _CRT_SECURE_NO_WARNINGS in Visual.
*
* Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
* before including the header file.
*/
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
# define LZ4_DEPRECATED(message) /* disable deprecation warnings */
#else
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define LZ4_DEPRECATED(message) [[deprecated(message)]]
# elif defined(_MSC_VER)
# define LZ4_DEPRECATED(message) __declspec(deprecated(message))
# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
# define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
# define LZ4_DEPRECATED(message) __attribute__((deprecated))
# else
# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
# define LZ4_DEPRECATED(message) /* disabled */
# endif
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
/*! Obsolete compression functions (since v1.7.3) */
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
/*! Obsolete decompression functions (since v1.8.0) */
LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize);
LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
/* Obsolete streaming functions (since v1.7.0)
* degraded functionality; do not use!
*
* In order to perform streaming compression, these functions depended on data
* that is no longer tracked in the state. They have been preserved as well as
* possible: using them will still produce a correct output. However, they don't
* actually retain any history between compression calls. The compression ratio
* achieved will therefore be no better than compressing each chunk
* independently.
*/
LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer);
LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void);
LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer);
LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state);
/*! Obsolete streaming decoding functions (since v1.7.0) */
LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
* These functions used to be faster than LZ4_decompress_safe(),
* but this is no longer the case. They are now slower.
* This is because LZ4_decompress_fast() doesn't know the input size,
* and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
* On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
* As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
*
* The last remaining LZ4_decompress_fast() specificity is that
* it can decompress a block without knowing its compressed size.
* Such functionality can be achieved in a more secure manner
* by employing LZ4_decompress_safe_partial().
*
* Parameters:
* originalSize : is the uncompressed size to regenerate.
* `dst` must be already allocated, its size must be >= 'originalSize' bytes.
* @return : number of bytes read from source buffer (== compressed size).
* The function expects to finish at block's end exactly.
* If the source stream is detected malformed, the function stops decoding and returns a negative result.
* note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
* However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
* Also, since match offsets are not validated, match reads from 'src' may underflow too.
* These issues never happen if input (compressed) data is correct.
* But they may happen if input data is invalid (error or intentional tampering).
* As a consequence, use these functions in trusted environments with trusted data **only**.
*/
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);
/*! LZ4_resetStream() :
* An LZ4_stream_t structure must be initialized at least once.
* This is done with LZ4_initStream(), or LZ4_resetStream().
* Consider switching to LZ4_initStream(),
* invoking LZ4_resetStream() will trigger deprecation warnings in the future.
*/
LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
#endif /* LZ4_H_98237428734687 */
#if defined (__cplusplus)
}
#endif

View File

@@ -0,0 +1,34 @@
Copyright Notice and Statement for LZF filter
Copyright (c) 2008-2009 Andrew Collette
http://h5py.alfven.org
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
a. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
b. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the
distribution.
c. Neither the name of the author nor the names of contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,84 @@
===============================
LZF filter for HDF5, revision 3
===============================
The LZF filter provides high-speed compression: it is much faster than
DEFLATE, at the cost of a slightly lower compression ratio. It's
appropriate for large
datasets of low to moderate complexity, for which some compression is
much better than none, but for which the speed of DEFLATE is unacceptable.
This filter has been tested against HDF5 versions 1.6.5 through 1.8.3. It
is released under the BSD license (see LICENSE.txt for details).
Using the filter from HDF5
--------------------------
There is exactly one new public function declared in lzf_filter.h, with
the following signature:
int register_lzf(void)
Calling this will register the filter with the HDF5 library. A non-negative
return value indicates success. If the registration fails, an error is pushed
onto the current error stack and a negative value is returned.
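As a minimal (hedged) sketch, registration from C looks like this, assuming
the HDF5 library has already been initialized:

    #include "lzf_filter.h"

    if(register_lzf() < 0){
        /* handle error: LZF filter unavailable */
    }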
It's strongly recommended to use the SHUFFLE filter with LZF, as it's
cheap, supported by all current versions of HDF5, and can significantly
improve the compression ratio. An example C program ("example.c") is included
which demonstrates the proper use of the filter.
Compiling
---------
The filter consists of a single .c file and header, along with an embedded
version of the LZF compression library. Since the filter is stateless, it's
recommended to statically link the entire thing into your program; for
example:
$ gcc -O2 -lhdf5 lzf/*.c lzf_filter.c myprog.c -o myprog
It can also be built as a shared library, although you will have to install
the resulting library somewhere the runtime linker can find it:
$ gcc -O2 -lhdf5 -fPIC -shared lzf/*.c lzf_filter.c -o liblzf_filter.so
A similar procedure should be used for building C++ code. As in these
examples, using option -O1 or higher is strongly recommended for increased
performance.
Contact
-------
This filter is maintained as part of the HDF5 for Python (h5py) project. The
goal of h5py is to provide access to the majority of the HDF5 C API and feature
set from Python. The most recent version of h5py (1.1) includes the LZF
filter by default.
* Downloads and bug tracker: http://h5py.googlecode.com
* Main web site and documentation: http://h5py.alfven.org
* Contact email: h5py at alfven dot org
History of changes
------------------
Revision 3 (6/25/09)
Fix issue with changed filter struct definition under HDF5 1.8.3.
Revision 2
Minor speed enhancement.
Revision 1
Initial release.

View File

@@ -0,0 +1,3 @@
The LZF filter for HDF5 is part of the h5py project (http://h5py.alfven.org).
The version included with bitshuffle is from version 2.3 of h5py with no
modifications other than the addition of this README.

View File

@@ -0,0 +1,106 @@
/*
Copyright (C) 2009 Andrew Collette
http://h5py.alfven.org
License: BSD (see LICENSE.txt)
Example program demonstrating use of the LZF filter from C code.
To compile this program:
h5cc -DH5_USE_16_API lzf/*.c lzf_filter.c example.c -o example
To run:
$ ./example
Success!
$ h5ls -v test_lzf.hdf5
Opened "test_lzf.hdf5" with sec2 driver.
dset Dataset {100/100, 100/100, 100/100}
Location: 0:1:0:976
Links: 1
Modified: 2009-02-15 16:35:11 PST
Chunks: {1, 100, 100} 40000 bytes
Storage: 4000000 logical bytes, 174288 allocated bytes, 2295.05% utilization
Filter-0: shuffle-2 OPT {4}
Filter-1: lzf-32000 OPT {1, 261, 40000}
Type: native float
*/
#include <stdio.h>
#include "hdf5.h"
#include "lzf_filter.h"
#define SIZE 100*100*100
#define SHAPE {100,100,100}
#define CHUNKSHAPE {1,100,100}
int main(){
static float data[SIZE];
static float data_out[SIZE];
const hsize_t shape[] = SHAPE;
const hsize_t chunkshape[] = CHUNKSHAPE;
int r, i;
int return_code = 1;
hid_t fid = 0, sid = 0, dset = 0, plist = 0;  /* zero-init so cleanup after "failed" is safe */
for(i=0; i<SIZE; i++){
data[i] = i;
}
/* Register the filter with the library */
r = register_lzf();
if(r<0) goto failed;
sid = H5Screate_simple(3, shape, NULL);
if(sid<0) goto failed;
fid = H5Fcreate("test_lzf.hdf5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
if(fid<0) goto failed;
plist = H5Pcreate(H5P_DATASET_CREATE);
if(plist<0) goto failed;
/* Chunked layout required for filters */
r = H5Pset_chunk(plist, 3, chunkshape);
if(r<0) goto failed;
/* Use of the shuffle filter VASTLY improves performance of this
and other block-oriented compression filters. Be sure to add
this before the compression filter!
*/
r = H5Pset_shuffle(plist);
if(r<0) goto failed;
/* Note the "optional" flag is necessary, as with the DEFLATE filter */
r = H5Pset_filter(plist, H5PY_FILTER_LZF, H5Z_FLAG_OPTIONAL, 0, NULL);
if(r<0) goto failed;
dset = H5Dcreate(fid, "dset", H5T_NATIVE_FLOAT, sid, plist);
if(dset<0) goto failed;
r = H5Dwrite(dset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &data);
if(r<0) goto failed;
r = H5Dread(dset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &data_out);
if(r<0) goto failed;
for(i=0;i<SIZE;i++){
if(data[i] != data_out[i]) goto failed;
}
fprintf(stdout, "Success!\n");
return_code = 0;
failed:
if(dset>0) H5Dclose(dset);
if(sid>0) H5Sclose(sid);
if(plist>0) H5Pclose(plist);
if(fid>0) H5Fclose(fid);
return return_code;
}

View File

@@ -0,0 +1,100 @@
/*
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/
#ifndef LZF_H
#define LZF_H
/***********************************************************************
**
** lzf -- an extremely fast/free compression/decompression-method
** http://liblzf.plan9.de/
**
** This algorithm is believed to be patent-free.
**
***********************************************************************/
#define LZF_VERSION 0x0105 /* 1.5, API version */
/*
* Compress in_len bytes stored at the memory block starting at
* in_data and write the result to out_data, up to a maximum length
* of out_len bytes.
*
* If the output buffer is not large enough or any error occurs, return 0,
* otherwise return the number of bytes used, which might be considerably
* more than in_len (but less than 104% of the original size), so it
* makes sense to always use out_len == in_len - 1, to ensure _some_
* compression, and store the data uncompressed otherwise (with a flag, of
* course).
*
* lzf_compress might use different algorithms on different systems and
* even different runs, thus might result in different compressed strings
* depending on the phase of the moon or similar factors. However, all
* these strings are architecture-independent and will result in the
* original data when decompressed using lzf_decompress.
*
* The buffers must not be overlapping.
*
* If the option LZF_STATE_ARG is enabled, an extra argument must be
* supplied which is not reflected in this header file. Refer to lzfP.h
* and lzf_c.c.
*
*/
unsigned int
lzf_compress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len);
/*
* Decompress data compressed with some version of the lzf_compress
* function and stored at location in_data and length in_len. The result
* will be stored at out_data up to a maximum of out_len characters.
*
* If the output buffer is not large enough to hold the decompressed
* data, a 0 is returned and errno is set to E2BIG. Otherwise the number
* of decompressed bytes (i.e. the original length of the data) is
* returned.
*
* If an error in the compressed data is detected, a zero is returned and
* errno is set to EINVAL.
*
* This function is very fast, about as fast as a copying loop.
*/
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len);
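/*
 * Usage sketch (illustrative addition, not part of the original header):
 * round-trip a buffer, following the out_len == in_len - 1 advice above so
 * that a zero return from lzf_compress means "store uncompressed instead".
 * The function name lzf_roundtrip_demo is hypothetical.
 */
#if 0   /* example only -- compile together with lzf_c.c and lzf_d.c */
static unsigned char raw[4096], packed[4096], restored[4096];

static void lzf_roundtrip_demo (void)
{
  unsigned int in_len = sizeof (raw);
  unsigned int c = lzf_compress (raw, in_len, packed, in_len - 1);
  if (c == 0)
    return;   /* incompressible : keep raw data, flagged as uncompressed */
  if (lzf_decompress (packed, c, restored, sizeof (restored)) != in_len)
    return;   /* 0 => E2BIG or EINVAL (see above) */
}
#endif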
#endif

View File

@@ -0,0 +1,166 @@
/*
* Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/
#ifndef LZFP_h
#define LZFP_h
#define STANDALONE 1 /* at the moment, this is ok. */
#ifndef STANDALONE
# include "lzf.h"
#endif
/*
* Size of hashtable is (1 << HLOG) * sizeof (char *)
* decompression is independent of the hash table size
* the difference between 15 and 14 is very small
* for small blocks (and 14 is usually a bit faster).
* For a low-memory/faster configuration, use HLOG == 13;
* For best compression, use 15 or 16 (or more, up to 23).
*/
#ifndef HLOG
# define HLOG 17 /* Avoid pathological case at HLOG=16 A.C. 2/15/09 */
#endif
/*
* Sacrifice very little compression quality in favour of compression speed.
* This gives almost the same compression as the default code, and is
* (very roughly) 15% faster. This is the preferred mode of operation.
*/
#ifndef VERY_FAST
# define VERY_FAST 1
#endif
/*
* Sacrifice some more compression quality in favour of compression speed.
* (roughly 1-2% worse compression for large blocks and
* 9-10% for small, redundant, blocks and >>20% better speed in both cases)
* In short: when in need for speed, enable this for binary data,
* possibly disable this for text data.
*/
#ifndef ULTRA_FAST
# define ULTRA_FAST 1
#endif
/*
* Unconditionally aligning does not cost very much, so do it if unsure
*/
#ifndef STRICT_ALIGN
# define STRICT_ALIGN !(defined(__i386) || defined (__amd64))
#endif
/*
* You may choose to pre-set the hash table (might be faster on some
* modern cpus and large (>>64k) blocks, and also makes compression
* deterministic/repeatable when the configuration otherwise is the same).
*/
#ifndef INIT_HTAB
# define INIT_HTAB 0
#endif
/* =======================================================================
Changing things below this line may break the HDF5 LZF filter.
A.C. 2/15/09
=======================================================================
*/
/*
* Whether to avoid assigning values to the errno variable. For some embedding
* purposes (the Linux kernel, for example), this is necessary. NOTE: this breaks
* the documentation in lzf.h.
*/
#ifndef AVOID_ERRNO
# define AVOID_ERRNO 0
#endif
/*
* Whether to pass the LZF_STATE variable as an argument, or allocate it
* on the stack. For small-stack environments, define this to 1.
* NOTE: this breaks the prototype in lzf.h.
*/
#ifndef LZF_STATE_ARG
# define LZF_STATE_ARG 0
#endif
/*
* Whether to add extra checks for input validity in lzf_decompress
* and return EINVAL if the input stream has been corrupted. This
* only shields against overflowing the input buffer and will not
* detect most corrupted streams.
* This check is not normally noticeable on modern hardware
* (<1% slowdown), but might slow down older CPUs considerably.
*/
#ifndef CHECK_INPUT
# define CHECK_INPUT 1
#endif
/*****************************************************************************/
/* nothing should be changed below */
typedef unsigned char u8;
typedef const u8 *LZF_STATE[1 << (HLOG)];
#if !STRICT_ALIGN
/* for unaligned accesses we need a 16 bit datatype. */
# include <limits.h>
# if USHRT_MAX == 65535
typedef unsigned short u16;
# elif UINT_MAX == 65535
typedef unsigned int u16;
# else
# undef STRICT_ALIGN
# define STRICT_ALIGN 1
# endif
#endif
#if ULTRA_FAST
# if defined(VERY_FAST)
# undef VERY_FAST
# endif
#endif
#if INIT_HTAB
# ifdef __cplusplus
# include <cstring>
# else
# include <string.h>
# endif
#endif
#endif

View File

@@ -0,0 +1,296 @@
/*
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/
#include "lzfP.h"
#define HSIZE (1 << (HLOG))
/*
* don't play with this unless you benchmark!
* decompression is not dependent on the hash function.
* the hashing function might seem strange; just believe me,
* it works ;)
*/
#ifndef FRST
# define FRST(p) (((p[0]) << 8) | p[1])
# define NEXT(v,p) (((v) << 8) | p[2])
# if ULTRA_FAST
# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1))
# elif VERY_FAST
# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
# else
# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
# endif
#endif
/*
* IDX works because it is very similar to a multiplicative hash, e.g.
* ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
* the latter is also quite fast on newer CPUs, and compresses similarly.
*
* the next one is also quite good, albeit slow ;)
* (int)(cos(h & 0xffffff) * 1e6)
*/
#if 0
/* original lzv-like hash function, much worse and thus slower */
# define FRST(p) (p[0] << 5) ^ p[1]
# define NEXT(v,p) ((v) << 5) ^ p[2]
# define IDX(h) ((h) & (HSIZE - 1))
#endif
#define MAX_LIT (1 << 5)
#define MAX_OFF (1 << 13)
#define MAX_REF ((1 << 8) + (1 << 3))
#if __GNUC__ >= 3
# define expect(expr,value) __builtin_expect ((expr),(value))
# define inline inline
#else
# define expect(expr,value) (expr)
# define inline static
#endif
#define expect_false(expr) expect ((expr) != 0, 0)
#define expect_true(expr) expect ((expr) != 0, 1)
/*
* compressed format
*
* 000LLLLL <L+1> ; literal
* LLLooooo oooooooo ; backref L
* 111ooooo LLLLLLLL oooooooo ; backref L+7
*
*/
unsigned int
lzf_compress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len
#if LZF_STATE_ARG
, LZF_STATE htab
#endif
)
{
#if !LZF_STATE_ARG
LZF_STATE htab;
#endif
const u8 **hslot;
const u8 *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
const u8 *in_end = ip + in_len;
u8 *out_end = op + out_len;
const u8 *ref;
/* off requires a type wide enough to hold a general pointer difference.
* ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
* works for differences within a single object). We also assume that no
* bit pattern traps. Since the only platform that is both non-POSIX
* and fails to support both assumptions is 64-bit Windows, we make a
* special workaround for it.
*/
#if ( defined (WIN32) && defined (_M_X64) ) || defined (_WIN64)
unsigned _int64 off; /* workaround for missing POSIX compliance */
#else
unsigned long off;
#endif
unsigned int hval;
int lit;
if (!in_len || !out_len)
return 0;
#if INIT_HTAB
memset (htab, 0, sizeof (htab));
# if 0
for (hslot = htab; hslot < htab + HSIZE; hslot++)
*hslot++ = ip;
# endif
#endif
lit = 0; op++; /* start run */
hval = FRST (ip);
while (ip < in_end - 2)
{
hval = NEXT (hval, ip);
hslot = htab + IDX (hval);
ref = *hslot; *hslot = ip;
if (1
#if INIT_HTAB
&& ref < ip /* the next test will actually take care of this, but this is faster */
#endif
&& (off = ip - ref - 1) < MAX_OFF
&& ip + 4 < in_end
&& ref > (u8 *)in_data
#if STRICT_ALIGN
&& ref[0] == ip[0]
&& ref[1] == ip[1]
&& ref[2] == ip[2]
#else
&& *(u16 *)ref == *(u16 *)ip
&& ref[2] == ip[2]
#endif
)
{
/* match found at *ref++ */
unsigned int len = 2;
unsigned int maxlen = in_end - ip - len;
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */
return 0;
op [- lit - 1] = lit - 1; /* stop run */
op -= !lit; /* undo run if length is zero */
for (;;)
{
if (expect_true (maxlen > 16))
{
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
len++; if (ref [len] != ip [len]) break;
}
do
len++;
while (len < maxlen && ref[len] == ip[len]);
break;
}
len -= 2; /* len is now #octets - 1 */
ip++;
if (len < 7)
{
*op++ = (off >> 8) + (len << 5);
}
else
{
*op++ = (off >> 8) + ( 7 << 5);
*op++ = len - 7;
}
*op++ = off;
lit = 0; op++; /* start run */
ip += len + 1;
if (expect_false (ip >= in_end - 2))
break;
#if ULTRA_FAST || VERY_FAST
--ip;
# if VERY_FAST && !ULTRA_FAST
--ip;
# endif
hval = FRST (ip);
hval = NEXT (hval, ip);
htab[IDX (hval)] = ip;
ip++;
# if VERY_FAST && !ULTRA_FAST
hval = NEXT (hval, ip);
htab[IDX (hval)] = ip;
ip++;
# endif
#else
ip -= len + 1;
do
{
hval = NEXT (hval, ip);
htab[IDX (hval)] = ip;
ip++;
}
while (len--);
#endif
}
else
{
/* one more literal byte we must copy */
if (expect_false (op >= out_end))
return 0;
lit++; *op++ = *ip++;
if (expect_false (lit == MAX_LIT))
{
op [- lit - 1] = lit - 1; /* stop run */
lit = 0; op++; /* start run */
}
}
}
if (op + 3 > out_end) /* at most 3 bytes can be missing here */
return 0;
while (ip < in_end)
{
lit++; *op++ = *ip++;
if (expect_false (lit == MAX_LIT))
{
op [- lit - 1] = lit - 1; /* stop run */
lit = 0; op++; /* start run */
}
}
op [- lit - 1] = lit - 1; /* end run */
op -= !lit; /* undo run if length is zero */
return op - (u8 *)out_data;
}

View File

@@ -0,0 +1,154 @@
/*
* Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU General Public License ("GPL") version 2 or any later version,
* in which case the provisions of the GPL are applicable instead of
* the above. If you wish to allow the use of your version of this file
* only under the terms of the GPL and not to allow others to use your
* version of this file under the BSD license, indicate your decision
* by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete the
* provisions above, a recipient may use your version of this file under
* either the BSD or the GPL.
*/
#include "lzfP.h"
#if AVOID_ERRNO
# define SET_ERRNO(n)
#else
# include <errno.h>
# define SET_ERRNO(n) errno = (n)
#endif
/* ASM is slower than C in HDF5 tests -- A.C. 2/5/09
#ifndef __STRICT_ANSI__
#ifndef H5PY_DISABLE_LZF_ASM
#if (__i386 || __amd64) && __GNUC__ >= 3
# define lzf_movsb(dst, src, len) \
asm ("rep movsb" \
: "=D" (dst), "=S" (src), "=c" (len) \
: "0" (dst), "1" (src), "2" (len));
#endif
#endif
#endif
*/
unsigned int
lzf_decompress (const void *const in_data, unsigned int in_len,
void *out_data, unsigned int out_len)
{
u8 const *ip = (const u8 *)in_data;
u8 *op = (u8 *)out_data;
u8 const *const in_end = ip + in_len;
u8 *const out_end = op + out_len;
do
{
unsigned int ctrl = *ip++;
if (ctrl < (1 << 5)) /* literal run */
{
ctrl++;
if (op + ctrl > out_end)
{
SET_ERRNO (E2BIG);
return 0;
}
#if CHECK_INPUT
if (ip + ctrl > in_end)
{
SET_ERRNO (EINVAL);
return 0;
}
#endif
#ifdef lzf_movsb
lzf_movsb (op, ip, ctrl);
#else
do
*op++ = *ip++;
while (--ctrl);
#endif
}
else /* back reference */
{
unsigned int len = ctrl >> 5;
u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;
#if CHECK_INPUT
if (ip >= in_end)
{
SET_ERRNO (EINVAL);
return 0;
}
#endif
if (len == 7)
{
len += *ip++;
#if CHECK_INPUT
if (ip >= in_end)
{
SET_ERRNO (EINVAL);
return 0;
}
#endif
}
ref -= *ip++;
if (op + len + 2 > out_end)
{
SET_ERRNO (E2BIG);
return 0;
}
if (ref < (u8 *)out_data)
{
SET_ERRNO (EINVAL);
return 0;
}
#ifdef lzf_movsb
len += 2;
lzf_movsb (op, ref, len);
#else
*op++ = *ref++;
*op++ = *ref++;
do
*op++ = *ref++;
while (--len);
#endif
}
}
while (ip < in_end);
return op - (u8 *)out_data;
}

View File

@@ -0,0 +1,261 @@
/***** Preamble block *********************************************************
*
* This file is part of h5py, a low-level Python interface to the HDF5 library.
*
* Copyright (C) 2008 Andrew Collette
* http://h5py.alfven.org
* License: BSD (See LICENSE.txt for full license)
*
* $Date$
*
****** End preamble block ****************************************************/
/*
Implements an LZF filter module for HDF5, using the BSD-licensed library
by Marc Alexander Lehmann (http://www.goof.com/pcg/marc/liblzf.html).
No Python-specific code is used. The filter behaves like the DEFLATE
filter, in that it is called for every type and space, and returns 0
if the data cannot be compressed.
The only public function is (int) register_lzf(void), which passes on
the result from H5Zregister.
*/
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include "hdf5.h"
#include "lzf/lzf.h"
#include "lzf_filter.h"
/* Our own versions of H5Epush_sim, as it changed in 1.8 */
#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 7
#define PUSH_ERR(func, minor, str) H5Epush(__FILE__, func, __LINE__, H5E_PLINE, minor, str)
#define H5PY_GET_FILTER H5Pget_filter_by_id
#else
#define PUSH_ERR(func, minor, str) H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str)
#define H5PY_GET_FILTER(a,b,c,d,e,f,g) H5Pget_filter_by_id2(a,b,c,d,e,f,g,NULL)
#endif
/* Deal with the multiple definitions for H5Z_class_t.
Note: Only HDF5 1.6 and 1.8 are supported.
(1) The old class should always be used for HDF5 1.6
(2) The new class should always be used for HDF5 1.8 < 1.8.3
(3) The old class should be used for HDF5 1.8 >= 1.8.3 only if the
macro H5_USE_16_API is set
*/
#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 8 && (H5_VERS_RELEASE < 3 || !H5_USE_16_API)
#define H5PY_H5Z_NEWCLS 1
#else
#define H5PY_H5Z_NEWCLS 0
#endif
size_t lzf_filter(unsigned flags, size_t cd_nelmts,
const unsigned cd_values[], size_t nbytes,
size_t *buf_size, void **buf);
herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space);
/* Try to register the filter, passing on the HDF5 return value */
int register_lzf(void){
int retval;
#if H5PY_H5Z_NEWCLS
H5Z_class_t filter_class = {
H5Z_CLASS_T_VERS,
(H5Z_filter_t)(H5PY_FILTER_LZF),
1, 1,
"lzf",
NULL,
(H5Z_set_local_func_t)(lzf_set_local),
(H5Z_func_t)(lzf_filter)
};
#else
H5Z_class_t filter_class = {
(H5Z_filter_t)(H5PY_FILTER_LZF),
"lzf",
NULL,
(H5Z_set_local_func_t)(lzf_set_local),
(H5Z_func_t)(lzf_filter)
};
#endif
retval = H5Zregister(&filter_class);
if(retval<0){
PUSH_ERR("register_lzf", H5E_CANTREGISTER, "Can't register LZF filter");
}
return retval;
}
/* Filter setup. Records the following inside the DCPL:
1. If version information is not present, set slots 0 and 1 to the filter
revision and LZF API version, respectively.
2. Compute the chunk size in bytes and store it in slot 2.
*/
herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space){
int ndims;
int i;
herr_t r;
unsigned int bufsize;
hsize_t chunkdims[32];
unsigned int flags;
size_t nelements = 8;
unsigned values[] = {0,0,0,0,0,0,0,0};
r = H5PY_GET_FILTER(dcpl, H5PY_FILTER_LZF, &flags, &nelements, values, 0, NULL);
if(r<0) return -1;
if(nelements < 3) nelements = 3; /* First 3 slots reserved. If any higher
slots are used, preserve the contents. */
/* It seems the H5Z_FLAG_REVERSE flag doesn't work here, so we have to be
careful not to clobber any existing version info */
if(values[0]==0) values[0] = H5PY_FILTER_LZF_VERSION;
if(values[1]==0) values[1] = LZF_VERSION;
ndims = H5Pget_chunk(dcpl, 32, chunkdims);
if(ndims<0) return -1;
if(ndims>32){
PUSH_ERR("lzf_set_local", H5E_CALLBACK, "Chunk rank exceeds limit");
return -1;
}
bufsize = H5Tget_size(type);
if(bufsize==0) return -1;
for(i=0;i<ndims;i++){
bufsize *= chunkdims[i];
}
values[2] = bufsize;
#ifdef H5PY_LZF_DEBUG
fprintf(stderr, "LZF: Computed buffer size %d\n", bufsize);
#endif
r = H5Pmodify_filter(dcpl, H5PY_FILTER_LZF, flags, nelements, values);
if(r<0) return -1;
return 1;
}
/* The filter function */
size_t lzf_filter(unsigned flags, size_t cd_nelmts,
const unsigned cd_values[], size_t nbytes,
size_t *buf_size, void **buf){
void* outbuf = NULL;
size_t outbuf_size = 0;
unsigned int status = 0; /* Return code from lzf routines */
/* We're compressing */
if(!(flags & H5Z_FLAG_REVERSE)){
/* Allocate an output buffer exactly as long as the input data; if
the result is larger, we simply return 0. The filter is flagged
as optional, so HDF5 marks the chunk as uncompressed and
proceeds.
*/
outbuf_size = (*buf_size);
outbuf = malloc(outbuf_size);
if(outbuf == NULL){
PUSH_ERR("lzf_filter", H5E_CALLBACK, "Can't allocate compression buffer");
goto failed;
}
status = lzf_compress(*buf, nbytes, outbuf, outbuf_size);
/* We're decompressing */
} else {
if((cd_nelmts>=3)&&(cd_values[2]!=0)){
outbuf_size = cd_values[2]; /* Precomputed buffer guess */
}else{
outbuf_size = (*buf_size);
}
#ifdef H5PY_LZF_DEBUG
fprintf(stderr, "Decompress %d chunk w/buffer %d\n", nbytes, outbuf_size);
#endif
while(!status){
free(outbuf);
outbuf = malloc(outbuf_size);
if(outbuf == NULL){
PUSH_ERR("lzf_filter", H5E_CALLBACK, "Can't allocate decompression buffer");
goto failed;
}
status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size);
if(!status){ /* decompression failed */
if(errno == E2BIG){
outbuf_size += (*buf_size);
#ifdef H5PY_LZF_DEBUG
fprintf(stderr, " Too small: %d\n", outbuf_size);
#endif
} else if(errno == EINVAL) {
PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression");
goto failed;
} else {
PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF decompression error");
goto failed;
}
} /* if !status */
} /* while !status */
} /* compressing vs decompressing */
if(status != 0){
free(*buf);
*buf = outbuf;
*buf_size = outbuf_size;
return status; /* Size of compressed/decompressed data */
}
failed:
free(outbuf);
return 0;
} /* End filter function */

View File

@ -0,0 +1,38 @@
/***** Preamble block *********************************************************
*
* This file is part of h5py, a low-level Python interface to the HDF5 library.
*
* Copyright (C) 2008 Andrew Collette
* http://h5py.alfven.org
* License: BSD (See LICENSE.txt for full license)
*
* $Date$
*
****** End preamble block ****************************************************/
#ifndef H5PY_LZF_H
#define H5PY_LZF_H
#ifdef __cplusplus
extern "C" {
#endif
/* Filter revision number, starting at 1 */
#define H5PY_FILTER_LZF_VERSION 4
/* Filter ID registered with the HDF Group as of 2/6/09. For maintenance
requests, contact the filter author directly. */
#define H5PY_FILTER_LZF 32000
/* Register the filter with the library. Returns a negative value on failure,
and a non-negative value on success.
*/
int register_lzf(void);
#ifdef __cplusplus
}
#endif
#endif
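A minimal sketch (not part of the original sources) of registering the filter and creating an LZF-compressed dataset from C. The dataset name, dataspace and chunk shape are placeholders:
#include "hdf5.h"
#include "lzf_filter.h"
hid_t create_lzf_dataset(hid_t file_id, hid_t space_id, int rank,
const hsize_t *chunk_dims)
{
if (register_lzf() < 0) return -1;
hid_t dcpl = H5Pcreate(H5P_DATASET_CREATE);
H5Pset_chunk(dcpl, rank, chunk_dims); /* filters require a chunked layout */
/* The optional flag matches the filter's habit of storing chunks raw
when compression does not pay off. */
H5Pset_filter(dcpl, H5PY_FILTER_LZF, H5Z_FLAG_OPTIONAL, 0, NULL);
hid_t dset = H5Dcreate2(file_id, "data", H5T_NATIVE_INT, space_id,
H5P_DEFAULT, dcpl, H5P_DEFAULT);
H5Pclose(dcpl);
return dset;
}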

View File

@ -0,0 +1,10 @@
# Include dependencies when building wheels on cibuildwheel
[build-system]
requires = [
"setuptools>=0.7",
"Cython>=0.19",
"oldest-supported-numpy",
"h5py>=2.4.0",
]
build-backend = "setuptools.build_meta"

View File

@ -0,0 +1,5 @@
# Order matters
setuptools>=0.7
Cython>=0.19
numpy>=1.6.1
h5py>=2.4.0

View File

@ -0,0 +1,10 @@
[install]
# These control the installation of the hdf5 dynamically loaded filter plugin.
h5plugin = 0
h5plugin-dir = /usr/local/hdf5/lib/plugin
[build_ext]
# Whether to compile with OpenMP multi-threading. Default is system dependent:
# False on OSX (since the clang compiler does not yet support OpenMP) and True
# otherwise.
omp = 1

View File

@ -0,0 +1,443 @@
from __future__ import absolute_import, division, print_function
# I didn't import unicode_literals. They break setuptools or Cython in python
# 2.7, but python 3 seems to be happy with them.
import glob
import os
from os import path
from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext as build_ext_
from setuptools.command.develop import develop as develop_
from setuptools.command.install import install as install_
from Cython.Compiler.Main import default_options
import shutil
import subprocess
import sys
import platform
VERSION_MAJOR = 0
VERSION_MINOR = 5
VERSION_POINT = 1
# Define ZSTD macro for cython compilation
default_options["compile_time_env"] = {"ZSTD_SUPPORT": False}
# Only unset in the 'release' branch and in tags.
VERSION_DEV = None
VERSION = "%d.%d.%d" % (VERSION_MAJOR, VERSION_MINOR, VERSION_POINT)
if VERSION_DEV:
VERSION = VERSION + ".dev%d" % VERSION_DEV
COMPILE_FLAGS = ["-O3", "-ffast-math", "-std=c99"]
# Cython breaks strict aliasing rules.
COMPILE_FLAGS += ["-fno-strict-aliasing"]
COMPILE_FLAGS += ["-fPIC"]
COMPILE_FLAGS_MSVC = ["/Ox", "/fp:fast"]
MACROS = [
("BSHUF_VERSION_MAJOR", VERSION_MAJOR),
("BSHUF_VERSION_MINOR", VERSION_MINOR),
("BSHUF_VERSION_POINT", VERSION_POINT),
]
H5PLUGINS_DEFAULT = "/usr/local/hdf5/lib/plugin"
# OSX's clang compiler does not support OpenMP.
if sys.platform == "darwin":
OMP_DEFAULT = False
else:
OMP_DEFAULT = True
# Build against the native architecture unless overridden by an environment variable
# This can also be overridden by a direct command line argument, or a `setup.cfg` entry
# This option is needed for the cibuildwheel action
if "BITSHUFFLE_ARCH" in os.environ:
MARCH_DEFAULT = os.environ["BITSHUFFLE_ARCH"]
else:
MARCH_DEFAULT = "native"
FALLBACK_CONFIG = {
"include_dirs": [],
"library_dirs": [],
"libraries": [],
"extra_compile_args": [],
"extra_link_args": [],
}
if "HDF5_DIR" in os.environ:
FALLBACK_CONFIG["include_dirs"] += [os.environ["HDF5_DIR"] + "/include"] # macports
FALLBACK_CONFIG["library_dirs"] += [os.environ["HDF5_DIR"] + "/lib"] # macports
elif sys.platform == "darwin":
# listing both the macports and homebrew paths here would generate
# "ld: warning: dir not found" at the linking phase, so nonexistent
# directories are filtered out below
FALLBACK_CONFIG["include_dirs"] += ["/opt/local/include"] # macports
FALLBACK_CONFIG["library_dirs"] += ["/opt/local/lib"] # macports
FALLBACK_CONFIG["include_dirs"] += ["/usr/local/include"] # homebrew
FALLBACK_CONFIG["library_dirs"] += ["/usr/local/lib"] # homebrew
elif sys.platform.startswith("freebsd"):
FALLBACK_CONFIG["include_dirs"] += ["/usr/local/include"] # homebrew
FALLBACK_CONFIG["library_dirs"] += ["/usr/local/lib"] # homebrew
FALLBACK_CONFIG["include_dirs"] = [
d for d in FALLBACK_CONFIG["include_dirs"] if path.isdir(d)
]
FALLBACK_CONFIG["library_dirs"] = [
d for d in FALLBACK_CONFIG["library_dirs"] if path.isdir(d)
]
FALLBACK_CONFIG["extra_compile_args"] = ["-DH5_BUILT_AS_DYNAMIC_LIB"]
def pkgconfig(*packages, **kw):
config = kw.setdefault("config", {})
optional_args = kw.setdefault("optional", "")
flag_map = {
"include_dirs": ["--cflags-only-I", 2],
"library_dirs": ["--libs-only-L", 2],
"libraries": ["--libs-only-l", 2],
"extra_compile_args": ["--cflags-only-other", 0],
"extra_link_args": ["--libs-only-other", 0],
}
for package in packages:
try:
subprocess.check_output(["pkg-config", package])
except (subprocess.CalledProcessError, OSError):
print(
"Can't find %s with pkg-config; falling back to static config" % package
)
for distutils_key in flag_map:
config.setdefault(distutils_key, []).extend(
FALLBACK_CONFIG[distutils_key]
)
config["libraries"].append(package)
else:
for distutils_key, (pkg_option, n) in flag_map.items():
items = (
subprocess.check_output(
["pkg-config", optional_args, pkg_option, package]
)
.decode("utf8")
.split()
)
opt = config.setdefault(distutils_key, [])
opt.extend([i[n:] for i in items])
return config
zstd_headers = ["zstd/lib/zstd.h"]
zstd_lib = ["zstd/lib/"]
zstd_sources = glob.glob("zstd/lib/common/*.c")
zstd_sources += glob.glob("zstd/lib/compress/*.c")
zstd_sources += glob.glob("zstd/lib/decompress/*.c")
ext_bshuf = Extension(
"bitshuffle.ext",
sources=[
"bitshuffle/ext.pyx",
"src/bitshuffle.c",
"src/bitshuffle_core.c",
"src/iochain.c",
"lz4/lz4.c",
],
include_dirs=["src/", "lz4/"],
depends=["src/bitshuffle.h", "src/bitshuffle_core.h", "src/iochain.h", "lz4/lz4.h"],
libraries=[],
define_macros=MACROS,
)
h5filter = Extension(
"bitshuffle.h5",
sources=[
"bitshuffle/h5.pyx",
"src/bshuf_h5filter.c",
"src/bitshuffle.c",
"src/bitshuffle_core.c",
"src/iochain.c",
"lz4/lz4.c",
],
depends=[
"src/bitshuffle.h",
"src/bitshuffle_core.h",
"src/iochain.h",
"src/bshuf_h5filter.h",
"lz4/lz4.h",
],
define_macros=MACROS + [("H5_USE_18_API", None)],
**pkgconfig("hdf5", config=dict(include_dirs=["src/", "lz4/"]))
)
if not sys.platform.startswith("win"):
h5filter.sources.append("src/hdf5_dl.c")
h5filter.libraries.remove("hdf5")
filter_plugin = Extension(
"bitshuffle.plugin.libh5bshuf",
sources=[
"src/bshuf_h5plugin.c",
"src/bshuf_h5filter.c",
"src/bitshuffle.c",
"src/bitshuffle_core.c",
"src/iochain.c",
"lz4/lz4.c",
],
depends=[
"src/bitshuffle.h",
"src/bitshuffle_core.h",
"src/iochain.h",
"src/bshuf_h5filter.h",
"lz4/lz4.h",
],
define_macros=MACROS,
**pkgconfig("hdf5", config=dict(include_dirs=["src/", "lz4/"]))
)
lzf_plugin = Extension(
"bitshuffle.plugin.libh5LZF",
sources=[
"src/lzf_h5plugin.c",
"lzf/lzf_filter.c",
"lzf/lzf/lzf_c.c",
"lzf/lzf/lzf_d.c",
],
depends=["lzf/lzf_filter.h", "lzf/lzf/lzf.h", "lzf/lzf/lzfP.h"],
**pkgconfig("hdf5", config=dict(include_dirs=["lzf/", "lzf/lzf/"]))
)
EXTENSIONS = [
ext_bshuf,
]
# Check for HDF5 support
HDF5_FILTER_SUPPORT = False
CPATHS = os.environ["CPATH"].split(":") if "CPATH" in os.environ else []
for p in ["/usr/include"] + pkgconfig("hdf5")["include_dirs"] + CPATHS:
if os.path.exists(os.path.join(p, "hdf5.h")):
HDF5_FILTER_SUPPORT = True
if HDF5_FILTER_SUPPORT:
EXTENSIONS.append(h5filter)
# Check for plugin hdf5 plugin support (hdf5 >= 1.8.11)
HDF5_PLUGIN_SUPPORT = False
CPATHS = os.environ["CPATH"].split(":") if "CPATH" in os.environ else []
for p in ["/usr/include"] + pkgconfig("hdf5")["include_dirs"] + CPATHS:
if os.path.exists(os.path.join(p, "H5PLextern.h")):
HDF5_PLUGIN_SUPPORT = True
if HDF5_PLUGIN_SUPPORT:
EXTENSIONS.extend([filter_plugin, lzf_plugin])
# For enabling ZSTD support when building wheels
# This needs to be done after all Extensions have been added to EXTENSIONS
if "ENABLE_ZSTD" in os.environ:
default_options["compile_time_env"] = {"ZSTD_SUPPORT": True}
for ext in EXTENSIONS:
if ext.name in [
"bitshuffle.ext",
"bitshuffle.h5",
"bitshuffle.plugin.libh5bshuf",
]:
ext.sources += zstd_sources
ext.include_dirs += zstd_lib
ext.depends += zstd_headers
ext.define_macros += [("ZSTD_SUPPORT", 1)]
class develop(develop_):
def run(self):
# Dummy directory for copying build plugins.
if not path.isdir("bitshuffle/plugin"):
os.mkdir("bitshuffle/plugin")
develop_.run(self)
# Custom installation to include installing dynamic filters.
class install(install_):
user_options = install_.user_options + [
("h5plugin", None, "Install HDF5 filter plugins for use outside of python."),
(
"h5plugin-dir=",
None,
"Where to install filter plugins. Default %s." % H5PLUGINS_DEFAULT,
),
("zstd", None, "Install ZSTD support."),
]
def initialize_options(self):
install_.initialize_options(self)
self.h5plugin = False
self.zstd = False
self.h5plugin_dir = H5PLUGINS_DEFAULT
def finalize_options(self):
install_.finalize_options(self)
if self.h5plugin not in ("0", "1", True, False):
raise ValueError("Invalid h5plugin argument. Must be '0' or '1'.")
self.h5plugin = int(self.h5plugin)
self.h5plugin_dir = path.abspath(self.h5plugin_dir)
self.zstd = self.zstd
# Add ZSTD files and macro to extensions if ZSTD enabled
if self.zstd:
default_options["compile_time_env"] = {"ZSTD_SUPPORT": True}
for ext in EXTENSIONS:
if ext.name in [
"bitshuffle.ext",
"bitshuffle.h5",
"bitshuffle.plugin.libh5bshuf",
]:
ext.sources += zstd_sources
ext.include_dirs += zstd_lib
ext.depends += zstd_headers
ext.define_macros += [("ZSTD_SUPPORT", 1)]
def run(self):
install_.run(self)
if self.h5plugin:
if not HDF5_PLUGIN_SUPPORT:
print("HDF5 < 1.8.11, not installing filter plugins.")
return
plugin_build = path.join(self.build_lib, "bitshuffle", "plugin")
try:
os.makedirs(self.h5plugin_dir)
except OSError as e:
if e.args[0] == 17:
# Directory already exists, this is fine.
pass
else:
raise
plugin_libs = glob.glob(path.join(plugin_build, "*"))
for plugin_lib in plugin_libs:
plugin_name = path.split(plugin_lib)[1]
shutil.copy2(plugin_lib, path.join(self.h5plugin_dir, plugin_name))
print("Installed HDF5 filter plugins to %s" % self.h5plugin_dir)
# Command line or site.cfg specification of OpenMP.
class build_ext(build_ext_):
user_options = build_ext_.user_options + [
(
"omp=",
None,
"Whether to compile with OpenMP threading. Default"
" on current system is %s." % str(OMP_DEFAULT),
),
(
"march=",
None,
"Generate instructions for a specific machine type. Default is %s."
% MARCH_DEFAULT,
),
]
boolean_options = build_ext_.boolean_options + ["omp"]
def initialize_options(self):
build_ext_.initialize_options(self)
self.omp = OMP_DEFAULT
self.march = MARCH_DEFAULT
def finalize_options(self):
# For some reason this gets run twice. Be careful to print messages and
# add arguments only once.
build_ext_.finalize_options(self)
if self.omp not in ("0", "1", True, False):
raise ValueError("Invalid omp argument. Mut be '0' or '1'.")
self.omp = int(self.omp)
import numpy as np
ext_bshuf.include_dirs.append(np.get_include())
# Required only by old version of setuptools < 18.0
from Cython.Build import cythonize
self.extensions = cythonize(self.extensions)
for ext in self.extensions:
ext._needs_stub = False
def build_extensions(self):
# Set compiler flags including architecture
if self.compiler.compiler_type == "msvc":
openmpflag = "/openmp"
compileflags = COMPILE_FLAGS_MSVC
else:
openmpflag = "-fopenmp"
archi = platform.machine()
if archi in ("i386", "x86_64"):
compileflags = COMPILE_FLAGS + ["-march=%s" % self.march]
else:
compileflags = COMPILE_FLAGS + ["-mcpu=%s" % self.march]
if archi == "ppc64le":
compileflags = COMPILE_FLAGS + ["-DNO_WARN_X86_INTRINSICS"]
if self.omp not in ("0", "1", True, False):
raise ValueError("Invalid omp argument. Must be '0' or '1'.")
self.omp = int(self.omp)
# Add the appropriate OpenMP flags if needed
if self.omp:
if not hasattr(self, "_printed_omp_message"):
self._printed_omp_message = True
print("\n#################################")
print("# Compiling with OpenMP support #")
print("#################################\n")
# More portable to pass -fopenmp to linker.
# self.libraries += ['gomp']
compileflags += [openmpflag]
linkflags = [openmpflag]
else:
linkflags = []
# Add the compile/link options to each extension
for e in self.extensions:
e.extra_compile_args = list(set(e.extra_compile_args).union(compileflags))
e.extra_link_args = list(set(e.extra_link_args).union(linkflags))
build_ext_.build_extensions(self)
# Don't install numpy/cython/hdf5 if not needed
for cmd in ["sdist", "clean", "--help", "--help-commands", "--version"]:
if cmd in sys.argv:
setup_requires = []
break
else:
setup_requires = ["Cython>=0.19", "numpy>=1.6.1"]
with open("requirements.txt") as f:
requires = f.read().splitlines()
requires = [r.split()[0] for r in requires]
with open("README.rst") as r:
long_description = r.read()
# TODO hdf5 support should be an "extra". Figure out how to set this up.
setup(
name="bitshuffle",
version=VERSION,
packages=["bitshuffle"],
scripts=[],
ext_modules=EXTENSIONS,
cmdclass={"build_ext": build_ext, "install": install, "develop": develop},
setup_requires=setup_requires,
install_requires=requires,
# extras_require={'H5': ["h5py"]},
package_data={"": ["data/*"]},
# metadata for upload to PyPI
author="Kiyoshi Wesley Masui",
author_email="kiyo@physics.ubc.ca",
description="Bitshuffle filter for improving typed data compression.",
long_description=long_description,
license="MIT",
url="https://github.com/kiyo-masui/bitshuffle",
download_url=("https://github.com/kiyo-masui/bitshuffle/tarball/%s" % VERSION),
keywords=["compression", "hdf5", "numpy"],
)

View File

@ -0,0 +1,279 @@
/*
* Bitshuffle - Filter for improving compression of typed binary data.
*
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*/
#include "bitshuffle.h"
#include "bitshuffle_core.h"
#include "bitshuffle_internals.h"
#include "lz4.h"
#ifdef ZSTD_SUPPORT
#include "zstd.h"
#endif
#include <stdio.h>
#include <string.h>
// Macros.
#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \
free(buf); return count - 1000; }
/* Bitshuffle and compress a single block. */
int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
const size_t size, const size_t elem_size, const int option) {
int64_t nbytes, count;
void *tmp_buf_bshuf;
void *tmp_buf_lz4;
size_t this_iter;
const void *in;
void *out;
tmp_buf_bshuf = malloc(size * elem_size);
if (tmp_buf_bshuf == NULL) return -1;
int dst_capacity = LZ4_compressBound(size * elem_size);
tmp_buf_lz4 = malloc(dst_capacity);
if (tmp_buf_lz4 == NULL){
free(tmp_buf_bshuf);
return -1;
}
in = ioc_get_in(C_ptr, &this_iter);
ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));
count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
if (count < 0) {
free(tmp_buf_lz4);
free(tmp_buf_bshuf);
return count;
}
nbytes = LZ4_compress_default((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size, dst_capacity);
free(tmp_buf_bshuf);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4);
out = ioc_get_out(C_ptr, &this_iter);
ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));
bshuf_write_uint32_BE(out, nbytes);
memcpy((char *) out + 4, tmp_buf_lz4, nbytes);
free(tmp_buf_lz4);
return nbytes + 4;
}
/* Decompress and bitunshuffle a single block. */
int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
const size_t size, const size_t elem_size, const int option) {
int64_t nbytes, count;
void *out, *tmp_buf;
const void *in;
size_t this_iter;
int32_t nbytes_from_header;
in = ioc_get_in(C_ptr, &this_iter);
nbytes_from_header = bshuf_read_uint32_BE(in);
ioc_set_next_in(C_ptr, &this_iter,
(void*) ((char*) in + nbytes_from_header + 4));
out = ioc_get_out(C_ptr, &this_iter);
ioc_set_next_out(C_ptr, &this_iter,
(void *) ((char *) out + size * elem_size));
tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header,
size * elem_size);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
if (nbytes != size * elem_size) {
free(tmp_buf);
return -91;
}
nbytes = nbytes_from_header;
count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
nbytes += 4;
free(tmp_buf);
return nbytes;
}
#ifdef ZSTD_SUPPORT
/* Bitshuffle and compress a single block. */
int64_t bshuf_compress_zstd_block(ioc_chain *C_ptr, \
const size_t size, const size_t elem_size, const int comp_lvl) {
int64_t nbytes, count;
void *tmp_buf_bshuf;
void *tmp_buf_zstd;
size_t this_iter;
const void *in;
void *out;
tmp_buf_bshuf = malloc(size * elem_size);
if (tmp_buf_bshuf == NULL) return -1;
size_t tmp_buf_zstd_size = ZSTD_compressBound(size * elem_size);
tmp_buf_zstd = malloc(tmp_buf_zstd_size);
if (tmp_buf_zstd == NULL){
free(tmp_buf_bshuf);
return -1;
}
in = ioc_get_in(C_ptr, &this_iter);
ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));
count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
if (count < 0) {
free(tmp_buf_zstd);
free(tmp_buf_bshuf);
return count;
}
nbytes = ZSTD_compress(tmp_buf_zstd, tmp_buf_zstd_size, (const void*)tmp_buf_bshuf, size * elem_size, comp_lvl);
free(tmp_buf_bshuf);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf_zstd);
out = ioc_get_out(C_ptr, &this_iter);
ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));
bshuf_write_uint32_BE(out, nbytes);
memcpy((char *) out + 4, tmp_buf_zstd, nbytes);
free(tmp_buf_zstd);
return nbytes + 4;
}
/* Decompress and bitunshuffle a single block. */
int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr,
const size_t size, const size_t elem_size, const int option) {
int64_t nbytes, count;
void *out, *tmp_buf;
const void *in;
size_t this_iter;
int32_t nbytes_from_header;
in = ioc_get_in(C_ptr, &this_iter);
nbytes_from_header = bshuf_read_uint32_BE(in);
ioc_set_next_in(C_ptr, &this_iter,
(void*) ((char*) in + nbytes_from_header + 4));
out = ioc_get_out(C_ptr, &this_iter);
ioc_set_next_out(C_ptr, &this_iter,
(void *) ((char *) out + size * elem_size));
tmp_buf = malloc(size * elem_size);
if (tmp_buf == NULL) return -1;
nbytes = ZSTD_decompress(tmp_buf, size * elem_size, (void *)((char *) in + 4), nbytes_from_header);
CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
if (nbytes != size * elem_size) {
free(tmp_buf);
return -91;
}
nbytes = nbytes_from_header;
count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
CHECK_ERR_FREE(count, tmp_buf);
nbytes += 4;
free(tmp_buf);
return nbytes;
}
#endif // ZSTD_SUPPORT
/* ---- Public functions ----
*
* See header file for description and usage.
*
*/
size_t bshuf_compress_lz4_bound(const size_t size,
const size_t elem_size, size_t block_size) {
size_t bound, leftover;
if (block_size == 0) {
block_size = bshuf_default_block_size(elem_size);
}
if (block_size % BSHUF_BLOCKED_MULT) return -81;
// Note that each block gets a 4 byte header.
// Size of full blocks.
bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size);
// Size of partial blocks, if any.
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4;
// Size of uncompressed data not fitting into any blocks.
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
return bound;
}
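/* Worked instance (illustrative numbers): size = 1000 elements with
elem_size = 4 and block_size = 512 gives one full block
(LZ4_compressBound(2048) + 4 header bytes), one partial block of
488 elements (1000 % 512 rounded down to a multiple of 8), and no
trailing uncompressed bytes since 1000 is itself a multiple of 8. */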
int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size) {
return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size,
elem_size, block_size, 0/*option*/);
}
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size) {
return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size,
elem_size, block_size, 0/*option*/);
}
#ifdef ZSTD_SUPPORT
size_t bshuf_compress_zstd_bound(const size_t size,
const size_t elem_size, size_t block_size) {
size_t bound, leftover;
if (block_size == 0) {
block_size = bshuf_default_block_size(elem_size);
}
if (block_size % BSHUF_BLOCKED_MULT) return -81;
// Note that each block gets a 4 byte header.
// Size of full blocks.
bound = (ZSTD_compressBound(block_size * elem_size) + 4) * (size / block_size);
// Size of partial blocks, if any.
leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT;
if (leftover) bound += ZSTD_compressBound(leftover * elem_size) + 4;
// Size of uncompressed data not fitting into any blocks.
bound += (size % BSHUF_BLOCKED_MULT) * elem_size;
return bound;
}
int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size, const int comp_lvl) {
return bshuf_blocked_wrap_fun(&bshuf_compress_zstd_block, in, out, size,
elem_size, block_size, comp_lvl);
}
int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size) {
return bshuf_blocked_wrap_fun(&bshuf_decompress_zstd_block, in, out, size,
elem_size, block_size, 0/*option*/);
}
#endif // ZSTD_SUPPORT

View File

@ -0,0 +1,205 @@
/*
* Bitshuffle - Filter for improving compression of typed binary data.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Worker routines return an int64_t which is the number of bytes processed
* if positive or an error code if negative.
*
* Error codes:
* -1 : Failed to allocate memory.
* -11 : Missing SSE.
* -12 : Missing AVX.
* -80 : Input size not a multiple of 8.
* -81 : block_size not multiple of 8.
* -91 : Decompression error, wrong number of bytes processed.
* -1YYY : Error internal to compression routine with error code -YYY.
*/
#ifndef BITSHUFFLE_H
#define BITSHUFFLE_H
#include <stdlib.h>
#include "bitshuffle_core.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
* ---- LZ4 Interface ----
*/
/* ---- bshuf_compress_lz4_bound ----
*
* Bound on size of data compressed with *bshuf_compress_lz4*.
*
* Parameters
* ----------
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* Bound on compressed data size.
*
*/
size_t bshuf_compress_lz4_bound(const size_t size,
const size_t elem_size, size_t block_size);
/* ---- bshuf_compress_lz4 ----
*
* Bitshuffle and compress the data using LZ4.
*
* Transpose within elements, in blocks of data of *block_size* elements then
* compress the blocks using LZ4. In the output buffer, each block is prefixed
* by a 4 byte integer giving the compressed size of that block.
*
* Output buffer must be large enough to hold the compressed data. This could
* be in principle substantially larger than the input buffer. Use the routine
* *bshuf_compress_lz4_bound* to get an upper limit.
*
* Parameters
* ----------
* in : input buffer, must be of size * elem_size bytes
* out : output buffer, must be large enough to hold data.
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes used in output buffer, negative error-code if failed.
*
*/
int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const size_t
elem_size, size_t block_size);
/* ---- bshuf_decompress_lz4 ----
*
* Undo compression and bitshuffling.
*
* Decompress data then un-bitshuffle it in blocks of *block_size* elements.
*
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
* must match the parameters used to compress the data.
*
* Parameters
* ----------
* in : input buffer
* out : output buffer, must be of size * elem_size bytes
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes consumed in *input* buffer, negative error-code if failed.
*
*/
int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
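/* Round-trip sketch (illustrative, not part of the API): a hypothetical
* buffer of 1024 floats, letting the library pick the block size.
*
* size_t n = 1024, elem = sizeof(float);
* void *comp = malloc(bshuf_compress_lz4_bound(n, elem, 0));
* int64_t c = bshuf_compress_lz4(data, comp, n, elem, 0);
* int64_t d = bshuf_decompress_lz4(comp, back, n, elem, 0);
*
* On success c equals d, since decompression consumes exactly what
* compression produced; negative values are the error codes listed at
* the top of this header.
*/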
/*
* ---- ZSTD Interface ----
*/
#ifdef ZSTD_SUPPORT
/* ---- bshuf_compress_zstd_bound ----
*
* Bound on size of data compressed with *bshuf_compress_zstd*.
*
* Parameters
* ----------
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* Bound on compressed data size.
*
*/
size_t bshuf_compress_zstd_bound(const size_t size,
const size_t elem_size, size_t block_size);
/* ---- bshuf_compress_zstd ----
*
* Bitshuffle and compress the data using ZSTD.
*
* Transpose within elements, in blocks of data of *block_size* elements then
* compress the blocks using ZSTD. In the output buffer, each block is prefixed
* by a 4 byte integer giving the compressed size of that block.
*
* Output buffer must be large enough to hold the compressed data. This could
* be in principle substantially larger than the input buffer. Use the routine
* *bshuf_compress_zstd_bound* to get an upper limit.
*
* Parameters
* ----------
* in : input buffer, must be of size * elem_size bytes
* out : output buffer, must be large enough to hold data.
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
* comp_lvl : compression level applied
*
* Returns
* -------
* number of bytes used in output buffer, negative error-code if failed.
*
*/
int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, const size_t
elem_size, size_t block_size, const int comp_lvl);
/* ---- bshuf_decompress_zstd ----
*
* Undo compression and bitshuffling.
*
* Decompress data then un-bitshuffle it in blocks of *block_size* elements.
*
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
* must match the parameters used to compress the data.
*
* Parameters
* ----------
* in : input buffer
* out : output buffer, must be of size * elem_size bytes
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Process in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes consumed in *input* buffer, negative error-code if failed.
*
*/
int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
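/* Sketch: the ZSTD variants differ from LZ4 only in the extra compression
* level argument (3 here is purely illustrative):
*
* int64_t c = bshuf_compress_zstd(data, comp, n, elem, 0, 3);
* int64_t d = bshuf_decompress_zstd(comp, back, n, elem, 0);
*/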
#endif // ZSTD_SUPPORT
#ifdef __cplusplus
} // extern "C"
#endif
#endif // BITSHUFFLE_H

File diff suppressed because it is too large

View File

@ -0,0 +1,182 @@
/*
* Bitshuffle - Filter for improving compression of typed binary data.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Worker routines return an int64_t which is the number of bytes processed
* if positive or an error code if negative.
*
* Error codes:
* -1 : Failed to allocate memory.
* -11 : Missing SSE.
* -12 : Missing AVX.
* -13 : Missing Arm Neon.
* -14 : Missing AVX512.
* -80 : Input size not a multiple of 8.
* -81 : block_size not multiple of 8.
* -91 : Decompression error, wrong number of bytes processed.
* -1YYY : Error internal to compression routine with error code -YYY.
*/
#ifndef BITSHUFFLE_CORE_H
#define BITSHUFFLE_CORE_H
// We assume GNU g++ defining `__cplusplus` has stdint.h
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
#include <stdint.h>
#else
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#endif
#include <stdlib.h>
// These are usually set in the setup.py.
#ifndef BSHUF_VERSION_MAJOR
#define BSHUF_VERSION_MAJOR 0
#define BSHUF_VERSION_MINOR 4
#define BSHUF_VERSION_POINT 0
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* --- bshuf_using_SSE2 ----
*
* Whether the routines were compiled with the SSE2 instruction set.
*
* Returns
* -------
* 1 if using SSE2, 0 otherwise.
*
*/
int bshuf_using_SSE2(void);
/* ---- bshuf_using_NEON ----
*
* Whether the routines were compiled with the NEON instruction set.
*
* Returns
* -------
* 1 if using NEON, 0 otherwise.
*
*/
int bshuf_using_NEON(void);
/* ---- bshuf_using_AVX2 ----
*
* Whether the routines were compiled with the AVX2 instruction set.
*
* Returns
* -------
* 1 if using AVX2, 0 otherwise.
*
*/
int bshuf_using_AVX2(void);
/* ---- bshuf_using_AVX512 ----
*
* Whether the routines were compiled with the AVX512 instruction set.
*
* Returns
* -------
* 1 if using AVX512, 0 otherwise.
*
*/
int bshuf_using_AVX512(void);
/* ---- bshuf_default_block_size ----
*
* The default block size as function of element size.
*
* This is the block size used by the blocked routines (any routine
* taking a *block_size* argument) when the block_size is not provided
* (zero is passed).
*
* The results of this routine are guaranteed to be stable such that
* shuffled/compressed data can always be decompressed.
*
* Parameters
* ----------
* elem_size : element size of data to be shuffled/compressed.
*
*/
size_t bshuf_default_block_size(const size_t elem_size);
/* ---- bshuf_bitshuffle ----
*
* Bitshuffle the data.
*
* Transpose the bits within elements, in blocks of *block_size*
* elements.
*
* Parameters
* ----------
* in : input buffer, must be of size * elem_size bytes
* out : output buffer, must be of size * elem_size bytes
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Do transpose in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes processed, negative error-code if failed.
*
*/
int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
/* ---- bshuf_bitunshuffle ----
*
* Unshuffle bitshuffled data.
*
* Untranspose the bits within elements, in blocks of *block_size*
* elements.
*
* To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size*
* must match the parameters used to shuffle the data.
*
* Parameters
* ----------
* in : input buffer, must be of size * elem_size bytes
* out : output buffer, must be of size * elem_size bytes
* size : number of elements in input
* elem_size : element size of typed data
* block_size : Do transpose in blocks of this many elements. Pass 0 to
* select automatically (recommended).
*
* Returns
* -------
* number of bytes processed, negative error-code if failed.
*
*/
int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size,
const size_t elem_size, size_t block_size);
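/* Shuffle-only round-trip sketch (hypothetical uint16_t buffers; count is
* assumed to be a multiple of 8, see error -80 above):
*
* int64_t r1 = bshuf_bitshuffle(src, shuffled, count, 2, 0);
* int64_t r2 = bshuf_bitunshuffle(shuffled, dst, count, 2, 0);
*
* Both return count * 2 on success, after which dst compares equal to src.
*/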
#ifdef __cplusplus
} // extern "C"
#endif
#endif // BITSHUFFLE_CORE_H

View File

@ -0,0 +1,75 @@
/*
* Bitshuffle - Filter for improving compression of typed binary data.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*/
#ifndef BITSHUFFLE_INTERNALS_H
#define BITSHUFFLE_INTERNALS_H
// We assume GNU g++ defining `__cplusplus` has stdint.h
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus)
#include <stdint.h>
#else
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#endif
#include <stdlib.h>
#include "iochain.h"
// Constants.
#ifndef BSHUF_MIN_RECOMMEND_BLOCK
#define BSHUF_MIN_RECOMMEND_BLOCK 128
#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this.
#define BSHUF_TARGET_BLOCK_SIZE_B 8192
#endif
// Macros.
#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; }
#ifdef __cplusplus
extern "C" {
#endif
/* ---- Utility functions for internal use only ---- */
int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size,
const size_t elem_size);
/* Read a 32 bit unsigned integer from a buffer in big endian order. */
uint32_t bshuf_read_uint32_BE(const void* buf);
/* Write a 32 bit unsigned integer to a buffer in big endian order. */
void bshuf_write_uint32_BE(void* buf, uint32_t num);
int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size,
const size_t elem_size);
/* Function definition for worker functions that process a single block. */
typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr,
const size_t size, const size_t elem_size, const int option);
/* Wrap a function for processing a single block to process an entire buffer in
* parallel. */
int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out,
const size_t size, const size_t elem_size, size_t block_size, const int option);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // BITSHUFFLE_INTERNALS_H

View File

@ -0,0 +1,260 @@
/*
* Bitshuffle HDF5 filter
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*/
#include "bitshuffle.h"
#include "bshuf_h5filter.h"
#define PUSH_ERR(func, minor, str) \
H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str)
// Prototypes from bitshuffle.c
void bshuf_write_uint64_BE(void* buf, uint64_t num);
uint64_t bshuf_read_uint64_BE(void* buf);
void bshuf_write_uint32_BE(void* buf, uint32_t num);
uint32_t bshuf_read_uint32_BE(const void* buf);
// Only called on compression, not on reverse.
herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){
herr_t r;
size_t ii;
unsigned int elem_size;
unsigned int flags;
size_t nelements = 8;
size_t nelem_max = 11;
unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0};
unsigned tmp_values[] = {0,0,0,0,0,0,0,0};
char msg[80];
r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements,
tmp_values, 0, NULL, NULL);
if(r<0) return -1;
// First 3 slots reserved. Move any passed options to higher addresses.
for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) {
values[ii + 3] = tmp_values[ii];
}
nelements = 3 + nelements;
values[0] = BSHUF_VERSION_MAJOR;
values[1] = BSHUF_VERSION_MINOR;
elem_size = H5Tget_size(type);
if(elem_size <= 0) {
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK,
"Invalid element size.");
return -1;
}
values[2] = elem_size;
// Validate user supplied arguments.
if (nelements > 3) {
if (values[3] % 8 || values[3] < 0) {
sprintf(msg, "Error in bitshuffle. Invalid block size: %d.",
values[3]);
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg);
return -1;
}
}
if (nelements > 4) {
switch (values[4]) {
case 0:
break;
case BSHUF_H5_COMPRESS_LZ4:
break;
#ifdef ZSTD_SUPPORT
case BSHUF_H5_COMPRESS_ZSTD:
break;
#endif
default:
PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK,
"Invalid bitshuffle compression.");
}
}
r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values);
if(r<0) return -1;
return 1;
}
size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts,
const unsigned int cd_values[], size_t nbytes,
size_t *buf_size, void **buf) {
size_t size, elem_size;
int err = -1;
char msg[80];
size_t block_size = 0;
size_t buf_size_out, nbytes_uncomp, nbytes_out;
char* in_buf = *buf;
void *out_buf;
if (cd_nelmts < 3) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Not enough parameters.");
return 0;
}
elem_size = cd_values[2];
#ifdef ZSTD_SUPPORT
const int comp_lvl = cd_values[5];
#endif
// User specified block size.
if (cd_nelmts > 3) block_size = cd_values[3];
if (block_size == 0) block_size = bshuf_default_block_size(elem_size);
#ifndef ZSTD_SUPPORT
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"ZSTD compression filter chosen but ZSTD support not installed.");
return 0;
}
#endif
// Compression in addition to bitshuffle.
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
if (flags & H5Z_FLAG_REVERSE) {
// The first eight bytes are the uncompressed size of the chunk,
// stored as a big-endian integer (hence bshuf_read_uint64_BE).
nbytes_uncomp = bshuf_read_uint64_BE(in_buf);
// Override the block size with the one read from the header.
block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size;
// Skip over the header.
in_buf += 12;
buf_size_out = nbytes_uncomp;
} else {
nbytes_uncomp = nbytes;
// Pick which compression library to use
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size,
elem_size, block_size) + 12;
}
#ifdef ZSTD_SUPPORT
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
buf_size_out = bshuf_compress_zstd_bound(nbytes_uncomp / elem_size,
elem_size, block_size) + 12;
}
#endif
}
} else {
nbytes_uncomp = nbytes;
buf_size_out = nbytes;
}
// TODO, remove this restriction by memcopying the extra.
if (nbytes_uncomp % elem_size) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Non integer number of elements.");
return 0;
}
size = nbytes_uncomp / elem_size;
out_buf = malloc(buf_size_out);
if (out_buf == NULL) {
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK,
"Could not allocate output buffer.");
return 0;
}
if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) {
if (flags & H5Z_FLAG_REVERSE) {
// Bit unshuffle/decompress.
// Pick which compression library to use
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size);
}
#ifdef ZSTD_SUPPORT
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
err = bshuf_decompress_zstd(in_buf, out_buf, size, elem_size, block_size);
}
#endif
nbytes_out = nbytes_uncomp;
} else {
// Bit shuffle/compress.
// Write the header, described in
// http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
// Technically we should be using signed integers instead of
// unsigned ones, however for valid inputs (positive numbers) these
// have the same representation.
bshuf_write_uint64_BE(out_buf, nbytes_uncomp);
bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size);
if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) {
err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size,
elem_size, block_size);
}
#ifdef ZSTD_SUPPORT
else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) {
err = bshuf_compress_zstd(in_buf, (char*) out_buf + 12, size,
elem_size, block_size, comp_lvl);
}
#endif
nbytes_out = err + 12;
}
} else {
if (flags & H5Z_FLAG_REVERSE) {
// Bit unshuffle.
err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size,
block_size); } else {
// Bit shuffle.
err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size,
block_size); } nbytes_out = nbytes; }
//printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n",
//nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size);
if (err < 0) {
sprintf(msg, "Error in bitshuffle with error code %d.", err);
PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg);
free(out_buf);
return 0;
} else {
free(*buf);
*buf = out_buf;
*buf_size = buf_size_out;
return nbytes_out;
}
}
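/* Resulting on-disk chunk layout when compression is enabled (matching the
header writes above; all sizes big endian, per HDF5_LZ4.pdf):
bytes 0-7 : total uncompressed size
bytes 8-11 : block size in bytes
then, per block: a 4-byte compressed length followed by the payload. */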
H5Z_class_t bshuf_H5Filter[1] = {{
H5Z_CLASS_T_VERS,
(H5Z_filter_t)(BSHUF_H5FILTER),
1, 1,
"bitshuffle; see https://github.com/kiyo-masui/bitshuffle",
NULL,
(H5Z_set_local_func_t)(bshuf_h5_set_local),
(H5Z_func_t)(bshuf_h5_filter)
}};
int bshuf_register_h5filter(void){
int retval;
retval = H5Zregister(bshuf_H5Filter);
if(retval<0){
PUSH_ERR("bshuf_register_h5filter",
H5E_CANTREGISTER, "Can't register bitshuffle filter");
}
return retval;
}

View File

@ -0,0 +1,67 @@
/*
* Bitshuffle HDF5 filter
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Filter Options
* --------------
* block_size (option slot 0) : integer (optional)
* What block size to use (in elements not bytes). Default is 0,
* for which bitshuffle will pick a block size with a target of 8kb.
* Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4
* Whether to apply LZ4 compression to the data after bitshuffling.
* This is much faster than applying compression as a second filter
* because it is done when the small block of data is already in the
* L1 cache.
*
* For LZ4 compression, the compressed format of the data is the same as
* for the normal LZ4 filter described in
* http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf.
*
*/
#ifndef BSHUF_H5FILTER_H
#define BSHUF_H5FILTER_H
#ifdef __cplusplus
extern "C" {
#endif
#define H5Z_class_t_vers 2
#include "hdf5.h"
#define BSHUF_H5FILTER 32008
#define BSHUF_H5_COMPRESS_LZ4 2
#define BSHUF_H5_COMPRESS_ZSTD 3
extern H5Z_class_t bshuf_H5Filter[1];
/* ---- bshuf_register_h5filter ----
*
* Register the bitshuffle HDF5 filter within the HDF5 library.
*
* Call this before using the bitshuffle HDF5 filter from C unless
* using dynamically loaded filters.
*
*/
int bshuf_register_h5filter(void);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // BSHUF_H5FILTER_H
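A sketch of enabling bitshuffle with LZ4 through the two documented option slots; *dcpl* is assumed to be an existing chunked dataset creation property list:
unsigned int cd_values[2] = {0, BSHUF_H5_COMPRESS_LZ4}; /* 0 = automatic block size */
bshuf_register_h5filter();
H5Pset_filter(dcpl, BSHUF_H5FILTER, H5Z_FLAG_MANDATORY, 2, cd_values);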

View File

@ -0,0 +1,19 @@
/*
* Dynamically loaded filter plugin for HDF5 Bitshuffle filter.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*/
#include "bshuf_h5filter.h"
#include "H5PLextern.h"
H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;}
const void* H5PLget_plugin_info(void) {return bshuf_H5Filter;}

View File

@ -0,0 +1,358 @@
# /*##########################################################################
#
# Copyright (c) 2019 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/
/* This provides replacements for HDF5 functions/variables used by filters.
*
* These replacements provide no-op functions by default; once init_filter
* is called, they provide access to the HDF5 functions/variables through
* dynamic loading.
* This is useful on Linux/macOS to avoid linking the plugin with a dedicated
* HDF5 library.
*/
#include <stdarg.h>
#include <dlfcn.h>
#include <stdbool.h>
#include "hdf5.h"
/*Function types*/
/*H5*/
typedef herr_t (*DL_func_H5open)(void);
/*H5E*/
typedef herr_t (* DL_func_H5Epush1)(
const char *file, const char *func, unsigned line,
H5E_major_t maj, H5E_minor_t min, const char *str);
typedef herr_t (* DL_func_H5Epush2)(
hid_t err_stack, const char *file, const char *func, unsigned line,
hid_t cls_id, hid_t maj_id, hid_t min_id, const char *msg, ...);
/*H5P*/
typedef herr_t (* DL_func_H5Pget_filter_by_id2)(hid_t plist_id, H5Z_filter_t id,
unsigned int *flags/*out*/, size_t *cd_nelmts/*out*/,
unsigned cd_values[]/*out*/, size_t namelen, char name[]/*out*/,
unsigned *filter_config/*out*/);
typedef int (* DL_func_H5Pget_chunk)(
hid_t plist_id, int max_ndims, hsize_t dim[]/*out*/);
typedef herr_t (* DL_func_H5Pmodify_filter)(
hid_t plist_id, H5Z_filter_t filter,
unsigned int flags, size_t cd_nelmts,
const unsigned int cd_values[/*cd_nelmts*/]);
/*H5T*/
typedef size_t (* DL_func_H5Tget_size)(
hid_t type_id);
typedef H5T_class_t (* DL_func_H5Tget_class)(hid_t type_id);
typedef hid_t (* DL_func_H5Tget_super)(hid_t type);
typedef herr_t (* DL_func_H5Tclose)(hid_t type_id);
/*H5Z*/
typedef herr_t (* DL_func_H5Zregister)(
const void *cls);
static struct {
/*H5*/
DL_func_H5open H5open;
/*H5E*/
DL_func_H5Epush1 H5Epush1;
DL_func_H5Epush2 H5Epush2;
/*H5P*/
DL_func_H5Pget_filter_by_id2 H5Pget_filter_by_id2;
DL_func_H5Pget_chunk H5Pget_chunk;
DL_func_H5Pmodify_filter H5Pmodify_filter;
/*H5T*/
DL_func_H5Tget_size H5Tget_size;
DL_func_H5Tget_class H5Tget_class;
DL_func_H5Tget_super H5Tget_super;
DL_func_H5Tclose H5Tclose;
/*H5T*/
DL_func_H5Zregister H5Zregister;
} DL_H5Functions = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
static struct {
/*HDF5 variables*/
void *h5e_cantregister_ptr;
void *h5e_callback_ptr;
void *h5e_pline_ptr;
void *h5e_err_cls_ptr;
} H5Variables_ptr = {
NULL, NULL, NULL, NULL};
/*HDF5 variables*/
hid_t H5E_CANTREGISTER_g = -1;
hid_t H5E_CALLBACK_g = -1;
hid_t H5E_PLINE_g = -1;
hid_t H5E_ERR_CLS_g = -1;
static bool is_init = false;
/*
* Try to find a symbol within a library
*
* handle: Handle to the library
* symbol: Symbol to look for
* Returns: a pointer to the symbol or NULL
* if the symbol can't be found
*/
void *find_sym(void *handle, const char *symbol) {
void *ret = NULL, *err = NULL;
dlerror(); /* clear error code */
ret = dlsym(handle, symbol);
if(ret != NULL && (err = dlerror()) == NULL)
return ret;
else
return NULL;
}
/*
* Check that all symbols have been loaded
*
* Returns: -1 if an error occurred, 0 for success
*/
int check_symbols() {
if(DL_H5Functions.H5open == NULL)
return -1;
/*H5E*/
if(DL_H5Functions.H5Epush1 == NULL)
return -1;
if(DL_H5Functions.H5Epush2 == NULL)
return -1;
/*H5P*/
if(DL_H5Functions.H5Pget_filter_by_id2 == NULL)
return -1;
if(DL_H5Functions.H5Pget_chunk == NULL)
return -1;
if(DL_H5Functions.H5Pmodify_filter == NULL)
return -1;
/*H5T*/
if(DL_H5Functions.H5Tget_size == NULL)
return -1;
if(DL_H5Functions.H5Tget_class == NULL)
return -1;
if(DL_H5Functions.H5Tget_super == NULL)
return -1;
if(DL_H5Functions.H5Tclose == NULL)
return -1;
/*H5Z*/
if(DL_H5Functions.H5Zregister == NULL)
return -1;
/*Variables*/
if(H5Variables_ptr.h5e_cantregister_ptr == NULL)
return -1;
if(H5Variables_ptr.h5e_callback_ptr == NULL)
return -1;
if(H5Variables_ptr.h5e_pline_ptr == NULL)
return -1;
if(H5Variables_ptr.h5e_err_cls_ptr == NULL)
return -1;
return 0;
}
/* Initialize the dynamic loading of symbols and register the plugin
*
* libname: Name of the DLL from which to load libHDF5 symbols
* Returns: -1 if an error occurred, 0 for success
*/
int init_filter(const char *libname)
{
int retval = -1;
void *handle = NULL;
handle = dlopen(libname, RTLD_LAZY | RTLD_LOCAL);
if (handle != NULL) {
/*H5*/
if(DL_H5Functions.H5open == NULL)
// find_sym will return NULL if it fails so no need to check return ptr
DL_H5Functions.H5open = (DL_func_H5open)find_sym(handle, "H5open");
/*H5E*/
if(DL_H5Functions.H5Epush1 == NULL)
DL_H5Functions.H5Epush1 = (DL_func_H5Epush1)find_sym(handle, "H5Epush1");
if(DL_H5Functions.H5Epush2 == NULL)
DL_H5Functions.H5Epush2 = (DL_func_H5Epush2)find_sym(handle, "H5Epush2");
/*H5P*/
if(DL_H5Functions.H5Pget_filter_by_id2 == NULL)
DL_H5Functions.H5Pget_filter_by_id2 =
(DL_func_H5Pget_filter_by_id2)find_sym(handle, "H5Pget_filter_by_id2");
if(DL_H5Functions.H5Pget_chunk == NULL)
DL_H5Functions.H5Pget_chunk = (DL_func_H5Pget_chunk)find_sym(handle, "H5Pget_chunk");
if(DL_H5Functions.H5Pmodify_filter == NULL)
DL_H5Functions.H5Pmodify_filter =
(DL_func_H5Pmodify_filter)find_sym(handle, "H5Pmodify_filter");
/*H5T*/
if(DL_H5Functions.H5Tget_size == NULL)
DL_H5Functions.H5Tget_size = (DL_func_H5Tget_size)find_sym(handle, "H5Tget_size");
if(DL_H5Functions.H5Tget_class == NULL)
DL_H5Functions.H5Tget_class = (DL_func_H5Tget_class)find_sym(handle, "H5Tget_class");
if(DL_H5Functions.H5Tget_super == NULL)
DL_H5Functions.H5Tget_super = (DL_func_H5Tget_super)find_sym(handle, "H5Tget_super");
if(DL_H5Functions.H5Tclose == NULL)
DL_H5Functions.H5Tclose = (DL_func_H5Tclose)find_sym(handle, "H5Tclose");
/*H5Z*/
if(DL_H5Functions.H5Zregister == NULL)
DL_H5Functions.H5Zregister = (DL_func_H5Zregister)find_sym(handle, "H5Zregister");
/*Variables*/
if(H5Variables_ptr.h5e_cantregister_ptr == NULL)
H5Variables_ptr.h5e_cantregister_ptr = find_sym(handle, "H5E_CANTREGISTER_g");
if(H5Variables_ptr.h5e_callback_ptr == NULL)
H5Variables_ptr.h5e_callback_ptr = find_sym(handle, "H5E_CALLBACK_g");
if(H5Variables_ptr.h5e_pline_ptr == NULL)
H5Variables_ptr.h5e_pline_ptr = find_sym(handle, "H5E_PLINE_g");
if(H5Variables_ptr.h5e_err_cls_ptr == NULL)
H5Variables_ptr.h5e_err_cls_ptr = find_sym(handle, "H5E_ERR_CLS_g");
retval = check_symbols();
if(!retval) {
H5E_CANTREGISTER_g = *((hid_t *)H5Variables_ptr.h5e_cantregister_ptr);
H5E_CALLBACK_g = *((hid_t *)H5Variables_ptr.h5e_callback_ptr);
H5E_PLINE_g = *((hid_t *)H5Variables_ptr.h5e_pline_ptr);
H5E_ERR_CLS_g = *((hid_t *)H5Variables_ptr.h5e_err_cls_ptr);
is_init = true;
}
}
return retval;
};
#define CALL(fallback, func, ...)\
if(DL_H5Functions.func != NULL) {\
return DL_H5Functions.func(__VA_ARGS__);\
} else {\
return fallback;\
}
/*Function wrappers*/
/*H5*/
herr_t H5open(void)
{
CALL(0, H5open)
};
/*H5E*/
herr_t H5Epush1(const char *file, const char *func, unsigned line,
H5E_major_t maj, H5E_minor_t min, const char *str)
{
CALL(0, H5Epush1, file, func, line, maj, min, str)
}
herr_t H5Epush2(hid_t err_stack, const char *file, const char *func, unsigned line,
hid_t cls_id, hid_t maj_id, hid_t min_id, const char *fmt, ...)
{
if(DL_H5Functions.H5Epush2 != NULL) {
/* Avoid forwarding variadic arguments: format fmt + ... into a message string */
va_list ap;
char msg_string[256]; /*Buffer hopefully wide enough*/
va_start(ap, fmt);
vsnprintf(msg_string, sizeof(msg_string), fmt, ap);
msg_string[sizeof(msg_string) - 1] = '\0';
va_end(ap);
return DL_H5Functions.H5Epush2(err_stack, file, func, line, cls_id, maj_id, min_id, msg_string);
} else {
return 0;
}
}
/*H5P*/
herr_t H5Pget_filter_by_id2(hid_t plist_id, H5Z_filter_t id,
unsigned int *flags/*out*/, size_t *cd_nelmts/*out*/,
unsigned cd_values[]/*out*/, size_t namelen, char name[]/*out*/,
unsigned *filter_config/*out*/)
{
CALL(0, H5Pget_filter_by_id2, plist_id, id, flags, cd_nelmts, cd_values, namelen, name, filter_config)
}
int H5Pget_chunk(hid_t plist_id, int max_ndims, hsize_t dim[]/*out*/)
{
CALL(0, H5Pget_chunk, plist_id, max_ndims, dim)
}
herr_t H5Pmodify_filter(hid_t plist_id, H5Z_filter_t filter,
unsigned int flags, size_t cd_nelmts,
const unsigned int cd_values[/*cd_nelmts*/])
{
CALL(0, H5Pmodify_filter, plist_id, filter, flags, cd_nelmts, cd_values)
}
/*H5T*/
size_t H5Tget_size(hid_t type_id)
{
CALL(0, H5Tget_size, type_id)
}
H5T_class_t H5Tget_class(hid_t type_id)
{
CALL(H5T_NO_CLASS, H5Tget_class, type_id)
}
hid_t H5Tget_super(hid_t type)
{
CALL(0, H5Tget_super, type)
}
herr_t H5Tclose(hid_t type_id)
{
CALL(0, H5Tclose, type_id)
}
/*H5Z*/
herr_t H5Zregister(const void *cls)
{
CALL(-1, H5Zregister, cls)
}

View File

@ -0,0 +1,90 @@
/*
* IOchain - Distribute a chain of dependent IO events among threads.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*/
#include <stdlib.h>
#include "iochain.h"
void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0) {
#ifdef _OPENMP
omp_init_lock(&C->next_lock);
for (size_t ii = 0; ii < IOC_SIZE; ii ++) {
omp_init_lock(&(C->in_pl[ii].lock));
omp_init_lock(&(C->out_pl[ii].lock));
}
#endif
C->next = 0;
C->in_pl[0].ptr = in_ptr_0;
C->out_pl[0].ptr = out_ptr_0;
}
void ioc_destroy(ioc_chain *C) {
#ifdef _OPENMP
omp_destroy_lock(&C->next_lock);
for (size_t ii = 0; ii < IOC_SIZE; ii ++) {
omp_destroy_lock(&(C->in_pl[ii].lock));
omp_destroy_lock(&(C->out_pl[ii].lock));
}
#endif
}
const void * ioc_get_in(ioc_chain *C, size_t *this_iter) {
#ifdef _OPENMP
omp_set_lock(&C->next_lock);
#pragma omp flush
#endif
*this_iter = C->next;
C->next ++;
#ifdef _OPENMP
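/* Reserve this iteration's input slot and the next iteration's input and
* output slots before releasing next_lock, so slots are claimed in order. */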
omp_set_lock(&(C->in_pl[*this_iter % IOC_SIZE].lock));
omp_set_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock));
omp_set_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
omp_unset_lock(&C->next_lock);
#endif
return C->in_pl[*this_iter % IOC_SIZE].ptr;
}
void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr) {
C->in_pl[(*this_iter + 1) % IOC_SIZE].ptr = in_ptr;
#ifdef _OPENMP
omp_unset_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock));
#endif
}
void * ioc_get_out(ioc_chain *C, size_t *this_iter) {
#ifdef _OPENMP
omp_set_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock));
#pragma omp flush
#endif
void *out_ptr = C->out_pl[*this_iter % IOC_SIZE].ptr;
#ifdef _OPENMP
omp_unset_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock));
#endif
return out_ptr;
}
void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) {
C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr;
#ifdef _OPENMP
omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock));
// The *in_pl[this_iter]* lock is released here, at the end of the iteration,
// rather than in *ioc_set_next_in*, so this thread cannot be overtaken by
// another and have *out_pl[this_iter]* corrupted. This matters especially
// for thread 0, iteration 0.
omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock));
#endif
}

View File

@ -0,0 +1,94 @@
/*
* IOchain - Distribute a chain of dependent IO events among threads.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*
* Header File
*
* Similar in concept to a queue. Each task includes reading an input
* and writing output, but the location of the input/output (the pointers)
* depend on the previous item in the chain.
*
* This is designed for parallelizing blocked compression/decompression IO,
* where the destination of a compressed block depends on the compressed size
* of all previous blocks.
*
* Implemented with OpenMP locks.
*
*
* Usage
* -----
* - Call `ioc_init` in serial block.
* - Each thread should create a local variable *size_t this_iter* and
* pass its address to all function calls. Its value will be set
* inside the functions and is used to identify the thread.
* - Each thread must call each of the `ioc_get*` and `ioc_set*` methods
* exactly once per iteration, starting with `ioc_get_in` and ending
* with `ioc_set_next_out`.
* - The order (`ioc_get_in`, `ioc_set_next_in`, *work*, `ioc_get_out`,
* `ioc_set_next_out`, *work*) is most efficient; a sketch follows this
* comment block.
* - `ioc_get_in` is blocked until the previous entry's
* `ioc_set_next_in` is called.
* - `ioc_get_out` is blocked until the previous entry's
* `ioc_set_next_out` is called.
* - There are no blocks on the very first iteration.
* - Call `ioc_destroy` in serial block.
* - Safe for num_threads >= IOC_SIZE (but less efficient).
*
*/
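/*
* Minimal usage sketch (hypothetical caller: `n_blocks`, `block_size` and
* `compress_block` are assumptions, not part of this API):
*
*     ioc_chain C;
*     ioc_init(&C, in_buf, out_buf);
*     #pragma omp parallel for schedule(dynamic, 1)
*     for (int64_t ii = 0; ii < n_blocks; ii++) {
*         size_t this_iter;
*         const void *in = ioc_get_in(&C, &this_iter);
*         ioc_set_next_in(&C, &this_iter, (char *) in + block_size);
*         void *out = ioc_get_out(&C, &this_iter);
*         size_t out_size = compress_block(in, block_size, out);
*         ioc_set_next_out(&C, &this_iter, (char *) out + out_size);
*     }
*     ioc_destroy(&C);
*/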
#ifndef IOCHAIN_H
#define IOCHAIN_H
#include <stdlib.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#define IOC_SIZE 33
typedef struct ioc_ptr_and_lock {
#ifdef _OPENMP
omp_lock_t lock;
#endif
void *ptr;
} ptr_and_lock;
typedef struct ioc_const_ptr_and_lock {
#ifdef _OPENMP
omp_lock_t lock;
#endif
const void *ptr;
} const_ptr_and_lock;
typedef struct ioc_chain {
#ifdef _OPENMP
omp_lock_t next_lock;
#endif
size_t next;
const_ptr_and_lock in_pl[IOC_SIZE];
ptr_and_lock out_pl[IOC_SIZE];
} ioc_chain;
void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0);
void ioc_destroy(ioc_chain *C);
const void * ioc_get_in(ioc_chain *C, size_t *this_iter);
void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr);
void * ioc_get_out(ioc_chain *C, size_t *this_iter);
void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr);
#endif // IOCHAIN_H

View File

@ -0,0 +1,42 @@
/*
* Dynamically loaded filter plugin for HDF5 LZF filter.
*
* This file is part of Bitshuffle
* Author: Kiyoshi Masui <kiyo@physics.ubc.ca>
* Website: http://www.github.com/kiyo-masui/bitshuffle
* Created: 2014
*
* See LICENSE file for details about copyright and rights to use.
*
*/
#define H5Z_class_t_vers 2
#include "lzf_filter.h"
#include "H5PLextern.h"
#include <stdint.h>
size_t lzf_filter(unsigned flags, size_t cd_nelmts,
const unsigned cd_values[], size_t nbytes,
size_t *buf_size, void **buf);
herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space);
H5Z_class_t lzf_H5Filter[1] = {{
H5Z_CLASS_T_VERS,
(H5Z_filter_t)(H5PY_FILTER_LZF),
1, 1,
"lzf",
NULL,
(H5Z_set_local_func_t)(lzf_set_local),
(H5Z_func_t)(lzf_filter)
}};
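/* Entry points queried by the HDF5 dynamic plugin loader to register this filter. */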
H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;}
const void* H5PLget_plugin_info(void) {return lzf_H5Filter;}

View File

@ -0,0 +1,69 @@
"""
Script to create data used for regression testing.
"""
import numpy as np
from numpy import random
import h5py
import bitshuffle
from bitshuffle import h5
from h5py import h5z
BLOCK_SIZE = 64 # Smallish such that datasets have many blocks but are small.
COMP_LVL = 10 # ZSTD compression level
FILTER_PIPELINE = [h5.H5FILTER]
FILTER_OPTS = [
[(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)],
[(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)],
]
OUT_FILE = "tests/data/regression_%s.h5" % bitshuffle.__version__
DTYPES = ["a1", "a2", "a3", "a4", "a6", "a8", "a10"]
f = h5py.File(OUT_FILE, "w")
g_orig = f.create_group("original")
g_comp_lz4 = f.create_group("compressed")
g_comp_zstd = f.create_group("compressed_zstd")
for dtype_str in DTYPES:
# Convert the dtype once, outside the repetition loop, so the dataset name
# keeps the short form (e.g. "a1_b" rather than "|S1_b") on every rep.
dtype = np.dtype(dtype_str)
for rep in ["a", "b", "c"]:
dset_name = "%s_%s" % (dtype_str, rep)
n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE)
shape = (n_elem,)
chunks = shape
data = random.randint(0, 255, n_elem * dtype.itemsize)
data = data.astype(np.uint8).view(dtype)
g_orig.create_dataset(dset_name, data=data)
# Create LZ4 compressed data
h5.create_dataset(
g_comp_lz4,
bytes(dset_name, "utf-8"),
shape,
dtype,
chunks=chunks,
filter_pipeline=FILTER_PIPELINE,
filter_flags=(h5z.FLAG_MANDATORY,),
filter_opts=FILTER_OPTS[0],
)
g_comp_lz4[dset_name][:] = data
# Create ZSTD compressed data
h5.create_dataset(
g_comp_zstd,
bytes(dset_name, "utf-8"),
shape,
dtype,
chunks=chunks,
filter_pipeline=FILTER_PIPELINE,
filter_flags=(h5z.FLAG_MANDATORY,),
filter_opts=FILTER_OPTS[1],
)
g_comp_zstd[dset_name][:] = data
f.close()

View File

@ -0,0 +1,720 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import unittest
import time
import numpy as np
from numpy import random
from bitshuffle import ext, __zstd__
# If we are doing timings, the factor by which to increase the workload.
# Remember to change `ext.REPEATC`.
TIME = 0
# TIME = 8 # 8kB blocks same as final blocking.
BLOCK = 1024
TEST_DTYPES = [
np.uint8,
np.uint16,
np.int32,
np.uint64,
np.float32,
np.float64,
np.complex128,
]
TEST_DTYPES += [b"a3", b"a5", b"a6", b"a7", b"a9", b"a11", b"a12", b"a24", b"a48"]
class TestProfile(unittest.TestCase):
def setUp(self):
n = 1024 # bytes.
if TIME:
n *= TIME
# Almost random bits, but not quite. All bits exercised (to fully test
# transpose) but still slightly compressible.
self.data = random.randint(0, 200, n).astype(np.uint8)
self.fun = ext.copy
self.check = None
self.check_data = None
self.case = "None"
def tearDown(self):
"""Performs all tests and timings."""
if TIME:
reps = 10
else:
reps = 1
delta_ts = []
try:
for ii in range(reps):
t0 = time.time()
out = self.fun(self.data)
delta_ts.append(time.time() - t0)
except RuntimeError as err:
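# Error codes -11, -12 and -14 signal SSE2, AVX2 and AVX512 kernels
# invoked on hardware without those instruction sets; skip those cases.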
if len(err.args) > 1 and (err.args[1] == -11) and not ext.using_SSE2():
return
if len(err.args) > 1 and (err.args[1] == -12) and not ext.using_AVX2():
return
if len(err.args) > 1 and (err.args[1] == -14) and not ext.using_AVX512():
return
else:
raise
delta_t = min(delta_ts)
size_i = self.data.size * self.data.dtype.itemsize
size_o = out.size * out.dtype.itemsize
size = max([size_i, size_o])
speed = ext.REPEAT * size / delta_t / 1024**3 # GB/s
if TIME:
print("%-20s: %5.2f s/GB, %5.2f GB/s" % (self.case, 1.0 / speed, speed))
if self.check is not None:
ans = self.check(self.data).view(np.uint8)
self.assertTrue(np.all(ans == out.view(np.uint8)))
if self.check_data is not None:
ans = self.check_data.view(np.uint8)
self.assertTrue(np.all(ans == out.view(np.uint8)))
def test_00_copy(self):
self.case = "copy"
self.fun = ext.copy
self.check = lambda x: x
def test_01a_trans_byte_elem_scal_16(self):
self.case = "byte T elem scal 16"
self.data = self.data.view(np.int16)
self.fun = ext.trans_byte_elem_scal
self.check = trans_byte_elem
def test_01b_trans_byte_elem_scal_32(self):
self.case = "byte T elem scal 32"
self.data = self.data.view(np.int32)
self.fun = ext.trans_byte_elem_scal
self.check = trans_byte_elem
def test_01c_trans_byte_elem_scal_64(self):
self.case = "byte T elem scal 64"
self.data = self.data.view(np.int64)
self.fun = ext.trans_byte_elem_scal
self.check = trans_byte_elem
def test_01d_trans_byte_elem_16(self):
self.case = "byte T elem SSE 16"
self.data = self.data.view(np.int16)
self.fun = ext.trans_byte_elem_SSE
self.check = trans_byte_elem
def test_01e_trans_byte_elem_32(self):
self.case = "byte T elem SSE 32"
self.data = self.data.view(np.float32)
self.fun = ext.trans_byte_elem_SSE
self.check = trans_byte_elem
def test_01f_trans_byte_elem_64(self):
self.case = "byte T elem SSE 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_byte_elem_SSE
self.check = trans_byte_elem
def test_01g_trans_byte_elem_128(self):
self.case = "byte T elem SSE 128"
self.data = self.data.view(np.complex128)
self.fun = ext.trans_byte_elem_SSE
self.check = trans_byte_elem
def test_01h_trans_byte_elem_96(self):
self.case = "byte T elem SSE 96"
n = self.data.size // 128 * 96
dt = np.dtype(
[(str("a"), np.int32), (str("b"), np.int32), (str("c"), np.int32)]
)
self.data = self.data[:n].view(dt)
self.fun = ext.trans_byte_elem_SSE
self.check = trans_byte_elem
def test_01i_trans_byte_elem_80(self):
self.case = "byte T elem SSE 80"
n = self.data.size // 128 * 80
dt = np.dtype(
[
(str("a"), np.int16),
(str("b"), np.int16),
(str("c"), np.int16),
(str("d"), np.int16),
(str("e"), np.int16),
]
)
self.data = self.data[:n].view(dt)
self.fun = ext.trans_byte_elem_SSE
self.check = trans_byte_elem
def test_03a_trans_bit_byte(self):
self.case = "bit T byte scal 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_byte_scal
self.check = trans_bit_byte
def test_03d_trans_bit_byte_SSE(self):
self.case = "bit T byte SSE 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_byte_SSE
self.check = trans_bit_byte
def test_03f_trans_bit_byte_AVX(self):
self.case = "bit T byte AVX 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_byte_AVX
self.check = trans_bit_byte
def test_03g_trans_bit_byte_AVX_32(self):
self.case = "bit T byte AVX 32"
self.data = self.data.view(np.float32)
self.fun = ext.trans_bit_byte_AVX
self.check = trans_bit_byte
def test_03h_trans_bit_byte_AVX512(self):
self.case = "bit T byte AVX512 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_byte_AVX512
self.check = trans_bit_byte
def test_03i_trans_bit_byte_AVX512_32(self):
self.case = "bit T byte AVX512 32"
self.data = self.data.view(np.float32)
self.fun = ext.trans_bit_byte_AVX512
self.check = trans_bit_byte
def test_04a_trans_bit_elem_AVX(self):
self.case = "bit T elem AVX 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_elem_AVX
self.check = trans_bit_elem
def test_04b_trans_bit_elem_AVX_128(self):
self.case = "bit T elem AVX 128"
self.data = self.data.view(np.complex128)
self.fun = ext.trans_bit_elem_AVX
self.check = trans_bit_elem
def test_04c_trans_bit_elem_AVX_32(self):
self.case = "bit T elem AVX 32"
self.data = self.data.view(np.float32)
self.fun = ext.trans_bit_elem_AVX
self.check = trans_bit_elem
def test_04d_trans_bit_elem_AVX_16(self):
self.case = "bit T elem AVX 16"
self.data = self.data.view(np.int16)
self.fun = ext.trans_bit_elem_AVX
self.check = trans_bit_elem
def test_04e_trans_bit_elem_64(self):
self.case = "bit T elem scal 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_elem_scal
self.check = trans_bit_elem
def test_04f_trans_bit_elem_SSE_32(self):
self.case = "bit T elem SSE 32"
self.data = self.data.view(np.float32)
self.fun = ext.trans_bit_elem_SSE
self.check = trans_bit_elem
def test_04g_trans_bit_elem_SSE_64(self):
self.case = "bit T elem SSE 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_elem_SSE
self.check = trans_bit_elem
def test_04h_trans_bit_elem_AVX512(self):
self.case = "bit T elem AVX512 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_elem_AVX512
self.check = trans_bit_elem
def test_04i_trans_bit_elem_AVX512(self):
self.case = "bit T elem AVX 128"
self.data = self.data.view(np.complex128)
self.fun = ext.trans_bit_elem_AVX512
self.check = trans_bit_elem
def test_04j_trans_bit_elem_AVX512_32(self):
self.case = "bit T elem AVX512 32"
self.data = self.data.view(np.float32)
self.fun = ext.trans_bit_elem_AVX512
self.check = trans_bit_elem
def test_04k_trans_bit_elem_AVX512_16(self):
self.case = "bit T elem AVX512 16"
self.data = self.data.view(np.int16)
self.fun = ext.trans_bit_elem_AVX512
self.check = trans_bit_elem
def test_06a_untrans_bit_elem_16(self):
self.case = "bit U elem SSE 16"
pre_trans = self.data.view(np.int16)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_SSE
self.check_data = pre_trans
def test_06b_untrans_bit_elem_128(self):
self.case = "bit U elem SSE 128"
pre_trans = self.data.view(np.complex128)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_SSE
self.check_data = pre_trans
def test_06c_untrans_bit_elem_32(self):
self.case = "bit U elem SSE 32"
pre_trans = self.data.view(np.float32)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_SSE
self.check_data = pre_trans
def test_06d_untrans_bit_elem_32(self):
self.case = "bit U elem AVX 32"
pre_trans = self.data.view(np.float32)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_AVX
self.check_data = pre_trans
def test_06e_untrans_bit_elem_64(self):
self.case = "bit U elem SSE 64"
pre_trans = self.data.view(np.float64)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_SSE
self.check_data = pre_trans
def test_06f_untrans_bit_elem_64(self):
self.case = "bit U elem AVX 64"
pre_trans = self.data.view(np.float64)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_AVX
self.check_data = pre_trans
def test_06g_untrans_bit_elem_64(self):
self.case = "bit U elem scal 64"
pre_trans = self.data.view(np.float64)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_scal
self.check_data = pre_trans
def test_06h_untrans_bit_elem_32(self):
self.case = "bit U elem AVX512 32"
pre_trans = self.data.view(np.float32)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_AVX512
self.check_data = pre_trans
def test_06i_untrans_bit_elem_64(self):
self.case = "bit U elem AVX512 64"
pre_trans = self.data.view(np.float64)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_AVX512
self.check_data = pre_trans
def test_07a_trans_byte_bitrow_64(self):
self.case = "byte T row scal 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_byte_bitrow_scal
def test_07b_trans_byte_bitrow_SSE_64(self):
self.case = "byte T row SSE 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_byte_bitrow_SSE
self.check = ext.trans_byte_bitrow_scal
def test_07c_trans_byte_bitrow_AVX_64(self):
self.case = "byte T row AVX 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_byte_bitrow_AVX
self.check = ext.trans_byte_bitrow_scal
def test_08a_shuffle_bit_eight_scal_64(self):
self.case = "bit S eight scal 64"
self.data = self.data.view(np.float64)
self.fun = ext.shuffle_bit_eightelem_scal
def test_08b_shuffle_bit_eight_SSE_64(self):
self.case = "bit S eight SSE 64"
self.data = self.data.view(np.float64)
self.fun = ext.shuffle_bit_eightelem_SSE
self.check = ext.shuffle_bit_eightelem_scal
def test_08c_shuffle_bit_eight_AVX_32(self):
self.case = "bit S eight AVX 32"
self.data = self.data.view(np.float32)
self.fun = ext.shuffle_bit_eightelem_AVX
self.check = ext.shuffle_bit_eightelem_scal
def test_08d_shuffle_bit_eight_AVX_64(self):
self.case = "bit S eight AVX 64"
self.data = self.data.view(np.float64)
self.fun = ext.shuffle_bit_eightelem_AVX
self.check = ext.shuffle_bit_eightelem_scal
def test_08e_shuffle_bit_eight_AVX_16(self):
self.case = "bit S eight AVX 16"
self.data = self.data.view(np.int16)
self.fun = ext.shuffle_bit_eightelem_AVX
self.check = ext.shuffle_bit_eightelem_scal
def test_08f_shuffle_bit_eight_AVX_128(self):
self.case = "bit S eight AVX 128"
self.data = self.data.view(np.complex128)
self.fun = ext.shuffle_bit_eightelem_AVX
self.check = ext.shuffle_bit_eightelem_scal
def test_08g_shuffle_bit_eight_AVX512_32(self):
self.case = "bit S eight AVX 32"
self.data = self.data.view(np.float32)
self.fun = ext.shuffle_bit_eightelem_AVX512
self.check = ext.shuffle_bit_eightelem_scal
def test_08h_shuffle_bit_eight_AVX512_64(self):
self.case = "bit S eight AVX512 64"
self.data = self.data.view(np.float64)
self.fun = ext.shuffle_bit_eightelem_AVX512
self.check = ext.shuffle_bit_eightelem_scal
def test_08i_shuffle_bit_eight_AVX512_16(self):
self.case = "bit S eight AVX512 16"
self.data = self.data.view(np.int16)
self.fun = ext.shuffle_bit_eightelem_AVX512
self.check = ext.shuffle_bit_eightelem_scal
def test_08j_shuffle_bit_eight_AVX512_128(self):
self.case = "bit S eight AVX512 128"
self.data = self.data.view(np.complex128)
self.fun = ext.shuffle_bit_eightelem_AVX512
self.check = ext.shuffle_bit_eightelem_scal
def test_09a_trans_bit_elem_scal_64(self):
self.case = "bit T elem scal 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_elem_scal
self.check = trans_bit_elem
def test_09b_trans_bit_elem_SSE_64(self):
self.case = "bit T elem SSE 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_elem_SSE
self.check = trans_bit_elem
def test_09c_trans_bit_elem_AVX_64(self):
self.case = "bit T elem AVX 64"
self.data = self.data.view(np.float64)
self.fun = ext.trans_bit_elem_AVX
self.check = trans_bit_elem
def test_09d_untrans_bit_elem_scal_64(self):
self.case = "bit U elem scal 64"
pre_trans = self.data.view(np.float64)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_scal
self.check_data = pre_trans
def test_09e_untrans_bit_elem_SSE_64(self):
self.case = "bit U elem SSE 64"
pre_trans = self.data.view(np.float64)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_SSE
self.check_data = pre_trans
def test_09f_untrans_bit_elem_AVX_64(self):
self.case = "bit U elem AVX 64"
pre_trans = self.data.view(np.float64)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_AVX
self.check_data = pre_trans
def test_09g_untrans_bit_elem_AVX512_64(self):
self.case = "bit U elem AVX512 64"
pre_trans = self.data.view(np.float64)
self.data = trans_bit_elem(pre_trans)
self.fun = ext.untrans_bit_elem_AVX512
self.check_data = pre_trans
def test_10a_bitshuffle_64(self):
self.case = "bitshuffle 64"
self.data = self.data.view(np.float64)
self.fun = lambda x: ext.bitshuffle(x, BLOCK)
def test_10b_bitunshuffle_64(self):
self.case = "bitunshuffle 64"
pre_trans = self.data.view(np.float64)
self.data = ext.bitshuffle(pre_trans, BLOCK)
self.fun = lambda x: ext.bitunshuffle(x, BLOCK)
self.check_data = pre_trans
def test_10c_compress_64(self):
self.case = "compress 64"
self.data = self.data.view(np.float64)
self.fun = lambda x: ext.compress_lz4(x, BLOCK)
def test_10d_decompress_64(self):
self.case = "decompress 64"
pre_trans = self.data.view(np.float64)
self.data = ext.compress_lz4(pre_trans, BLOCK)
self.fun = lambda x: ext.decompress_lz4(
x, pre_trans.shape, pre_trans.dtype, BLOCK
)
self.check_data = pre_trans
@unittest.skipUnless(__zstd__, "ZSTD support not included")
def test_10c_compress_z64(self):
self.case = "compress zstd 64"
self.data = self.data.view(np.float64)
self.fun = lambda x: ext.compress_zstd(x, BLOCK)
@unittest.skipUnless(__zstd__, "ZSTD support not included")
def test_10d_decompress_z64(self):
self.case = "decompress zstd 64"
pre_trans = self.data.view(np.float64)
self.data = ext.compress_zstd(pre_trans, BLOCK)
self.fun = lambda x: ext.decompress_zstd(
x, pre_trans.shape, pre_trans.dtype, BLOCK
)
self.check_data = pre_trans
"""
Commented out to prevent nose from finding them.
class TestDevCases(unittest.TestCase):
def deactivated_test_trans_byte_bitrow_AVX(self):
d = np.arange(256, dtype=np.uint32)
#d = ext.trans_bit_elem(d)
t = ext.trans_byte_bitrow_AVX(d).view(np.uint8)
t1 = ext.trans_byte_bitrow_SSE(d).view(np.uint8)
t.shape = (32, 32)
t1.shape = (32, 32)
#print t[:20,:18]
self.assertTrue(np.all(t == t1))
def deactivated_test_untrans_bit_elem(self):
d = np.arange(32, dtype=np.uint16)
#d = random.randint(0, 2**7, 256).astype(np.uint16)
d1 = ext.trans_bit_elem(d)
#print d
t = ext.untrans_bit_elem_AVX(d1)
#t1 = ext.untrans_bit_byte_scal(d1)
#print np.reshape(d1.view(np.uint8), (16, 4))
#print np.reshape(t1.view(np.uint8), (2, 32))
#print np.reshape(t2.view(np.uint8), (32, 2))
#print np.reshape(t.view(np.uint8), (32, 2))
def deactivated_test_trans_bit_byte(self):
d = np.arange(16, dtype=np.uint16)
t = ext.trans_bit_byte_scal(d)
#print t
t1 = trans_bit_byte(d)
#print t1
self.assertTrue(np.all(t == t1))
def deactivated_test_trans_byte_bitrow_SSE(self):
d = np.arange(256, dtype = np.uint8)
t = ext.trans_byte_bitrow_scal(d)
#print np.reshape(t, (32, 8))
t1 = ext.trans_byte_bitrow_SSE(d)
#print np.reshape(t1, (32, 8))
self.assertTrue(np.all(t == t1))
def deactivated_test_trans_byte_elem_SSE(self):
d = np.empty(16, dtype=([('a', 'u4'), ('b', 'u4'), ('c', 'u4')]))
d['a'] = np.arange(16) * 1
d['b'] = np.arange(16) * 2
d['c'] = np.arange(16) * 3
#print d.dtype.itemsize
#print np.reshape(d.view(np.uint8), (16, 12))
t1 = ext.trans_byte_elem_SSE(d)
#print np.reshape(t1.view(np.uint8), (12, 16))
t0 = trans_byte_elem(d)
#print np.reshape(t0.view(np.uint8), (12, 16))
self.assertTrue(np.all(t0.view(np.uint8) == t1.view(np.uint8)))
def deactivated_test_bitshuffle(self):
d = np.arange(128, dtype=np.uint16)
t1 = ext.bitshuffle(d)
#print t1
t2 = ext.bitunshuffle(t1)
#print t2
self.assertTrue(np.all(t2.view(np.uint8) == d.view(np.uint8)))
"""
class TestOddLengths(unittest.TestCase):
def setUp(self):
self.reps = 10
self.nmax = 128 * 8
# self.nmax = 4 * 8 # XXX
self.fun = ext.copy
self.check = lambda x: x
def test_trans_bit_elem_SSE(self):
self.fun = ext.trans_bit_elem_SSE
self.check = trans_bit_elem
def test_untrans_bit_elem_SSE(self):
self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x))
self.check = lambda x: x
def test_trans_bit_elem_AVX(self):
self.fun = ext.trans_bit_elem_AVX
self.check = trans_bit_elem
def test_trans_bit_elem_AVX512(self):
self.fun = ext.trans_bit_elem_AVX512
self.check = trans_bit_elem
def test_untrans_bit_elem_AVX(self):
self.fun = lambda x: ext.untrans_bit_elem_AVX(ext.trans_bit_elem(x))
self.check = lambda x: x
def test_untrans_bit_elem_AVX512(self):
self.fun = lambda x: ext.untrans_bit_elem_AVX512(ext.trans_bit_elem(x))
self.check = lambda x: x
def test_trans_bit_elem_scal(self):
self.fun = ext.trans_bit_elem_scal
self.check = trans_bit_elem
def test_untrans_bit_elem_scal(self):
self.fun = lambda x: ext.untrans_bit_elem_scal(ext.trans_bit_elem(x))
self.check = lambda x: x
def test_trans_byte_elem_SSE(self):
self.fun = ext.trans_byte_elem_SSE
self.check = trans_byte_elem
def tearDown(self):
try:
for dtype in TEST_DTYPES:
itemsize = np.dtype(dtype).itemsize
nbyte_max = self.nmax * itemsize
dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8)
dbuf = dbuf.view(dtype)
for ii in range(self.reps):
n = random.randint(0, self.nmax // 8, 1)[0] * 8
data = dbuf[:n]
out = self.fun(data).view(np.uint8)
ans = self.check(data).view(np.uint8)
self.assertTrue(np.all(out == ans))
except RuntimeError as err:
if len(err.args) > 1 and (err.args[1] == -11) and not ext.using_SSE2():
return
if len(err.args) > 1 and (err.args[1] == -12) and not ext.using_AVX2():
return
if len(err.args) > 1 and (err.args[1] == -14) and not ext.using_AVX512():
return
else:
raise
class TestBitShuffleCircle(unittest.TestCase):
"""Ensure that final filter is circularly consistent for any data type and
any length buffer."""
def test_circle(self):
nmax = 100000
reps = 20
for dtype in TEST_DTYPES:
itemsize = np.dtype(dtype).itemsize
nbyte_max = nmax * itemsize
dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8)
dbuf = dbuf.view(dtype)
for ii in range(reps):
n = random.randint(0, nmax, 1)[0]
data = dbuf[:n]
shuff = ext.bitshuffle(data)
out = ext.bitunshuffle(shuff)
self.assertTrue(out.dtype is data.dtype)
self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8)))
def test_circle_with_compression(self):
nmax = 100000
reps = 20
for dtype in TEST_DTYPES:
itemsize = np.dtype(dtype).itemsize
nbyte_max = nmax * itemsize
dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8)
dbuf = dbuf.view(dtype)
for ii in range(reps):
n = random.randint(0, nmax, 1)[0]
data = dbuf[:n]
shuff = ext.compress_lz4(data)
out = ext.decompress_lz4(shuff, data.shape, data.dtype)
self.assertTrue(out.dtype is data.dtype)
self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8)))
@unittest.skipUnless(__zstd__, "ZSTD support not included")
def test_circle_with_zstd_compression(self):
nmax = 100000
reps = 20
for dtype in TEST_DTYPES:
itemsize = np.dtype(dtype).itemsize
nbyte_max = nmax * itemsize
dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8)
dbuf = dbuf.view(dtype)
for ii in range(reps):
n = random.randint(0, nmax, 1)[0]
data = dbuf[:n]
shuff = ext.compress_zstd(data)
out = ext.decompress_zstd(shuff, data.shape, data.dtype)
self.assertTrue(out.dtype is data.dtype)
self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8)))
# Python implementations for checking results.
def trans_byte_elem(arr):
dtype = arr.dtype
itemsize = dtype.itemsize
in_buf = arr.flat[:].view(np.uint8)
nelem = in_buf.size // itemsize
in_buf.shape = (nelem, itemsize)
out_buf = np.empty((itemsize, nelem), dtype=np.uint8)
for ii in range(nelem):
for jj in range(itemsize):
out_buf[jj, ii] = in_buf[ii, jj]
return out_buf.flat[:].view(dtype)
def trans_bit_byte(arr):
n = arr.size
dtype = arr.dtype
itemsize = dtype.itemsize
bits = np.unpackbits(arr.view(np.uint8))
bits.shape = (n * itemsize, 8)
# We have to reverse the order of the bits both for unpacking and packing,
# since we want to call the least significant bit the first bit.
bits = bits[:, ::-1]
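# (np.unpackbits is MSB-first: np.unpackbits(np.array([1], np.uint8)) gives
# [0, 0, 0, 0, 0, 0, 0, 1], so the reversal puts the LSB first.)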
bits_shuff = (bits.T).copy()
bits_shuff.shape = (n * itemsize, 8)
bits_shuff = bits_shuff[:, ::-1]
arr_bt = np.packbits(bits_shuff.flat[:])
return arr_bt.view(dtype)
def trans_bit_elem(arr):
n = arr.size
dtype = arr.dtype
itemsize = dtype.itemsize
bits = np.unpackbits(arr.view(np.uint8))
bits.shape = (n * itemsize, 8)
# We have to reverse the order of the bits both for unpacking and packing,
# since we want to call the least significant bit the first bit.
bits = bits[:, ::-1].copy()
bits.shape = (n, itemsize * 8)
bits_shuff = (bits.T).copy()
bits_shuff.shape = (n * itemsize, 8)
bits_shuff = bits_shuff[:, ::-1]
arr_bt = np.packbits(bits_shuff.flat[:])
return arr_bt.view(dtype)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,138 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import unittest
import os
import glob
import numpy as np
import h5py
import pytest
from h5py import h5z
from bitshuffle import h5, __zstd__
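# Clear the plugin path so externally installed filter plugins are not picked up.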
os.environ["HDF5_PLUGIN_PATH"] = ""
class TestFilter(unittest.TestCase):
def test_filter(self):
shape = (32 * 1024 + 783,)
chunks = (4 * 1024 + 23,)
dtype = np.int64
data = np.arange(shape[0])
fname = "tmp_test_filters.h5"
f = h5py.File(fname, "w")
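# Filter IDs: 32008 is the bitshuffle filter, 32000 is the LZF filter.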
h5.create_dataset(
f,
b"range",
shape,
dtype,
chunks,
filter_pipeline=(32008, 32000),
filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY),
filter_opts=None,
)
f["range"][:] = data
f.close()
f = h5py.File(fname, "r")
d = f["range"][:]
self.assertTrue(np.all(d == data))
f.close()
def test_with_block_size(self):
shape = (128 * 1024 + 783,)
chunks = (4 * 1024 + 23,)
dtype = np.int64
data = np.arange(shape[0])
fname = "tmp_test_filters.h5"
f = h5py.File(fname, "w")
h5.create_dataset(
f,
b"range",
shape,
dtype,
chunks,
filter_pipeline=(32008, 32000),
filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY),
filter_opts=((680,), ()),
)
f["range"][:] = data
f.close()
# os.system('h5dump -H -p tmp_test_filters.h5')
f = h5py.File(fname, "r")
d = f["range"][:]
self.assertTrue(np.all(d == data))
f.close()
def test_with_lz4_compression(self):
shape = (128 * 1024 + 783,)
chunks = (4 * 1024 + 23,)
dtype = np.int64
data = np.arange(shape[0])
fname = "tmp_test_filters.h5"
f = h5py.File(fname, "w")
h5.create_dataset(
f,
b"range",
shape,
dtype,
chunks,
filter_pipeline=(32008,),
filter_flags=(h5z.FLAG_MANDATORY,),
filter_opts=((0, h5.H5_COMPRESS_LZ4),),
)
f["range"][:] = data
f.close()
# os.system('h5dump -H -p tmp_test_filters.h5')
f = h5py.File(fname, "r")
d = f["range"][:]
self.assertTrue(np.all(d == data))
f.close()
@pytest.mark.skipif(
__zstd__ is False,
reason="Bitshuffle has not been built with ZSTD support.",
)
def test_with_zstd_compression(self):
shape = (128 * 1024 + 783,)
chunks = (4 * 1024 + 23,)
compression_lvl = 10
dtype = np.int64
data = np.arange(shape[0])
fname = "tmp_test_filters.h5"
f = h5py.File(fname, "w")
h5.create_dataset(
f,
b"range",
shape,
dtype,
chunks,
filter_pipeline=(32008,),
filter_flags=(h5z.FLAG_MANDATORY,),
filter_opts=((0, h5.H5_COMPRESS_ZSTD, compression_lvl),),
)
f["range"][:] = data
f.close()
# os.system('h5dump -H -p tmp_test_filters.h5')
f = h5py.File(fname, "r")
d = f["range"][:]
self.assertTrue(np.all(d == data))
f.close()
def tearDown(self):
files = glob.glob("tmp_test_*")
for f in files:
os.remove(f)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,66 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import unittest
import os
import glob
import numpy as np
import h5py
import pytest
from subprocess import Popen, PIPE, STDOUT
import bitshuffle
plugin_dir = os.path.join(os.path.dirname(bitshuffle.__file__), "plugin")
os.environ["HDF5_PLUGIN_PATH"] = plugin_dir
H5VERSION = h5py.h5.get_libversion()
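# Dynamically loaded filter plugins require HDF5 >= 1.8.11.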
if H5VERSION[0] < 1 or (
H5VERSION[0] == 1
and (H5VERSION[1] < 8 or (H5VERSION[1] == 8 and H5VERSION[2] < 11))
):
H51811P = False
else:
H51811P = True
class TestFilterPlugins(unittest.TestCase):
@pytest.mark.skipif(
"CIBUILDWHEEL" in os.environ,
reason="Can't build dynamic HDF5 plugin into bitshuffle wheel.",
)
def test_plugins(self):
if not H51811P:
return
shape = (32 * 1024,)
chunks = (4 * 1024,)
dtype = np.int64
data = np.arange(shape[0])
fname = "tmp_test_filters.h5"
f = h5py.File(fname, "w")
dset = f.create_dataset(
"range", shape=shape, dtype=dtype, chunks=chunks, compression=32008
)
dset[:] = data
f.close()
# Make sure the filters are working outside of h5py by calling h5dump
h5dump = Popen(["h5dump", fname], stdout=PIPE, stderr=STDOUT)
stdout, nothing = h5dump.communicate()
err = h5dump.returncode
self.assertEqual(err, 0)
f = h5py.File(fname, "r")
d = f["range"][:]
self.assertTrue(np.all(d == data))
f.close()
def tearDown(self):
files = glob.glob("tmp_test_*")
for f in files:
os.remove(f)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,46 @@
"""
Test that data encoded with earlier versions can still be decoded correctly.
"""
from __future__ import absolute_import, division, print_function
import pathlib
import unittest
import numpy as np
import h5py
from bitshuffle import __zstd__
from packaging import version
TEST_DATA_DIR = pathlib.Path(__file__).parent / "data"
OUT_FILE_TEMPLATE = "regression_%s.h5"
VERSIONS = ["0.1.3", "0.4.0"]
class TestAll(unittest.TestCase):
def test_regression(self):
for rev in VERSIONS:
file_name = TEST_DATA_DIR / (OUT_FILE_TEMPLATE % rev)
f = h5py.File(file_name, "r")
g_orig = f["original"]
g_comp = f["compressed"]
for dset_name in g_comp.keys():
self.assertTrue(np.all(g_comp[dset_name][:] == g_orig[dset_name][:]))
# Only run ZSTD comparison on versions >= 0.4.0 and if ZSTD support
# has been built into bitshuffle
if version.parse(rev) >= version.parse("0.4.0") and __zstd__:
g_comp_zstd = f["compressed_zstd"]
for dset_name in g_comp_zstd.keys():
self.assertTrue(
np.all(g_comp_zstd[dset_name][:] == g_orig[dset_name][:])
)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,9 @@
[cxx]
cppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=4
cflags = -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes -Wundef -Wpointer-arith
cxxppflags = -DXXH_NAMESPACE=ZSTD_ -DZSTD_LEGACY_SUPPORT=4
cxxflags = -std=c++11 -Wno-deprecated-declarations
gtest_dep = //contrib/pzstd:gtest
[httpserver]
port = 0

View File

@ -0,0 +1 @@
c8dec2e8da52d483f6dd7c6cd2ad694e8e6fed2b

View File

@ -0,0 +1,133 @@
version: 2
jobs:
# the first half of the jobs are in this test
short-tests-0:
# TODO: Create a small custom docker image with all the dependencies we need
# preinstalled to reduce installation time.
docker:
- image: fbopensource/zstd-circleci-primary:0.0.1
steps:
- checkout
- run:
name: Test
command: |
./tests/test-license.py
cc -v; CFLAGS="-O0 -Werror -pedantic" make all && make clean
make c99build ; make clean
make c11build ; make clean
make aarch64build ; make clean
make -j regressiontest; make clean
make shortest ; make clean
make cxxtest ; make clean
# the second half of the jobs are in this test
short-tests-1:
docker:
- image: fbopensource/zstd-circleci-primary:0.0.1
steps:
- checkout
- run:
name: Test
command: |
make gnu90build; make clean
make gnu99build; make clean
make ppc64build V=1; make clean
make ppcbuild V=1; make clean
make armbuild V=1; make clean
make -C tests test-legacy test-longmatch; make clean
make -C lib libzstd-nomt; make clean
# This step should only be run in a cron job
regression-test:
docker:
- image: fbopensource/zstd-circleci-primary:0.0.1
environment:
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
steps:
- checkout
# Restore the cached resources.
- restore_cache:
# We try our best to bust the cache when the data changes by hashing
# data.c. If that doesn't work, simply update the version number here
# and below. If we fail to bust the cache, the regression testing will
# still work, since it has its own stamp, but will need to redownload
# everything.
keys:
- regression-cache-{{ checksum "tests/regression/data.c" }}-v0
- run:
name: Regression Test
command: |
make -C programs zstd
make -C tests/regression test
mkdir -p $CIRCLE_ARTIFACTS
./tests/regression/test \
--cache tests/regression/cache \
--output $CIRCLE_ARTIFACTS/results.csv \
--zstd programs/zstd
echo "NOTE: The new results.csv is uploaded as an artifact to this job"
echo " If this fails, go to the Artifacts pane in CircleCI, "
echo " download /tmp/circleci-artifacts/results.csv, and if they "
echo " are still good, copy it into the repo and commit it."
echo "> diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv"
diff tests/regression/results.csv $CIRCLE_ARTIFACTS/results.csv
# Only save the cache on success (default), since if the failure happened
# before we stamp the data cache, we will have a bad cache for this key.
- save_cache:
key: regression-cache-{{ checksum "tests/regression/data.c" }}-v0
paths:
- tests/regression/cache
- store_artifacts:
path: /tmp/circleci-artifacts
workflows:
version: 2
commit:
jobs:
# Run the tests in parallel
- short-tests-0:
filters:
tags:
only: /.*/
- short-tests-1:
filters:
tags:
only: /.*/
# Create a branch called regression and set it to dev to force a
# regression test run
- regression-test:
filters:
branches:
only:
- regression
nightly:
triggers:
- schedule:
cron: "0 0 * * *"
filters:
branches:
only:
- release
- dev
- master
jobs:
# Run daily long regression tests
- regression-test
# Longer tests
#- make -C tests test-zstd-nolegacy && make clean
#- pyenv global 3.4.4; make -C tests versionsTest && make clean
#- make zlibwrapper && make clean
#- gcc -v; make -C tests test32 MOREFLAGS="-I/usr/include/x86_64-linux-gnu" && make clean
#- make uasan && make clean
#- make asan32 && make clean
#- make -C tests test32 CC=clang MOREFLAGS="-g -fsanitize=address -I/usr/include/x86_64-linux-gnu"
# Valgrind tests
#- CFLAGS="-O1 -g" make -C zlibWrapper valgrindTest && make clean
#- make -C tests valgrindTest && make clean
# ARM, AArch64, PowerPC, PowerPC64 tests
#- make ppctest && make clean
#- make ppc64test && make clean
#- make armtest && make clean
#- make aarch64test && make clean

View File

@ -0,0 +1,9 @@
FROM circleci/buildpack-deps:bionic
RUN sudo dpkg --add-architecture i386
RUN sudo apt-get -y -qq update
RUN sudo apt-get -y install \
gcc-multilib-powerpc-linux-gnu gcc-arm-linux-gnueabi \
libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross \
libc6-dev-ppc64-powerpc-cross zstd gzip coreutils \
libcurl4-openssl-dev

View File

@ -0,0 +1,11 @@
task:
name: FreeBSD (shortest)
freebsd_instance:
matrix:
image_family: freebsd-12-2
# The stable 11.3 image causes "Agent is not responding" so use a snapshot
image_family: freebsd-11-3-snap
install_script: pkg install -y gmake coreutils
script: |
MOREFLAGS="-Werror" gmake -j all
gmake shortest

View File

@ -0,0 +1,21 @@
# Set the default behavior
* text eol=lf
# Explicitly declare source files
*.c text eol=lf
*.h text eol=lf
# Denote files that should not be modified.
*.odt binary
*.png binary
# Visual Studio
*.sln text eol=crlf
*.vcxproj* text eol=crlf
*.vcproj* text eol=crlf
*.suo binary
*.rc text eol=crlf
# Windows
*.bat text eol=crlf
*.cmd text eol=crlf

View File

@ -0,0 +1,35 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Download data '...'
2. Run '...' with flags '...'
3. Scroll up on the log to '....'
4. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots and charts**
If applicable, add screenshots and charts to help explain your problem.
**Desktop (please complete the following information):**
- OS: [e.g. Mac]
- Version [e.g. 22]
- Compiler [e.g. gcc]
- Flags [e.g O2]
- Other relevant hardware specs [e.g. Dual-core]
- Build system [e.g. Makefile]
**Additional context**
Add any other context about the problem here.

View File

@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

View File

@ -0,0 +1,167 @@
name: dev-long-tests
# Tests longer than 10mn
concurrency:
group: long-${{ github.ref }}
cancel-in-progress: true
on:
pull_request:
branches: [ dev, release, actionsTest ]
jobs:
make-test:
runs-on: ubuntu-latest
env:
DEVNULLRIGHTS: 1
READFROMBLOCKDEVICE: 1
steps:
- uses: actions/checkout@v2
- name: make test
run: make test
make-test-osx:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- name: OS-X test
run: make test # make -C lib all doesn't work because it's not a tty
tsan-zstreamtest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: thread sanitizer zstreamtest
run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream
tsan-fuzztest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: thread sanitizer fuzztest
run: CC=clang make tsan-fuzztest
gcc-8-asan-ubsan-testzstd:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: gcc-8 + ASan + UBSan + Test Zstd
run: |
sudo apt-get -qqq update
make gcc8install
CC=gcc-8 make -j uasan-test-zstd </dev/null V=1
gcc-asan-ubsan-testzstd-32bit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: ASan + UBSan + Test Zstd, 32bit mode
run: |
make libc6install
make -j uasan-test-zstd32 V=1
# Note : external libraries must be turned off when using MSAN tests,
# because they are not msan-instrumented,
# so any data coming from these libraries is always considered "uninitialized"
gcc-8-asan-ubsan-fuzz:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: gcc-8 + ASan + UBSan + Fuzz Test
run: |
make gcc8install
CC=gcc-8 FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest
gcc-asan-ubsan-fuzz32:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: ASan + UBSan + Fuzz Test 32bit
run: |
make libc6install
CFLAGS="-O3 -m32" FUZZER_FLAGS="--long-tests" make uasan-fuzztest
asan-ubsan-regression:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: ASan + UBSan + Regression Test
run: make -j uasanregressiontest
msan-regression:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: MSan + Regression Test
run: make -j msanregressiontest
clang-msan-fuzz:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: clang + MSan + Fuzz Test
run: |
sudo apt-get -qqq update
sudo apt-get install clang
CC=clang FUZZER_FLAGS="--long-tests" make clean msan-fuzztest
clang-msan-testzstd:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: clang + MSan + Test Zstd
run: |
sudo apt-get update
sudo apt-get install clang
CC=clang make msan-test-zstd HAVE_ZLIB=0 HAVE_LZ4=0 HAVE_LZMA=0 V=1
armfuzz:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Qemu ARM emulation + Fuzz Test
run: |
sudo apt-get -qqq update
make arminstall
make armfuzz
valgrind-fuzz-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: valgrind + fuzz test stack mode # ~ 7mn
shell: 'script -q -e -c "bash {0}"'
run: |
make valgrindinstall
make -C tests valgrindTest
make clean
make -C tests test-fuzzer-stackmode
oss-fuzz:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
sanitizer: [address, undefined, memory]
steps:
- name: Build Fuzzers (${{ matrix.sanitizer }})
id: build
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
with:
oss-fuzz-project-name: 'zstd'
dry-run: false
sanitizer: ${{ matrix.sanitizer }}
- name: Run Fuzzers (${{ matrix.sanitizer }})
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
with:
oss-fuzz-project-name: 'zstd'
fuzz-seconds: 600
dry-run: false
sanitizer: ${{ matrix.sanitizer }}
- name: Upload Crash
uses: actions/upload-artifact@v1
if: failure() && steps.build.outcome == 'success'
with:
name: ${{ matrix.sanitizer }}-artifacts
path: ./out/artifacts

View File

@ -0,0 +1,192 @@
name: dev-short-tests
# Faster tests: mostly build tests, along with some other
# misc tests
concurrency:
group: fast-${{ github.ref }}
cancel-in-progress: true
on:
pull_request:
branches: [ dev, release, actionsTest ]
jobs:
linux-kernel:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: linux kernel, library + build + test
run: make -C contrib/linux-kernel test CFLAGS="-Werror -Wunused-const-variable -Wunused-but-set-variable"
benchmarking:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: make benchmarking
run: make benchmarking
check-32bit: # designed to catch https://github.com/facebook/zstd/issues/2428
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: make check on 32-bit
run: |
sudo apt update
APT_PACKAGES="gcc-multilib" make apt-install
CFLAGS="-m32 -O1 -fstack-protector" make check V=1
gcc-7-libzstd:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: gcc-7 + libzstdmt compilation
run: |
sudo apt-get -qqq update
make gcc7install
CC=gcc-7 CFLAGS=-Werror make -j all
make clean
LDFLAGS=-Wl,--no-undefined make -C lib libzstd-mt
# candidate test (to check) : underlink test
# LDFLAGS=-Wl,--no-undefined : will make the linker fail if dll is underlinked
cmake-build-and-test-check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: cmake build and test check
run: |
FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
cp -r ./ "../zstd source"
cd "../zstd source"
FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
cpp-gnu90-c99-compatibility:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: C++, gnu90 and c99 compatibility
run: |
make cxxtest
make clean
make gnu90build
make clean
make c99build
make clean
make travis-install # just ensures `make install` works
mingw-cross-compilation:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: mingw cross-compilation
run: |
# sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix; (doesn't work)
sudo apt-get -qqq update
sudo apt-get install gcc-mingw-w64
CC=x86_64-w64-mingw32-gcc CXX=x86_64-w64-mingw32-g++ CFLAGS="-Werror -O1" make zstd
armbuild:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: ARM Build Test
run: |
sudo apt-get -qqq update
make arminstall
make armbuild
bourne-shell:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Bourne shell compatibility (shellcheck)
run: |
wget https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz
tar -xf shellcheck-v0.7.1.linux.x86_64.tar.xz
shellcheck-v0.7.1/shellcheck --shell=sh --severity=warning --exclude=SC2010 tests/playTests.sh
zlib-wrapper:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: zlib wrapper test
run: |
sudo apt-get -qqq update
make valgrindinstall
make -C zlibWrapper test
make -C zlibWrapper valgrindTest
lz4-threadpool-partial-libs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: LZ4, thread pool, and partial libs tests
run: |
make lz4install
make -C tests test-lz4
make check < /dev/null | tee # mess with lz4 console detection
make clean
make -C tests test-pool
make clean
bash tests/libzstd_partial_builds.sh
gcc-make-tests-32bit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Make all, 32bit mode
run: |
sudo apt-get -qqq update
make libc6install
CFLAGS="-Werror -m32" make -j all32
gcc-8-make:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: gcc-8 build
run: |
sudo apt-get -qqq update
make gcc8install
CC=gcc-8 CFLAGS="-Werror" make -j all
visual-2015:
# only GH actions windows-2016 contains VS 2015
runs-on: windows-2016
strategy:
matrix:
platform: [x64, Win32]
configuration: [Debug, Release]
steps:
- uses: actions/checkout@v2
- name: Add MSBuild to PATH
uses: microsoft/setup-msbuild@v1.0.2
- name: Build
working-directory: ${{env.GITHUB_WORKSPACE}}
run: >
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v140
/t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}}
# For reference : icc tests
# icc tests are currently failing on Github Actions, likely to issues during installation stage
# To be fixed later
#
# icc:
# name: icc-check
# runs-on: ubuntu-latest
# steps:
# - name: install icc
# run: |
# export DEBIAN_FRONTEND=noninteractive
# sudo apt-get -qqq update
# sudo apt-get install -y wget build-essential pkg-config cmake ca-certificates gnupg
# sudo wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
# sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
# sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
# sudo apt-get update
# sudo apt-get install -y intel-basekit intel-hpckit
# - uses: actions/checkout@v2
# - name: make check
# run: |
# make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check

View File

@ -0,0 +1,68 @@
name: publish-release-artifacts
on:
release:
types:
- created
jobs:
publish-release-artifacts:
runs-on: ubuntu-latest
if: startsWith(github.ref, 'refs/tags/')
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Archive
env:
RELEASE_SIGNING_KEY: ${{ secrets.RELEASE_SIGNING_KEY }}
RELEASE_SIGNING_KEY_PASSPHRASE: ${{ secrets.RELEASE_SIGNING_KEY_PASSPHRASE }}
run: |
# compute file name
export TAG="$(echo "$GITHUB_REF" | sed -n 's_^refs/tags/__p')"
if [ -z "$TAG" ]; then
echo "action must be run on a tag. GITHUB_REF is not a tag: $GITHUB_REF"
exit 1
fi
# Attempt to extract "1.2.3" from "v1.2.3" to maintain artifact name backwards compat.
# Otherwise, degrade to using full tag.
export VERSION="$(echo "$TAG" | sed 's_^v\([0-9]\+\.[0-9]\+\.[0-9]\+\)$_\1_')"
export ZSTD_VERSION="zstd-$VERSION"
# archive
git archive $TAG \
--prefix $ZSTD_VERSION/ \
--format tar \
-o $ZSTD_VERSION.tar
# Do the rest of the work in a sub-dir so we can glob everything we want to publish.
mkdir artifacts/
mv $ZSTD_VERSION.tar artifacts/
cd artifacts/
# compress
zstd -k -19 $ZSTD_VERSION.tar
gzip -k -9 $ZSTD_VERSION.tar
# we only publish the compressed tarballs
rm $ZSTD_VERSION.tar
# hash
sha256sum $ZSTD_VERSION.tar.zst > $ZSTD_VERSION.tar.zst.sha256
sha256sum $ZSTD_VERSION.tar.gz > $ZSTD_VERSION.tar.gz.sha256
# sign
if [ -n "$RELEASE_SIGNING_KEY" ]; then
export GPG_BATCH_OPTS="--batch --no-use-agent --pinentry-mode loopback --no-tty --yes"
echo "$RELEASE_SIGNING_KEY" | gpg $GPG_BATCH_OPTS --import
gpg $GPG_BATCH_OPTS --armor --sign --sign-with signing@zstd.net --detach-sig --passphrase "$RELEASE_SIGNING_KEY_PASSPHRASE" --output $ZSTD_VERSION.tar.zst.sig $ZSTD_VERSION.tar.zst
gpg $GPG_BATCH_OPTS --armor --sign --sign-with signing@zstd.net --detach-sig --passphrase "$RELEASE_SIGNING_KEY_PASSPHRASE" --output $ZSTD_VERSION.tar.gz.sig $ZSTD_VERSION.tar.gz
fi
- name: Publish
uses: skx/github-action-publish-binaries@release-1.3
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
args: artifacts/*

View File

@ -0,0 +1,54 @@
# Object files
*.o
*.ko
*.dSYM
# Libraries
*.lib
*.a
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
/zstd
zstdmt
*.exe
*.out
*.app
# Test artefacts
tmp*
*.zst
*.zstd
dictionary.
dictionary
NUL
# Build artefacts
projects/
bin/
.buckd/
buck-out/
build-*
*.gcda
# Other files
.directory
_codelite/
_zstdbench/
.clang_complete
*.idea
*.swp
.DS_Store
googletest/
*.d
*.vscode
*.code-workspace
compile_commands.json
.clangd
perf.data
perf.data.old

View File

@ -0,0 +1,145 @@
# Travis CI is used to test platforms that github-actions currently doesn't support
# without either self-hosting or some finicky work-around. Also, some tests
# are troublesome to migrate since GH Actions does not run tests in a tty.
language: c
git:
depth: 1
branches:
only:
- dev
- release
- master
- travisTest
addons:
apt:
update: true
env:
global:
- FUZZERTEST=-T1mn
ZSTREAM_TESTTIME=-T1mn
DECODECORPUS_TESTTIME=-T1mn
matrix:
fast_finish: true
include:
- name: arm64 # ~2.5 mn
os: linux
arch: arm64
script:
- make check
- name: arm64fuzz
os: linux
arch: arm64
script:
- make -C tests fuzztest
# TODO: migrate to GH actions once warnings are fixed
- name: Minimal Decompressor Macros # ~5mn
script:
- make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
- make clean && make check ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
- make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
- make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT"
- make clean && make -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
- make clean && make check MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG"
- make clean && make -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
- make clean && make check MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS"
# TODO: migrate to GH actions once newest clang staticanalyze warnings are fixed
- name: static analyzer scanbuild # ~26mn
dist: trusty # note : it's important to pin down a version of static analyzer, since different versions report different false positives
script:
- make staticAnalyze
# GH actions can't run this command on OS-X, non-tty issues
- name: OS-X make all lib
os: osx
script:
- make -C lib all
# Introduced to check compat with old toolchains, to prevent e.g. #1872
- name: ARM Build Test (on Trusty)
dist: trusty
script:
- make arminstall
- make armbuild
- name: Qemu PPC + Fuzz Test # ~13mn
dist: trusty # it seems ppc cross-compilation fails on "current"
script:
- make ppcinstall
- make ppcfuzz
# check release number (release/new tag only)
- name: Tag-Specific Test
if: tag =~ ^v[0-9]\.[0-9]
script:
- make -C tests checkTag
- tests/checkTag "$TRAVIS_BRANCH"
- name: PPC64LE + Fuzz test # ~13mn
arch: ppc64le
env:
- FUZZER_FLAGS=--no-big-tests
- MOREFLAGS="-static"
script:
- cat /proc/cpuinfo
- make -C tests fuzztest
- name: Qemu PPC64 + Fuzz test # ~13mn, presumed Big-Endian (?)
dist: trusty # note : PPC64 cross-compilation for Qemu tests seems broken on Xenial
script:
- make ppcinstall
- make ppc64fuzz
# note : we already have aarch64 tests on hardware
- name: Qemu aarch64 + Fuzz Test (on Xenial) # ~14mn
dist: xenial
script:
- make arminstall
- make aarch64fuzz
- name: Versions Compatibility Test # 11.5mn
script:
- make -C tests versionsTest
# meson dedicated test
- name: Xenial (Meson + clang) # ~15mn
dist: bionic
language: cpp
compiler: clang
install:
- sudo apt-get install -qq liblz4-dev valgrind tree
- |
travis_retry curl -o ~/ninja.zip -L 'https://github.com/ninja-build/ninja/releases/download/v1.9.0/ninja-linux.zip' &&
unzip ~/ninja.zip -d ~/.local/bin
- |
travis_retry curl -o ~/get-pip.py -L 'https://bootstrap.pypa.io/get-pip.py' &&
python3 ~/get-pip.py --user &&
pip3 install --user meson
script:
- |
meson setup \
--buildtype=debugoptimized \
-Db_lundef=false \
-Dauto_features=enabled \
-Dbin_programs=true \
-Dbin_tests=true \
-Dbin_contrib=true \
-Ddefault_library=both \
build/meson builddir
- pushd builddir
- ninja
- meson test --verbose --no-rebuild
- DESTDIR=./staging ninja install
- tree ./staging
after_failure:
- cat "$TRAVIS_BUILD_DIR"/builddir/meson-logs/testlog.txt
allow_failures:
- env: ALLOW_FAILURES=true

View File

@ -0,0 +1,698 @@
v1.5.0 (May 11, 2021)
api: Various functions promoted from experimental to stable API: (#2579-2581, @senhuang42)
`ZSTD_defaultCLevel()`
`ZSTD_getDictID_fromCDict()`
api: Several experimental functions have been deprecated and will emit a compiler warning (#2582, @senhuang42)
`ZSTD_compress_advanced()`
`ZSTD_compress_usingCDict_advanced()`
`ZSTD_compressBegin_advanced()`
`ZSTD_compressBegin_usingCDict_advanced()`
`ZSTD_initCStream_srcSize()`
`ZSTD_initCStream_usingDict()`
`ZSTD_initCStream_usingCDict()`
`ZSTD_initCStream_advanced()`
`ZSTD_initCStream_usingCDict_advanced()`
`ZSTD_resetCStream()`
api: ZSTDMT_NBWORKERS_MAX reduced to 64 for 32-bit environments (@Cyan4973)
perf: Significant speed improvements for middle compression levels (#2494, @senhuang42 @terrelln)
perf: Block splitter to improve compression ratio, enabled by default for high compression levels (#2447, @senhuang42)
perf: Decompression loop refactor, speed improvements on `clang` and for `--long` modes (#2614 #2630, @Cyan4973)
perf: Reduced stack usage during compression and decompression entropy stage (#2522 #2524, @terrelln)
bug: Improve setting permissions of created files (#2525, @felixhandte)
bug: Fix large dictionary non-determinism (#2607, @terrelln)
bug: Fix non-determinism test failures on Linux i686 (#2606, @terrelln)
bug: Fix various dedicated dictionary search bugs (#2540 #2586, @senhuang42 @felixhandte)
bug: Ensure `ZSTD_estimateCCtxSize*()` monotonically increases with compression level (#2538, @senhuang42)
bug: Fix --patch-from mode parameter bound bug with small files (#2637, @occivink)
bug: Fix UBSAN error in decompression (#2625, @terrelln)
bug: Fix superblock compression divide by zero bug (#2592, @senhuang42)
bug: Make the number of physical CPU cores detection more robust (#2517, @PaulBone)
doc: Improve `zdict.h` dictionary training API documentation (#2622, @terrelln)
doc: Note that public `ZSTD_free*()` functions accept NULL pointers (#2521, @animalize)
doc: Add style guide docs for open source contributors (#2626, @Cyan4973)
tests: Better regression test coverage for different dictionary modes (#2559, @senhuang42)
tests: Better test coverage of index reduction (#2603, @terrelln)
tests: OSS-Fuzz coverage for seekable format (#2617, @senhuang42)
tests: Test coverage for ZSTD threadpool API (#2604, @senhuang42)
build: Dynamic library built multithreaded by default (#2584, @senhuang42)
build: Move `zstd_errors.h` and `zdict.h` to `lib/` root (#2597, @terrelln)
build: Allow `ZSTDMT_JOBSIZE_MIN` to be configured at compile-time, reduce default to 512KB (#2611, @Cyan4973)
build: Single file library build script moved to `build/` directory (#2618, @felixhandte)
build: `ZBUFF_*()` is no longer built by default (#2583, @senhuang42)
build: Fixed Meson build (#2548, @SupervisedThinking @kloczek)
build: Fix excessive compiler warnings with clang-cl and CMake (#2600, @nickhutchinson)
build: Detect presence of `md5` on Darwin (#2609, @felixhandte)
build: Avoid SIGBUS on armv6 (#2633, @bmwiedmann)
cli: `--progress` flag added to always display progress bar (#2595, @senhuang42)
cli: Allow reading from block devices with `--force` (#2613, @felixhandte)
cli: Fix CLI filesize display bug (#2550, @Cyan4973)
cli: Fix windows CLI `--filelist` end-of-line bug (#2620, @Cyan4973)
contrib: Various fixes for linux kernel patch (#2539, @terrelln)
contrib: Seekable format - Decompression hanging edge case fix (#2516, @senhuang42)
contrib: Seekable format - New seek table-only API (#2113 #2518, @mdittmer @Cyan4973)
contrib: Seekable format - Fix seek table descriptor check when loading (#2534, @foxeng)
contrib: Seekable format - Decompression fix for large offsets, (#2594, @azat)
misc: Automatically published release tarballs available on Github (#2535, @felixhandte)
v1.4.9 (Mar 1, 2021)
bug: Use `umask()` to Constrain Created File Permissions (#2495, @felixhandte)
bug: Make Simple Single-Pass Functions Ignore Advanced Parameters (#2498, @terrelln)
api: Add (De)Compression Tracing Functionality (#2482, @terrelln)
api: Support References to Multiple DDicts (#2446, @senhuang42)
api: Add Function to Generate Skippable Frame (#2439, @senhuang42)
perf: New Algorithms for the Long Distance Matcher (#2483, @mpu)
perf: Performance Improvements for Long Distance Matcher (#2464, @mpu)
perf: Don't Shrink Window Log when Streaming with a Dictionary (#2451, @terrelln)
cli: Fix `--output-dir-mirror`'s Rejection of `..`-Containing Paths (#2512, @felixhandte)
cli: Allow Input From Console When `-f`/`--force` is Passed (#2466, @felixhandte)
cli: Improve Help Message (#2500, @senhuang42)
tests: Remove Flaky Tests (#2455, #2486, #2445, @Cyan4973)
tests: Correctly Invoke md5 Utility on NetBSD (#2492, @niacat)
tests: Avoid Using `stat -c` on NetBSD (#2513, @felixhandte)
build: Zstd CLI Can Now be Linked to Dynamic `libzstd` (#2457, #2454 @Cyan4973)
build: Hide and Avoid Using Static-Only Symbols (#2501, #2504, @skitt)
build: CMake: Enable Only C for lib/ and programs/ Projects (#2498, @concatime)
build: CMake: Use `configure_file()` to Create the `.pc` File (#2462, @lazka)
build: Fix Fuzzer Compiler Detection & Update UBSAN Flags (#2503, @terrelln)
build: Add Guards for `_LARGEFILE_SOURCE` and `_LARGEFILE64_SOURCE` (#2444, @indygreg)
build: Improve `zlibwrapper` Makefile (#2437, @Cyan4973)
contrib: Add `recover_directory` Program (#2473, @terrelln)
doc: Change License Year to 2021 (#2452 & #2465, @terrelln & @senhuang42)
doc: Fix Typos (#2459, @ThomasWaldmann)
v1.4.8 (Dec 18, 2020)
hotfix: wrong alignment of an internal buffer
v1.4.7 (Dec 16, 2020)
perf: stronger --long mode at high compression levels, by @senhuang42
perf: stronger --patch-from at high compression levels, thanks to --long improvements
perf: faster dictionary compression at medium compression levels, by @felixhandte
perf: small speed & memory usage improvements for ZSTD_compress2(), by @terrelln
perf: improved fast compression speeds with Visual Studio, by @animalize
cli : Set nb of threads with environment variable ZSTD_NBTHREADS, by @senhuang42
cli : accept decompressing files with *.zstd suffix
cli : provide a condensed summary by default when processing multiple files
cli : fix : stdin input no longer confused as user prompt
cli : improve accuracy of several error messages
api : new sequence ingestion API, by @senhuang42
api : shared thread pool: control total nb of threads used by multiple compression jobs, by @marxin
api : new ZSTD_getDictID_fromCDict(), by @LuAPi
api : zlibWrapper only uses public API, and is compatible with dynamic library, by @terrelln
api : fix : multithreaded compression has predictable output even in special cases (see #2327) (issue not accessible from cli)
api : fix : dictionary compression correctly respects dictionary compression level (see #2303) (issue not accessible from cli)
build: fix cmake script when using path with spaces, by @terrelln
build: improved compile-time detection of aarch64/neon platforms, by @bsdimp
build: Fix building on AIX 5.1, by @likema
build: compile paramgrill with cmake on Windows, requested by @mirh
doc : clarify repcode updates in format specification, by @felixhandte
v1.4.6
fix : Always return dstSize_tooSmall when that is the case
fix : Fix ZSTD_initCStream_advanced() with static allocation and no dictionary
perf: Improve small block decompression speed by 20%+, by @terrelln
perf: Reduce compression stack usage by 1 KB, by @terrelln
perf: Improve decompression speed by improving ZSTD_wildcopy, by @helloguo (#2252, #2256)
perf: Improve histogram construction, by @cyan4973 (#2253)
cli : Add --output-dir-mirror option, by @xxie24 (#2219)
cli : Warn when (de)compressing multiple files into a single output, by @senhuang42 (#2279)
cli : Improved progress bar and status summary when (de)compressing multiple files, by @senhuang42 (#2283)
cli : Call stat less often, by @felixhandte (#2262)
cli : Allow --patch-from XXX and --filelist XXX in addition to --patch-from=XXX and --filelist=XXX, by @cyan4973 (#2250)
cli : Allow --patch-from to compress stdin with --stream-size, by @bimbashrestha (#2206)
api : Do not install zbuff.h, since it has long been deprecated, by @cyan4973 (#2166).
api : Fix ZSTD_CCtx_setParameter() with ZSTD_c_compressionLevel to make 0 mean default level, by @i-do-cpp (#2291)
api : Rename ZSTDMT_NBTHREADS_MAX to ZSTDMT_NBWORKERS_MAX, by @marxin (#2228).
build: Install pkg-config file with CMake and MinGW, by @tonytheodore (#2183)
build: Install DLL with CMake on Windows, by @BioDataAnalysis (#2221)
build: Fix DLL install location with CMake, by @xantares and @bimbashrestha (#2186)
build: Add ZSTD_NO_UNUSED_FUNCTIONS macro to hide unused functions
build: Add ZSTD_NO_INTRINSICS macro to avoid explicit intrinsics
build: Add STATIC_BMI2 macro for compile time detection of BMI2 on MSVC, by @Niadb (#2258)
build: Fix -Wcomma warnings, by @cwoffenden
build: Remove distutils requirement for meson build, by @neheb (#2197)
build: Fix cli compilation with uclibc
build: Fix cli compilation without st_mtime, by @ffontaine (#2246)
build: Fix shadowing warnings in library
build: Fix single file library compilation with Emscripten, by @yoshihitoh (#2227)
misc: Improve single file library and include dictBuilder, by @cwoffenden
misc: Allow compression dictionaries with missing symbols
misc: Add freestanding translation script in contrib/freestanding_lib
misc: Collect all of zstd's libc dependencies into zstd_deps.h
doc : Add ZSTD_versionString() to manual, by @animalize
doc : Fix documentation for ZSTD_CCtxParams_setParameter(), by @felixhandte (#2270)
v1.4.5 (May 22, 2020)
fix : Compression ratio regression on huge files (> 3 GB) using high levels (--ultra) and multithreading, by @terrelln
perf: Improved decompression speed: x64 : +10% (clang) / +5% (gcc); ARM : from +15% to +50%, depending on SoC, by @terrelln
perf: Automatically downsizes ZSTD_DCtx when too large for too long (#2069, by @bimbashreshta)
perf: Improved fast compression speed on aarch64 (#2040, ~+3%, by @caoyzh)
perf: Small level 1 compression speed gains (depending on compiler)
cli : New --patch-from command, create and apply patches from files, by @bimbashreshta
cli : New --filelist= : Provide a list of files to operate upon from a file
cli : -b -d command can now benchmark decompression on multiple files
cli : New --no-content-size command
cli : New --show-default-cparams information command
api : ZDICT_finalizeDictionary() is promoted to stable (#2111)
api : new experimental parameter ZSTD_d_stableOutBuffer (#2094)
build: Generate a single-file libzstd library (#2065, by @cwoffenden)
build: Relative includes no longer require -I compiler flags for zstd lib subdirs (#2103, by @felixhandte)
build: zstd now compiles cleanly under -pedantic (#2099)
build: zstd now compiles with make-4.3
build: Support mingw cross-compilation from Linux, by @Ericson2314
build: Meson multi-thread build fix on windows
build: Some misc icc fixes backed by new ci test on travis
misc: bitflip analyzer tool, by @felixhandte
misc: Extend largeNbDicts benchmark to compression
misc: Edit-distance match finder in contrib/
doc : Improved beginner CONTRIBUTING.md docs
doc : New issue templates for zstd
v1.4.4 (Nov 6, 2019)
perf: Improved decompression speed, by > 10%, by @terrelln
perf: Better compression speed when re-using a context, by @felixhandte
perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42
perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha
perf: minor generic speed optimization, by @davidbolvansky
api: new ability to extract sequences from the parser for analysis, by @bimbashrestha
api: fixed decoding of magic-less frames, by @terrelln
api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK
cli: Named pipes support, by @bimbashrestha
cli: short tar's extension support, by @stokito
cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42
cli: commands --stream-size=# and --size-hint=#, by @nmagerko
cli: command --exclude-compressed, by @shashank0791
cli: faster `-t` test mode
cli: improved some error messages, by @vangyzen
cli: fix command `-D dictionary` on Windows, reported by @artyompetrov
cli: fix rare deadlock condition within dictionary builder, by @terrelln
build: single-file decoder with emscripten compilation script, by @cwoffenden
build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive
build: fixed deprecation warning for certain gcc version, reported by @jasonma163
build: fix compilation on old gcc versions, by @cemeyer
build: improved installation directories for cmake script, by Dmitri Shubin
pack: modified pkgconfig, for better integration into openwrt, requested by @neheb
misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format
misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro
v1.4.3 (Aug 20, 2019)
bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709)
bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722)
build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705)
v1.4.2 (Jul 26, 2019)
bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)
bug: Fix seekable decompression in-memory API by @iburinoc (#1695)
misc: Validate blocks are smaller than size limit by @vivekmg (#1685)
misc: Restructure source files by @ephiepark (#1679)
v1.4.1 (Jul 20, 2019)
bug: Fix data corruption in niche use cases by @terrelln (#1659)
bug: Fuzz legacy modes, fix uncovered bugs by @terrelln (#1593, #1594, #1595)
bug: Fix out of bounds read by @terrelln (#1590)
perf: Improve decode speed by ~7% @mgrice (#1668)
perf: Slightly improved compression ratio of level 3 and 4 (ZSTD_dfast) by @cyan4973 (#1681)
perf: Slightly faster compression speed when re-using a context by @cyan4973 (#1658)
perf: Improve compression ratio for small windowLog by @cyan4973 (#1624)
perf: Faster compression speed in high compression mode for repetitive data by @terrelln (#1635)
api: Add parameter to generate smaller dictionaries by @tyler-tran (#1656)
cli: Recognize symlinks when built in C99 mode by @felixhandte (#1640)
cli: Expose cpu load indicator for each file on -vv mode by @ephiepark (#1631)
cli: Restrict read permissions on destination files by @chungy (#1644)
cli: zstdgrep: handle -f flag by @felixhandte (#1618)
cli: zstdcat: follow symlinks by @vejnar (#1604)
doc: Remove extra size limit on compressed blocks by @felixhandte (#1689)
doc: Fix typo by @yk-tanigawa (#1633)
doc: Improve documentation on streaming buffer sizes by @cyan4973 (#1629)
build: CMake: support building with LZ4 @leeyoung624 (#1626)
build: CMake: install zstdless and zstdgrep by @leeyoung624 (#1647)
build: CMake: respect existing uninstall target by @j301scott (#1619)
build: Make: skip multithread tests when built without support by @michaelforney (#1620)
build: Make: Fix examples/ test target by @sjnam (#1603)
build: Meson: rename options out of deprecated namespace by @lzutao (#1665)
build: Meson: fix build by @lzutao (#1602)
build: Visual Studio: don't export symbols in static lib by @scharan (#1650)
build: Visual Studio: fix linking by @absotively (#1639)
build: Fix MinGW-W64 build by @myzhang1029 (#1600)
misc: Expand decodecorpus coverage by @ephiepark (#1664)
v1.4.0 (Apr 17, 2019)
perf: Improve level 1 compression speed in most scenarios by 6% by @gbtucker and @terrelln
api: Move the advanced API, including all functions in the staging section, to the stable section
api: Make ZSTD_e_flush and ZSTD_e_end block for maximum forward progress
api: Rename ZSTD_CCtxParam_getParameter to ZSTD_CCtxParams_getParameter
api: Rename ZSTD_CCtxParam_setParameter to ZSTD_CCtxParams_setParameter
api: Don't export ZSTDMT functions from the shared library by default
api: Require ZSTD_MULTITHREAD to be defined to use ZSTDMT
api: Add ZSTD_decompressBound() to provide an upper bound on decompressed size by @shakeelrao
api: Fix ZSTD_decompressDCtx() corner cases with a dictionary
api: Move ZSTD_getDictID_*() functions to the stable section
api: Add ZSTD_c_literalCompressionMode flag to enable or disable literal compression by @terrelln
api: Allow compression parameters to be set when a dictionary is used
api: Allow setting parameters before or after ZSTD_CCtx_loadDictionary() is called
api: Fix ZSTD_estimateCStreamSize_usingCCtxParams()
api: Setting ZSTD_d_maxWindowLog to 0 means use the default
cli: Ensure that a dictionary is not used to compress itself by @shakeelrao
cli: Add --[no-]compress-literals flag to enable or disable literal compression
doc: Update the examples to use the advanced API
doc: Explain how to transition from old streaming functions to the advanced API in the header
build: Improve the Windows release packages
build: Improve CMake build by @hjmjohnson
build: Build fixes for FreeBSD by @lwhsu
build: Remove redundant warnings by @thatsafunnyname
build: Fix tests on OpenBSD by @bket
build: Extend fuzzer build system to work with the new clang engine
build: CMake now creates the libzstd.so.1 symlink
build: Improve Meson build by @lzutao
misc: Fix symbolic link detection on FreeBSD
misc: Use physical core count for -T0 on FreeBSD by @cemeyer
misc: Fix zstd --list on truncated files by @kostmo
misc: Improve logging in debug mode by @felixhandte
misc: Add CirrusCI tests by @lwhsu
misc: Optimize dictionary memory usage in corner cases
misc: Improve the dictionary builder on small or homogeneous data
misc: Fix spelling across the repo by @jsoref
v1.3.8 (Dec 28, 2018)
perf: better decompression speed on large files (+7%) and cold dictionaries (+15%)
perf: slightly better compression ratio at high compression modes
api : finalized advanced API, last stage before "stable" status
api : new --rsyncable mode, by @terrelln
api : support decompression of empty frames into NULL (used to be an error) (#1385)
build: new set of macros to build a minimal size decoder, by @felixhandte
build: fix compilation on MIPS32, reported by @clbr (#1441)
build: fix compilation with multiple -arch flags, by @ryandesign
build: highly upgraded meson build, by @lzutao
build: improved buck support, by @obelisk
build: fix cmake script : can create debug build, by @pitrou
build: Makefile : grep works on both colored consoles and systems without color support
build: fixed zstd-pgo, by @bmwiedemann
cli : support ZSTD_CLEVEL environment variable, by @yijinfb (#1423)
cli : --no-progress flag, preserving final summary (#1371), by @terrelln
cli : ensure destination file is not source file (#1422)
cli : clearer error messages, especially when input file not present
doc : clarified zstd_compression_format.md, by @ulikunitz
misc: fixed zstdgrep, returns 1 on failure, by @lzutao
misc: NEWS renamed as CHANGELOG, in accordance with fboss
v1.3.7 (Oct 20, 2018)
perf: slightly better decompression speed on clang (depending on hardware target)
fix : performance of dictionary compression for small input < 4 KB at levels 9 and 10
build: no longer build backtrace by default in release mode; restrict further automatic mode
build: control backtrace support through build macro BACKTRACE
misc: added man pages for zstdless and zstdgrep, by @samrussell
v1.3.6 (Oct 6, 2018)
perf: much faster dictionary builder, by @jenniferliu
perf: faster dictionary compression on small data when using multiple contexts, by @felixhandte
perf: faster dictionary decompression when using a very large number of dictionaries simultaneously
cli : fix : does no longer overwrite destination when source does not exist (#1082)
cli : new command --adapt, for automatic compression level adaptation
api : fix : block api can be streamed with > 4 GB, reported by @catid
api : reduced ZSTD_DDict size by 2 KB
api : minimum negative compression level is defined, and can be queried using ZSTD_minCLevel().
build: support Haiku target, by @korli
build: Read Legacy format is limited to v0.5+ by default. Can be changed at compile time with macro ZSTD_LEGACY_SUPPORT.
doc : zstd_compression_format.md updated to match wording in IETF RFC 8478
misc: tests/paramgrill, a parameter optimizer, by @GeorgeLu97
v1.3.5 (Jun 29, 2018)
perf: much faster dictionary compression, by @felixhandte
perf: small quality improvement for dictionary generation, by @terrelln
perf: slightly improved high compression levels (notably level 19)
mem : automatic memory release for long duration contexts
cli : fix : overlapLog can be manually set
cli : fix : decoding invalid lz4 frames
api : fix : performance degradation for dictionary compression when using advanced API, by @terrelln
api : change : clarify ZSTD_CCtx_reset() vs ZSTD_CCtx_resetParameters(), by @terrelln
build: select custom libzstd scope through control macros, by @GeorgeLu97
build: OpenBSD patch, by @bket
build: make and make all are compatible with -j
doc : clarify zstd_compression_format.md, updated for IETF RFC process
misc: pzstd compatible with reproducible compilation, by @lamby
v1.3.4 (Mar 27, 2018)
perf: faster speed (especially decoding speed) on recent cpus (haswell+)
perf: much better performance associating --long with multi-threading, by @terrelln
perf: better compression at levels 13-15
cli : asynchronous compression by default, for faster experience (use --single-thread for former behavior)
cli : smoother status report in multi-threading mode
cli : added command --fast=#, for faster compression modes
cli : fix crash when not overwriting existing files, by Pádraig Brady (@pixelb)
api : `nbThreads` becomes `nbWorkers` : 1 triggers asynchronous mode
api : compression levels can be negative, for even more speed
api : ZSTD_getFrameProgression() : get precise progress status of ZSTDMT anytime
api : ZSTDMT can accept new compression parameters during compression
api : implemented all advanced dictionary decompression prototypes
build: improved meson recipe, by Shawn Landden (@shawnl)
build: VS2017 scripts, by @HaydnTrigg
misc: all /contrib projects fixed
misc: added /contrib/docker script by @gyscos
v1.3.3 (Dec 21, 2017)
perf: faster zstd_opt strategy (levels 16-19)
fix : bug #944 : multithreading with shared dictionary and large data, reported by @gsliepen
cli : fix : content size written in header by default
cli : fix : improved LZ4 format support, by @felixhandte
cli : new : hidden command `-S`, to benchmark multiple files while generating one result per file
api : fix : support large skippable frames, by @terrelln
api : fix : streaming interface was adding a useless 3-bytes null block to small frames
api : change : when setting `pledgedSrcSize`, use `ZSTD_CONTENTSIZE_UNKNOWN` macro value to mean "unknown"
build: fix : compilation under rhel6 and centos6, reported by @pixelb
build: added `check` target
v1.3.2 (Oct 10, 2017)
new : long range mode, using --long command, by Stella Lau (@stellamplau)
new : ability to generate and decode magicless frames (#591)
changed : maximum nb of threads reduced to 200, to avoid address space exhaustion in 32-bits mode
fix : multi-threading compression works with custom allocators
fix : ZSTD_sizeof_CStream() was over-evaluating memory usage
fix : a rare compression bug when compression generates very large distances and a bunch of other conditions (only possible at --ultra -22)
fix : 32-bits build can now decode large offsets (levels 21+)
cli : added LZ4 frame support by default, by Felix Handte (@felixhandte)
cli : improved --list output
cli : new : can split input file for dictionary training, using command -B#
cli : new : clean operation artefact on Ctrl-C interruption
cli : fix : do not change /dev/null permissions when using command -t with root access, reported by @mike155 (#851)
cli : fix : write file size in header in multiple-files mode
api : added macro ZSTD_COMPRESSBOUND() for static allocation
api : experimental : new advanced decompression API
api : fix : sizeof_CCtx() used to over-estimate
build: fix : no-multithread variant compiles without pool.c dependency, reported by Mitchell Blank Jr (@mitchblank) (#819)
build: better compatibility with reproducible builds, by Bernhard M. Wiedemann (@bmwiedemann) (#818)
example : added streaming_memory_usage
license : changed /examples license to BSD + GPLv2
license : fix a few header files to reflect new license (#825)
v1.3.1 (Aug 21, 2017)
New license : BSD + GPLv2
perf: substantially decreased memory usage in Multi-threading mode, thanks to reports by Tino Reichardt (@mcmilk)
perf: Multi-threading supports up to 256 threads. Cap at 256 when more are requested (#760)
cli : improved and fixed --list command, by @ib (#772)
cli : command -vV to list supported formats, by @ib (#771)
build : fixed binary variants, reported by @svenha (#788)
build : fix Visual compilation for non x86/x64 targets, reported by Greg Slazinski (@GregSlazinski) (#718)
API exp : breaking change : ZSTD_getframeHeader() provides more information
API exp : breaking change : pinned down values of error codes
doc : fixed huffman example, by Ulrich Kunitz (@ulikunitz)
new : contrib/adaptive-compression, I/O driven compression strength, by Paul Cruz (@paulcruz74)
new : contrib/long_distance_matching, statistics by Stella Lau (@stellamplau)
updated : contrib/linux-kernel, by Nick Terrell (@terrelln)
v1.3.0 (Jul 6, 2017)
cli : new : `--list` command, by Paul Cruz
cli : changed : xz/lzma support enabled by default
cli : changed : `-t *` continue processing list after a decompression error
API : added : ZSTD_versionString()
API : promoted to stable status : ZSTD_getFrameContentSize(), by Sean Purcell
API exp : new advanced API : ZSTD_compress_generic(), ZSTD_CCtx_setParameter()
API exp : new : API for static or external allocation : ZSTD_initStatic?Ctx()
API exp : added : ZSTD_decompressBegin_usingDDict(), requested by Guy Riddle (#700)
API exp : clarified memory estimation / measurement functions.
API exp : changed : strongest strategy renamed ZSTD_btultra, fastest strategy ZSTD_fast set to 1
tools : decodecorpus can generate random dictionary-compressed samples, by Paul Cruz
new : contrib/seekable_format, demo and API, by Sean Purcell
changed : contrib/linux-kernel, updated version and license, by Nick Terrell
v1.2.0 (May 5, 2017)
cli : changed : Multithreading enabled by default (use target zstd-nomt or HAVE_THREAD=0 to disable)
cli : new : command -T0 means "detect and use nb of cores", by Sean Purcell
cli : new : zstdmt symlink hardwired to `zstd -T0`
cli : new : command --threads=# (#671)
cli : changed : cover dictionary builder by default, for improved quality, by Nick Terrell
cli : new : commands --train-cover and --train-legacy, to select dictionary algorithm and parameters
cli : experimental targets `zstd4` and `xzstd4`, with support for lz4 format, by Sean Purcell
cli : fix : does not output compressed data on console
cli : fix : ignore symbolic links unless --force specified,
API : breaking change : ZSTD_createCDict_advanced(), only use compressionParameters as argument
API : added : prototypes ZSTD_*_usingCDict_advanced(), for direct control over frameParameters.
API : improved: ZSTDMT_compressCCtx() reduced memory usage
API : fix : ZSTDMT_compressCCtx() now provides srcSize in header (#634)
API : fix : src size stored in frame header is controlled at end of frame
API : fix : enforced consistent rules for pledgedSrcSize==0 (#641)
API : fix : error code "GENERIC" replaced by "dstSizeTooSmall" when appropriate
build: improved cmake script, by @Majlen
build: enabled Multi-threading support for *BSD, by Baptiste Daroussin
tools: updated Paramgrill. Command -O# provides best parameters for sample and speed target.
new : contrib/linux-kernel version, by Nick Terrell
v1.1.4 (Mar 18, 2017)
cli : new : can compress in *.gz format, using --format=gzip command, by Przemyslaw Skibinski
cli : new : advanced benchmark command --priority=rt
cli : fix : write on sparse-enabled file systems in 32-bits mode, by @ds77
cli : fix : --rm remains silent when input is stdin
cli : experimental : xzstd, with support for xz/lzma decoding, by Przemyslaw Skibinski
speed : improved decompression speed in streaming mode for single shot scenarios (+5%)
memory: DDict (decompression dictionary) memory usage down from 150 KB to 20 KB
arch: 32-bits variant able to generate and decode very long matches (>32 MB), by Sean Purcell
API : new : ZSTD_findFrameCompressedSize(), ZSTD_getFrameContentSize(), ZSTD_findDecompressedSize()
API : changed : dropped support of legacy versions <= v0.3 (can be changed by modifying ZSTD_LEGACY_SUPPORT value)
build : new: meson build system in contrib/meson, by Dima Krasner
build : improved cmake script, by @Majlen
build : added -Wformat-security flag, as recommended by Padraig Brady
doc : new : educational decoder, by Sean Purcell
v1.1.3 (Feb 7, 2017)
cli : zstd can decompress .gz files (can be disabled with `make zstd-nogz` or `make HAVE_ZLIB=0`)
cli : new : experimental target `make zstdmt`, with multi-threading support
cli : new : improved dictionary builder "cover" (experimental), by Nick Terrell, based on prior work by Giuseppe Ottaviano.
cli : new : advanced commands for detailed parameters, by Przemyslaw Skibinski
cli : fix zstdless on Mac OS-X, by Andrew Janke
cli : fix #232 "compress non-files"
dictBuilder : improved dictionary generation quality, thanks to Nick Terrell
API : new : lib/compress/ZSTDMT_compress.h multithreading API (experimental)
API : new : ZSTD_create?Dict_byReference(), requested by Bartosz Taudul
API : new : ZDICT_finalizeDictionary()
API : fix : ZSTD_initCStream_usingCDict() properly writes dictID into frame header, by Gregory Szorc (#511)
API : fix : all symbols properly exposed in libzstd, by Nick Terrell
build : support for Solaris target, by Przemyslaw Skibinski
doc : clarified specification, by Sean Purcell
v1.1.2 (Dec 15, 2016)
API : streaming : decompression : changed : automatic implicit reset when chain-decoding new frames without init
API : experimental : added : dictID retrieval functions, and ZSTD_initCStream_srcSize()
API : zbuff : changed : prototypes now generate deprecation warnings
lib : improved : faster decompression speed at ultra compression settings and 32-bits mode
lib : changed : only public ZSTD_ symbols are now exposed
lib : changed : reduced usage of stack memory
lib : fixed : several corner case bugs, by Nick Terrell
cli : new : gzstd, experimental version able to decode .gz files, by Przemyslaw Skibinski
cli : new : preserve file attributes
cli : new : added zstdless and zstdgrep tools
cli : fixed : status displays total amount decoded, even for file consisting of multiple frames (like pzstd)
cli : fixed : zstdcat
zlib_wrapper : added support for gz* functions, by Przemyslaw Skibinski
install : better compatibility with FreeBSD, by Dimitry Andric
source tree : changed : zbuff source files moved to lib/deprecated
v1.1.1 (Nov 2, 2016)
New : command -M#, --memory=, --memlimit=, --memlimit-decompress= to limit allowed memory consumption
New : doc/zstd_manual.html, by Przemyslaw Skibinski
Improved : slightly better compression ratio at --ultra levels (>= 20)
Improved : better memory usage when using streaming compression API, thanks to @Rogier-5 report
Added : API : ZSTD_initCStream_usingCDict(), ZSTD_initDStream_usingDDict() (experimental section)
Added : example/multiple_streaming_compression.c
Changed : zstd_errors.h is now installed within /include (and replaces errors_public.h)
Updated man page
Fixed : zstd-small, zstd-compress and zstd-decompress compilation targets
v1.1.0 (Sep 28, 2016)
New : contrib/pzstd, parallel version of zstd, by Nick Terrell
added : NetBSD install target (#338)
Improved : speed for batches of small files
Improved : speed of zlib wrapper, by Przemyslaw Skibinski
Changed : libzstd on Windows supports legacy formats, by Christophe Chevalier
Fixed : CLI -d output to stdout by default when input is stdin (#322)
Fixed : CLI correctly detects console on Mac OS-X
Fixed : CLI supports recursive mode `-r` on Mac OS-X
Fixed : Legacy decoders use unified error codes, reported by benrg (#341), fixed by Przemyslaw Skibinski
Fixed : compatibility with OpenBSD, reported by Juan Francisco Cantero Hurtado (#319)
Fixed : compatibility with Hurd, by Przemyslaw Skibinski (#365)
Fixed : zstd-pgo, reported by octoploid (#329)
v1.0.0 (Sep 1, 2016)
Change Licensing: the whole project is now BSD, Copyright Facebook
Small decompression speed improvement
API : Streaming API supports legacy format
API : ZDICT_getDictID(), ZSTD_sizeof_{CCtx, DCtx, CStream, DStream}(), ZSTD_setDStreamParameter()
CLI supports legacy formats v0.4+
Fixed : compression fails on certain huge files, reported by Jesse McGrew
Enhanced documentation, by Przemyslaw Skibinski
v0.8.1 (Aug 18, 2016)
New streaming API
Changed : --ultra now enables levels beyond 19
Changed : -i# now selects benchmark time in seconds
Fixed : ZSTD_compress* can now compress > 4 GB in a single pass, reported by Nick Terrell
Fixed : speed regression on specific patterns (#272)
Fixed : support for Z_SYNC_FLUSH, by Dmitry Krot (#291)
Fixed : ICC compilation, by Przemyslaw Skibinski
v0.8.0 (Aug 2, 2016)
Improved : better speed on clang and gcc -O2, thanks to Eric Biggers
New : Build on FreeBSD and DragonFly, thanks to JrMarino
Changed : modified API : ZSTD_compressEnd()
Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
Fixed : large dictionaries (> 384 KB), reported by Ilona Papava
Fixed : checksum correctly checked in single-pass mode
Fixed : combined --test and --rm, reported by Andreas M. Nilsson
Modified : minor compression level adaptations
Updated : compression format specification to v0.2.0
changed : zstd.h moved to /lib directory
v0.7.5 (Aug 1, 2016)
Transition version, supporting decoding of v0.8.x
v0.7.4 (Jul 17, 2016)
Added : homebrew for Mac, by Daniel Cade
Added : more examples
Fixed : segfault when using small dictionaries, reported by Felix Handte
Modified : default compression level for CLI is now 3
Updated : specification, to v0.1.1
v0.7.3 (Jul 9, 2016)
New : compression format specification
New : `--` separator, stating that all following arguments are file names. Suggested by Chip Turner.
New : `ZSTD_getDecompressedSize()`
New : OpenBSD target, by Juan Francisco Cantero Hurtado
New : `examples` directory
fixed : dictBuilder using HC levels, reported by Bartosz Taudul
fixed : legacy support from ZSTD_decompress_usingDDict(), reported by Felix Handte
fixed : multi-blocks decoding with intermediate uncompressed blocks, reported by Greg Slazinski
modified : removed "mem.h" and "error_public.h" dependencies from "zstd.h" (experimental section)
modified : legacy functions no longer need magic number
v0.7.2 (Jul 4, 2016)
fixed : ZSTD_decompressBlock() using multiple consecutive blocks. Reported by Greg Slazinski.
fixed : potential segfault on very large files (many gigabytes). Reported by Chip Turner.
fixed : CLI displays system error message when destination file cannot be created (#231). Reported by Chip Turner.
v0.7.1 (Jun 23, 2016)
fixed : ZBUFF_compressEnd() called multiple times with too small `dst` buffer, reported by Christophe Chevalier
fixed : dictBuilder fails if first sample is too small, reported by Руслан Ковалёв
fixed : corruption issue, reported by cj
modified : checksum enabled by default in command line mode
v0.7.0 (Jun 17, 2016)
New : Support for directory compression, using `-r`, thanks to Przemyslaw Skibinski
New : Command `--rm`, to remove source file after successful de/compression
New : Visual build scripts, by Christophe Chevalier
New : Support for Sparse File-systems (do not use space for zero-filled sectors)
New : Frame checksum support
New : Support pass-through mode (when using `-df`)
API : more efficient Dictionary API : `ZSTD_compress_usingCDict()`, `ZSTD_decompress_usingDDict()`
API : create dictionary files from custom content, by Giuseppe Ottaviano
API : support for custom malloc/free functions
New : controllable Dictionary ID
New : Support for skippable frames
v0.6.1 (May 13, 2016)
New : zlib wrapper API, thanks to Przemyslaw Skibinski
New : Ability to compile compressor / decompressor separately
Changed : new lib directory structure
Fixed : Legacy codec v0.5 compatible with dictionary decompression
Fixed : Decoder corruption error (#173)
Fixed : null-string roundtrip (#176)
New : benchmark mode can select directory as input
Experimental : midipix support, VMS support
v0.6.0 (Apr 13, 2016)
Stronger high compression modes, thanks to Przemyslaw Skibinski
API : ZSTD_getFrameParams() provides size of decompressed content
New : highest compression modes require `--ultra` command to fully unleash their capacity
Fixed : zstd cli returns error code > 0 and removes dst file artifact when decompression fails, thanks to Chip Turner
v0.5.1 (Feb 18, 2016)
New : Optimal parsing => Very high compression modes, thanks to Przemyslaw Skibinski
Changed : Dictionary builder integrated into libzstd and zstd cli
Changed (!) : zstd cli now uses "multiple input files" as default mode. See `zstd -h`.
Fix : high compression modes for big-endian platforms
New : zstd cli : `-t` | `--test` command
v0.5.0 (Feb 5, 2016)
New : dictionary builder utility
Changed : streaming & dictionary API
Improved : better compression of small data
v0.4.7 (Jan 22, 2016)
Improved : small compression speed improvement in HC mode
Changed : `zstd_decompress.c` has ZSTD_LEGACY_SUPPORT to 0 by default
fix : bt search bug
v0.4.6 (Jan 13, 2016)
fix : fast compression mode on Windows
New : cmake configuration file, thanks to Artyom Dymchenko
Improved : high compression mode on repetitive data
New : block-level API
New : ZSTD_duplicateCCtx()
v0.4.5 (Dec 18, 2015)
new : -m/--multiple : compress/decompress multiple files
v0.4.4 (Dec 14, 2015)
Fixed : high compression modes for Windows 32 bits
new : external dictionary API extended to buffered mode and accessible through command line
new : windows DLL project, thanks to Christophe Chevalier
v0.4.3 (Dec 7, 2015)
new : external dictionary API
new : zstd-frugal
v0.4.2 (Dec 2, 2015)
Generic minor improvements for small blocks
Fixed : big-endian compatibility, by Peter Harris (#85)
v0.4.1 (Dec 1, 2015)
Fixed : ZSTD_LEGACY_SUPPORT=0 build mode (reported by Luben)
removed `zstd.c`
v0.4.0 (Nov 29, 2015)
Command line utility compatible with high compression levels
Removed zstdhc => merged into zstd
Added : ZBUFF API (see zstd_buffered.h)
Rolling buffer support
v0.3.6 (Nov 10, 2015)
small blocks params
v0.3.5 (Nov 9, 2015)
minor generic compression improvements
v0.3.4 (Nov 6, 2015)
Faster fast cLevels
v0.3.3 (Nov 5, 2015)
Small compression ratio improvement
v0.3.2 (Nov 2, 2015)
Fixed Visual Studio
v0.3.1 (Nov 2, 2015)
Small compression ratio improvement
v0.3 (Oct 30, 2015)
HC mode : compression levels 2-26
v0.2.2 (Oct 28, 2015)
Fix : Visual Studio 2013 & 2015 release compilation, by Christophe Chevalier
v0.2.1 (Oct 24, 2015)
Fix : Read errors, advanced fuzzer tests, by Hanno Böck
v0.2.0 (Oct 22, 2015)
**Breaking format change**
Faster decompression speed
Can still decode v0.1 format
v0.1.3 (Oct 15, 2015)
fix uninitialization warning, reported by Evan Nemerson
v0.1.2 (Sep 11, 2015)
frame concatenation support
v0.1.1 (Aug 27, 2015)
fix compression bug
detects write-flush errors
v0.1.0 (Aug 25, 2015)
first release

View File

@ -0,0 +1,5 @@
# Code of Conduct
Facebook has adopted a Code of Conduct that we expect project participants to adhere to.
Please read the [full text](https://code.fb.com/codeofconduct/)
so that you can understand what actions will and will not be tolerated.

View File

@ -0,0 +1,504 @@
# Contributing to Zstandard
We want to make contributing to this project as easy and transparent as
possible.
## Our Development Process
New versions are being developed in the "dev" branch,
or in their own feature branch.
When they are deemed ready for a release, they are merged into "release".
As a consequence, all contributions must first stage through "dev"
or their own feature branch.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `dev`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Workflow
Zstd uses a branch-based workflow for making changes to the codebase. Typically, zstd
will use a new branch per sizable topic. For smaller changes, it is okay to lump multiple
related changes into a branch.
Our contribution process works in three main stages:
1. Local development
* Update:
* Clone your fork of zstd if you have not already
```
git clone https://github.com/<username>/zstd
cd zstd
```
* Update your local dev branch
```
git pull https://github.com/facebook/zstd dev
git push origin dev
```
* Topic and development:
* Make a new branch on your fork about the topic you're developing for
```
# branch names should be concise but sufficiently informative
git checkout -b <branch-name>
git push origin <branch-name>
```
* Make commits and push
```
# make some changes
git add -u && git commit -m <message>
git push origin <branch-name>
```
* Note: run local tests to ensure that your changes didn't break existing functionality
* Quick check
```
make shortest
```
* Longer check
```
make test
```
2. Code Review and CI tests
* Ensure CI tests pass:
* Before sharing anything to the community, make sure that all CI tests pass on your local fork.
See our section on setting up your CI environment for more information on how to do this.
* Ensure that static analysis passes on your development machine. See the Static Analysis section
below to see how to do this.
* Create a pull request:
* When you are ready to share your changes with the community, create a pull request from your branch
to facebook:dev. You can do this very easily by clicking 'Create Pull Request' on your fork's home
page.
* From there, select the branch where you made changes as your source branch and facebook:dev
as the destination.
* Examine the diff presented between the two branches to make sure there is nothing unexpected.
* Write a good pull request description:
* While there is no strict template that our contributors follow, we would like them to
sufficiently summarize and motivate the changes they are proposing. We recommend all pull requests,
at least indirectly, address the following points.
* Is this pull request important and why?
* Is it addressing an issue? If so, what issue? (provide links for convenience please)
* Is this a new feature? If so, why is it useful and/or necessary?
* Are there background references and documents that reviewers should be aware of to properly assess this change?
* Note: make sure to point out any design and architectural decisions that you made and the rationale behind them.
* Note: if you have been working with a specific user and would like them to review your work, make sure you mention them using (@<username>)
* Submit the pull request and iterate with feedback.
3. Merge and Release
* Getting approval:
* You will have to iterate on your changes with feedback from other collaborators to reach a point
where your pull request can be safely merged.
* To avoid too many comments on style and convention, make sure that you have a
look at our style section below before creating a pull request.
* Eventually, someone from the zstd team will approve your pull request and not long after merge it into
the dev branch.
* Housekeeping:
* Most PRs are linked with one or more Github issues. If this is the case for your PR, make sure
the corresponding issue is mentioned. If your change 'fixes' or completely addresses the
issue at hand, then please indicate this by requesting that the issue be closed in a comment.
* Just because your changes have been merged does not mean the topic or larger issue is complete. Remember
that the change must make it to an official zstd release for it to be meaningful. We recommend
that contributors track the activity on their pull request and corresponding issue(s) page(s) until
their change makes it to the next release of zstd. Users will often discover bugs in your code or
suggest ways to refine and improve your initial changes even after the pull request is merged.
## Static Analysis
Static analysis is a process for examining the correctness or validity of a program without actually
executing it. It usually helps us find many simple bugs. Zstd uses clang's `scan-build` tool for
static analysis. You can install it by following the instructions for your OS on https://clang-analyzer.llvm.org/scan-build.
Once installed, you can ensure that our static analysis tests pass on your local development machine
by running:
```
make staticAnalyze
```
In general, you can use `scan-build` to static analyze any build script. For example, to static analyze
just `contrib/largeNbDicts` and nothing else, you can run:
```
scan-build make -C contrib/largeNbDicts largeNbDicts
```
### Pitfalls of static analysis
`scan-build` is part of our regular CI suite. Other static analyzers are not.
It can be useful to look at additional static analyzers once in a while (and we do), but it's not a good idea to multiply the number of analyzers run continuously at each commit and PR. The reasons are :
- Static analyzers are full of false positives. The signal to noise ratio is actually pretty low.
- A good CI policy is "zero-warning tolerance". That means that all issues must be solved, including false positives. This quickly becomes a tedious workload.
- Multiple static analyzers will feature multiple kinds of false positives, sometimes applying to the same code but in different ways, leading to :
+ tortuous code, trying to please multiple constraints, hurting readability and therefore maintenance. Sometimes, such complexity introduces other, more subtle bugs that are just out of scope of the analyzers.
+ sometimes, these constraints are mutually exclusive : if you try to solve one, the other static analyzer will complain; they can't both be happy at the same time.
- As if that was not enough, the list of false positives changes with each version. It's hard enough to follow one static analyzer; following multiple ones, each with its own update agenda, quickly becomes a massive velocity reducer.
This is different from running a static analyzer once in a while, looking at the output, and __cherry picking__ a few warnings that seem helpful, either because they detected a genuine risk of a bug, or because they help express the code in a way which is more readable or more difficult to misuse. These kinds of reports can be useful, and are accepted.
## Performance
Performance is extremely important for zstd and we only merge pull requests whose performance
landscape and corresponding trade-offs have been adequately analyzed, reproduced, and presented.
This high bar for performance means that every PR which has the potential to
impact performance takes a very long time for us to properly review. That being said, we
always welcome contributions to improve performance (or worsen performance for the trade-off of
something else). Please keep the following in mind before submitting a performance related PR:
1. Zstd isn't as old as gzip but it has been around for some time now, and its evolution is
very well documented via past Github issues and pull requests. It may be the case that your
particular performance optimization has already been considered in the past. Please take some
time to search through old issues and pull requests using keywords specific to your
would-be PR. Of course, just because a topic has already been discussed (and perhaps rejected
on some grounds) in the past, doesn't mean it isn't worth bringing up again. But even in that case,
it will be helpful for you to have context from that topic's history before contributing.
2. The distinction between noise and actual performance gains can unfortunately be very subtle
especially when microbenchmarking extremely small wins or losses. The only remedy to getting
something subtle merged is extensive benchmarking. You will be doing us a great favor if you
take the time to run extensive, long-duration, and potentially cross-(os, platform, process, etc)
benchmarks on your end before submitting a PR. Of course, you will not be able to benchmark
your changes on every single processor and OS out there (and neither will we), but do the best
you can :) We've added some things to think about when benchmarking below in the Benchmarking
Performance section, which might be helpful for you.
3. Optimizing performance for a certain OS, processor vendor, compiler, or network system is a perfectly
legitimate thing to do as long as it does not harm the overall performance health of Zstd.
This is a hard balance to strike but please keep in mind other aspects of Zstd when
submitting changes that are clang-specific, windows-specific, etc.
## Benchmarking Performance
Performance microbenchmarking is a tricky subject but also essential for Zstd. We value empirical
testing over theoretical speculation. This guide is not perfect, but for most scenarios it
is a good place to start.
### Stability
Unfortunately, the most important aspect in being able to benchmark reliably is to have a stable
benchmarking machine. A virtual machine, a machine with shared resources, or your laptop
will typically not be stable enough to obtain reliable benchmark results. If you can get your
hands on a desktop, this is usually a better scenario.
Of course, benchmarking can be done on non-hyper-stable machines as well. You will just have to
do a little more work to ensure that you are in fact measuring the changes you've made and not
noise. Here are some things you can do to make your benchmarks more stable:
1. The most simple thing you can do to drastically improve the stability of your benchmark is
to run it multiple times and then aggregate the results of those runs. As a general rule of
thumb, the smaller the change you are trying to measure, the more samples of benchmark runs
you will have to aggregate over to get reliable results. Here are some additional things to keep in
mind when running multiple trials:
* How you aggregate your samples is important. You might be tempted to use the mean of your
results. While this is certainly going to be a more stable number than a raw single sample
benchmark number, you might have more luck by taking the median. The mean is not robust to
outliers whereas the median is. Better still, you could simply take the fastest speed your
benchmark achieved on each run, since that is likely the fastest your process will be
capable of running your code. In our experience, this (aggregating by just taking the sample
with the fastest running time) has been the most stable approach; see the sketch at the end of this list.
* The more samples you have, the more stable your benchmarks should be. You can verify
your improved stability by looking at the size of your confidence intervals as you
increase your sample count. These should get smaller and smaller, eventually (hopefully)
becoming smaller than the performance win you are expecting.
* Most processors will take some time to get `hot` when running anything. The observations
you collect during that time period will be very different from the true performance number. Having
a very large number of samples will help alleviate this problem slightly, but you can also
address it directly by simply not including the first `n` iterations of your benchmark in
your aggregations. You can determine `n` by simply looking at the results from each iteration
and then hand picking a good threshold after which the variance in results seems to stabilize.
2. You cannot really get reliable benchmarks if your host machine is simultaneously running
another cpu/memory-intensive application in the background. If you are running benchmarks on your
personal laptop for instance, you should close all applications (including your code editor and
browser) before running your benchmarks. You might also have invisible background applications
running. You can see what these are by looking at either Activity Monitor on Mac or Task Manager
on Windows. You will get more stable benchmark results if you end those processes as well.
* If you have multiple cores, you can even run your benchmark on a reserved core to prevent
pollution from other OS and user processes. There are a number of ways to do this depending
on your OS:
* On linux boxes, you can use https://github.com/lpechacek/cpuset.
* On Windows, you can "Set Processor Affinity" using https://www.thewindowsclub.com/processor-affinity-windows
* On Mac, you can try to use their dedicated affinity API https://developer.apple.com/library/archive/releasenotes/Performance/RN-AffinityAPI/#//apple_ref/doc/uid/TP40006635-CH1-DontLinkElementID_2
3. To benchmark, you will likely end up writing a separate c/c++ program that will link libzstd.
Dynamically linking your library will introduce some added variation (not a large amount but
definitely some). Statically linking libzstd will be more stable. Static libraries should
be enabled by default when building zstd.
4. Use a profiler with a good high resolution timer. See the section below on profiling for
details on this.
5. Disable frequency scaling, turbo boost and address space randomization (this will vary by OS)
6. Try to avoid storage. On some systems you can use tmpfs. Putting the program, inputs and outputs on
tmpfs avoids touching a real storage system, which can have a pretty big variability.
Also check out LLVM's guide on benchmarking here: https://llvm.org/docs/Benchmarking.html
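Putting tips 1 and 2 together, here is a minimal sketch of a more stable measurement loop. It assumes a Linux box where core 2 is mostly idle, and `testfile` stands in for your own input data; it pins each run to a single core and repeats the measurement so you can aggregate by the fastest sample, as discussed in point 1:
```
# repeat the level-1 benchmark 9 times, pinned to core 2;
# aggregate afterwards, e.g. by keeping only the fastest run
for i in $(seq 9); do
    taskset -c 2 ./zstd -b1 -i1 testfile
done
```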
### Zstd benchmark
The fastest signal you can get regarding your performance changes is via the built-in zstd cli
bench option. You can run zstd as you typically would for your scenario, using some set of options,
and then additionally specify the `-b#` option. Doing this will run our benchmarking pipeline
for the options you have just provided. If you want to look at the internals of how this
benchmarking script works, you can check out programs/benchzstd.c
For example: say you have made a change that you believe improves the speed of zstd level 1. The
very first thing you should use to assess whether you actually achieved any sort of improvement
is `zstd -b`. You might try to do something like this. Note: you can use the `-i` option to
specify a running time for your benchmark in seconds (default is 3 seconds).
Usually, the longer the running time, the more stable your results will be.
```
$ git checkout <commit-before-your-change>
$ make && cp zstd zstd-old
$ git checkout <commit-after-your-change>
$ make && cp zstd zstd-new
$ zstd-old -i5 -b1 <your-test-data>
1<your-test-data> : 8990 -> 3992 (2.252), 302.6 MB/s , 626.4 MB/s
$ zstd-new -i5 -b1 <your-test-data>
1<your-test-data> : 8990 -> 3992 (2.252), 302.8 MB/s , 628.4 MB/s
```
Unless your performance win is large enough to be visible despite the intrinsic noise
on your computer, benchzstd alone will likely not be enough to validate the impact of your
changes. For example, the results of the example above indicate that effectively nothing
changed but there could be a small <3% improvement that the noise on the host machine
obscured. So unless you see a large performance win (10-15% consistently), using just
this method of evaluation will not be sufficient.
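One cheap way to squeeze a bit more signal out of benchzstd is to interleave runs of the two binaries, so that slow drift in machine state (thermals, background tasks) affects both sides roughly equally. A sketch, reusing the `zstd-old` and `zstd-new` binaries built above:
```
# alternate old and new builds across several trials
for i in 1 2 3; do
    ./zstd-old -i5 -b1 <your-test-data>
    ./zstd-new -i5 -b1 <your-test-data>
done
```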
### Profiling
There are a number of great profilers out there. We're going to briefly mention how you can
profile your code using `instruments` on mac, `perf` on linux and `visual studio profiler`
on windows.
Say you have an idea for a change that you think will provide some good performance gains
for level 1 compression on Zstd. Typically this means you have identified a section of
code that you think can be made to run faster.
The first thing you will want to do is make sure that the piece of code is actually taking up
a notable amount of time to run. It is usually not worth optimizing something which accounts for less than
0.0001% of the total running time. Luckily, there are tools to help with this.
Profilers will let you see how much time your code spends inside a particular function.
If your target code snippet is only part of a function, it might be worth trying to
isolate that snippet by moving it to its own function (this is usually not necessary but
might be).
Most profilers (including the profilers discussed below) will generate a call graph of
functions for you. Your goal will be to find your function of interest in this call graph
and then inspect the time spent inside of it. You might also want to look at the
annotated assembly which most profilers will provide you with.
#### Instruments
We will once again consider the scenario where you think you've identified a piece of code
whose performance can be improved upon. Follow these steps to profile your code using
Instruments.
1. Open Instruments
2. Select `Time Profiler` from the list of standard templates
3. Close all other applications except for your instruments window and your terminal
4. Run your benchmarking script from your terminal window
* You will want a benchmark that runs for at least a few seconds (5 seconds will
usually be long enough). This way the profiler will have something to work with
and you will have ample time to attach your profiler to this process :)
* I will just use benchzstd as my benchmarking script for this example:
```
$ zstd -b1 -i5 <my-data> # this will run for 5 seconds
```
5. Once you run your benchmarking script, switch back over to instruments and attach your
process to the time profiler. You can do this by:
* Clicking on the `All Processes` drop down in the top left of the toolbar.
* Selecting your process from the dropdown. In my case, it is just going to be labeled
`zstd`
* Hitting the bright red record circle button on the top left of the toolbar
6. Your profiler will now start collecting metrics from your benchmarking script. Once
you think you have collected enough samples (usually this is the case after 3 seconds of
recording), stop your profiler.
7. Make sure that in the toolbar of the bottom window, `profile` is selected.
8. You should be able to see your call graph.
* If you don't see the call graph, or see an incomplete one, make sure you have compiled
zstd and your benchmarking script with debug flags. On macOS and Linux, this just means
you will have to supply the `-g` flag along with your build flags. You might also
have to provide the `-fno-omit-frame-pointer` flag (see the build sketch after this list).
9. Dig down the graph to find your function call and then inspect it by double clicking
the list item. You will be able to see the annotated source code and the assembly side by
side.
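For reference, such a debug-friendly build might look like the following sketch. It assumes
you are building with the zstd Makefiles, which expose `MOREFLAGS` for passing extra
compiler flags:
```
$ make clean
$ MOREFLAGS="-g -fno-omit-frame-pointer" make zstd
```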
#### Perf
This wiki has a pretty detailed tutorial on getting started with perf, so we'll
leave you to check that out if you're getting started:
https://perf.wiki.kernel.org/index.php/Tutorial
Some general notes on perf:
* Use `perf stat -r # <bench-program>` to quickly get some relevant timing and
counter statistics. Perf uses a high resolution timer and this is likely one
of the first things your team will run when assessing your PR.
* Perf has a long list of hardware counters that can be viewed with `perf list`.
When measuring optimizations, it is worth checking that the hardware
counters you expect to be impacted by your change are in fact being so. For example,
if you expect the L1 cache misses to decrease with your change, you can look at the
counter `L1-dcache-load-misses`, as in the sketch below.
* Perf hardware counters will not work on a virtual machine.
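As a concrete sketch, here is how those commands might be combined when profiling a
locally built `zstd` (the file name is a placeholder, and `-r 5` is an arbitrary repeat count):
```
$ perf stat -r 5 ./zstd -b1 -i5 <your-test-data>    # timing and counter summary over 5 runs
$ perf record -g ./zstd -b1 -i5 <your-test-data>    # sample with call-graph information
$ perf report                                       # browse the recorded call graph
$ perf stat -e L1-dcache-load-misses ./zstd -b1 <your-test-data>
```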
#### Visual Studio
TODO
## Setting up continuous integration (CI) on your fork
Zstd uses a number of different continuous integration (CI) tools to ensure that new changes
are well tested before they make it to an official release. Specifically, we use the platforms
travis-ci, circle-ci, and appveyor.
Changes cannot be merged into the main dev branch unless they pass all of our CI tests.
The easiest way to run these CI tests on your own before submitting a PR to our dev branch
is to configure your personal fork of zstd with each of the CI platforms. Below, you'll find
instructions for doing this.
### travis-ci
Follow these steps to link travis-ci with your github fork of zstd
1. Make sure you are logged into your github account
2. Go to https://travis-ci.org/
3. Click 'Sign in with Github' on the top right
4. Click 'Authorize travis-ci'
5. Click 'Activate all repositories using Github Apps'
6. Select 'Only select repositories' and select your fork of zstd from the drop down
7. Click 'Approve and Install'
8. Click 'Sign in with Github' again. This time, it will be for travis-pro (which will let you view your tests on the web dashboard)
9. Click 'Authorize travis-pro'
10. You should have travis set up on your fork now.
### circle-ci
TODO
### appveyor
Follow these steps to link appveyor with your github fork of zstd
1. Make sure you are logged into your github account
2. Go to https://www.appveyor.com/
3. Click 'Sign in' on the top right
4. Select 'Github' on the left panel
5. Click 'Authorize appveyor'
6. You might be asked to select which repositories you want to give appveyor permission to. Select your fork of zstd if you're prompted
7. You should have appveyor set up on your fork now.
### General notes on CI
CI tests run every time a pull request (PR) is created or updated. The exact tests
that get run will depend on the destination branch you specify. Some tests take
longer to run than others. Currently, our CI is set up to run a short
series of tests when creating a PR to the dev branch and a longer series of tests
when creating a PR to the release branch. You can look in the configuration files
of the respective CI platform for more information on what gets run when.
Most people will just want to create a PR with the destination set to their local dev
branch of zstd. You can then find the status of the tests on the PR's page. You can also
re-run tests and cancel running tests from the PR page or from the respective CI's dashboard.
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## Coding Style
It's a pretty long topic, which is difficult to summarize in a single paragraph.
As a rule of thumb, try to imitate the coding style of
similar lines of code around your contribution.
The following is a non-exhaustive list of rules employed in zstd code base:
### C90
This code base follows the strict C90 standard,
with 2 extensions: 64-bit `long long` types, and variadic macros.
This rule is applied strictly to code within `lib/` and `programs/`.
Sub-projects in `contrib/` are allowed to use other conventions.
### C++ direct compatibility: symbol mangling
All public symbol declarations must be wrapped in `extern "C" { ... }`,
so that this project can be compiled as C++98 code,
and linked into C++ applications.
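As a sketch, the usual header pattern looks like this (the declaration shown is just an
example of a public zstd symbol):
```
#if defined (__cplusplus)
extern "C" {
#endif

size_t ZSTD_compressBound(size_t srcSize);   /* example public declaration */

#if defined (__cplusplus)
}
#endif
```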
### Minimal Frugal
This design requirement is fundamental to preserve the portability of the code base.
#### Dependencies
- Reduce dependencies to the minimum possible level.
Any dependency should be considered "bad" by default,
and only tolerated because it provides a service in a better way than can be achieved locally.
The only external dependencies this repository tolerates are
standard C libraries and, in rare cases, system-level headers.
- Within `lib/`, this policy is even more drastic.
The only external dependencies allowed are `<assert.h>`, `<stdlib.h>`, `<string.h>`,
and even then, not directly.
In particular, no function shall ever allocate on the heap directly;
it must instead use `ZSTD_malloc()` and equivalents.
Other accepted non-symbol headers are `<stddef.h>` and `<limits.h>`.
- Within the project, there is a strict hierarchy of dependencies that must be respected.
`programs/` is allowed to depend on `lib/`, but only its public API.
Within `lib/`, `lib/common` doesn't depend on any other directory.
`lib/compress` and `lib/decompress` shall not depend on each other.
`lib/dictBuilder` can depend on `lib/common` and `lib/compress`, but not `lib/decompress`.
#### Resources
- Functions in `lib/` must use very little stack space,
several dozen bytes at most.
Everything larger must use the heap allocator,
or require a scratch buffer to be emplaced manually, as sketched below.
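A minimal sketch of this rule (the function name and sizes are illustrative; inside `lib/`
proper, `ZSTD_malloc()` would replace `malloc()`):
```
#include <stdlib.h>

int transformData(const unsigned char* src, size_t srcSize)
{
    /* unsigned char workspace[1 << 16];   <-- 64 KB on the stack: not allowed */
    unsigned char* const workspace = (unsigned char*)malloc(1 << 16);
    if (workspace == NULL) return 1;
    /* ... process src[0..srcSize) using workspace ... */
    (void)src; (void)srcSize;
    free(workspace);
    return 0;
}
```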
### Naming
* All public symbols are prefixed with `ZSTD_`
+ private symbols, with a scope limited to their own unit, are free of this restriction.
However, since `libzstd` source code can be amalgamated,
each symbol name must attempt to be (and remain) unique.
Avoid overly generic names that could become grounds for future collisions.
This generally implies usage of some form of prefix.
* For symbols (functions and variables), the naming convention is `PREFIX_camelCase`.
+ In some advanced cases, one can also find:
- `PREFIX_prefix2_camelCase`
- `PREFIX_camelCase_extendedQualifier`
* Multi-word names generally consist of an action followed by an object:
- for example: `ZSTD_createCCtx()`
* Prefer positive actions
- `goBackward` rather than `notGoForward`
* Type names (`struct`, etc.) follow a similar convention,
except that they are allowed and even invited to start with an uppercase letter.
Example: `ZSTD_CCtx`, `ZSTD_CDict`
* Macro names are all capital letters.
The same composition rules (`PREFIX_NAME_QUALIFIER`) apply.
* File names are all lowercase letters.
The convention is `snake_case`.
File names **must** be unique across the entire code base,
even when they stand in clearly separated directories.
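Putting these rules together, a few illustrative declarations would read as follows (they
echo real zstd names, shown here only to demonstrate the composition rules):
```
typedef struct ZSTD_CCtx_s ZSTD_CCtx;    /* type name: uppercase start after the prefix */
ZSTD_CCtx* ZSTD_createCCtx(void);        /* function name: action followed by object */
#define ZSTD_MAGICNUMBER 0xFD2FB528      /* macro name: all capitals */
```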
### Qualifiers
* This code base is `const` friendly, if not `const` fanatical.
Any variable that can be `const` (i.e. read-only) **must** be `const`.
Any pointer whose content will not be modified must be `const`.
This property is then enforced at the compiler level.
`const` variables are an important signal to readers that the variable isn't modified.
Conversely, non-const variables are a signal to readers to watch out for modifications later on in the function.
* If a function must be inlined, mention it explicitly,
using the project's own portable macros, such as `FORCE_INLINE_ATTR`,
defined in `lib/common/compiler.h`, as in the sketch below.
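A hypothetical helper observing both rules might read:
```
#include <stddef.h>
#include "compiler.h"   /* lib/common/compiler.h, for FORCE_INLINE_ATTR */

/* hypothetical helper: read-only input is const, inlining is explicit */
FORCE_INLINE_ATTR size_t sumBytes(const unsigned char* src, size_t srcSize)
{
    size_t total = 0;
    size_t n;
    for (n = 0; n < srcSize; n++) total += src[n];
    return total;
}
```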
### Debugging
* **Assertions** are welcome, and should be used very liberally,
to check any condition the code expects for its correct execution.
These assertion checks will be run in debug builds, and disabled in production.
* For traces, this project provides its own debug macros,
in particular `DEBUGLOG(level, ...)`, defined in `lib/common/debug.h`.
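A short sketch combining both facilities (the function itself is hypothetical):
```
#include <assert.h>
#include <string.h>
#include "debug.h"   /* lib/common/debug.h, provides DEBUGLOG(level, ...) */

static size_t copyChunk(void* dst, const void* src, size_t size)
{
    assert(dst != NULL);   /* checked in debug builds, compiled out in production */
    assert(src != NULL);
    DEBUGLOG(5, "copyChunk: size=%u", (unsigned)size);
    memcpy(dst, src, size);
    return size;
}
```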
### Code documentation
* Avoid code documentation that merely repeats what the code is already stating.
Whenever applicable, prefer employing the code as the primary way to convey explanations.
Example 1: `int nbTokens = n;` instead of `int i = n; /* i is a nb of tokens */`.
Example 2: `assert(size > 0);` instead of `/* here, size should be positive */`.
* At the declaration level, the documentation explains how to use the function or variable,
and, when applicable, why it's needed and the scenarios where it can be useful.
* At implementation level, the documentation explains the general outline of the algorithm employed,
and when applicable why this specific choice was preferred.
### General layout
* 4 spaces for indentation rather than tabs
* Code documentation shall directly precede function declaration or implementation
* Function implementations and their code documentation should be preceded and followed by an empty line
## License
By contributing to Zstandard, you agree that your contributions will be licensed
under both the [LICENSE](LICENSE) file and the [COPYING](COPYING) file in the root directory of this source tree.

View File

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

View File

@ -0,0 +1,30 @@
BSD License
For Zstandard software
Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name Facebook nor the names of its contributors may be used to
endorse or promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,420 @@
# ################################################################
# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################
# verbose mode (print commands) on V=1 or VERBOSE=1
Q = $(if $(filter 1,$(V) $(VERBOSE)),,@)
PRGDIR = programs
ZSTDDIR = lib
BUILDIR = build
ZWRAPDIR = zlibWrapper
TESTDIR = tests
FUZZDIR = $(TESTDIR)/fuzz
# Define nul output
VOID = /dev/null
# When cross-compiling from linux to windows, you might
# need to specify this as "Windows." Fedora build fails
# without it.
#
# Note: mingw-w64 build from linux to windows does not
# fail on other tested distros (ubuntu, debian) even
# without manually specifying the TARGET_SYSTEM.
TARGET_SYSTEM ?= $(OS)
ifneq (,$(filter Windows%,$(TARGET_SYSTEM)))
EXT =.exe
else
EXT =
endif
## default: Build lib-release and zstd-release
.PHONY: default
default: lib-release zstd-release
.PHONY: all
all: allmost examples manual contrib
.PHONY: allmost
allmost: allzstd zlibwrapper
# skip zwrapper, can't build that on alternate architectures without the proper zlib installed
.PHONY: allzstd
allzstd: lib
$(Q)$(MAKE) -C $(PRGDIR) all
$(Q)$(MAKE) -C $(TESTDIR) all
.PHONY: all32
all32:
$(MAKE) -C $(PRGDIR) zstd32
$(MAKE) -C $(TESTDIR) all32
.PHONY: lib lib-release lib-mt lib-nomt
lib lib-release lib-mt lib-nomt:
$(Q)$(MAKE) -C $(ZSTDDIR) $@
.PHONY: zstd zstd-release
zstd zstd-release:
$(Q)$(MAKE) -C $(PRGDIR) $@
$(Q)ln -sf $(PRGDIR)/zstd$(EXT) zstd$(EXT)
.PHONY: zstdmt
zstdmt:
$(Q)$(MAKE) -C $(PRGDIR) $@
$(Q)cp $(PRGDIR)/zstd$(EXT) ./zstdmt$(EXT)
.PHONY: zlibwrapper
zlibwrapper: lib
$(MAKE) -C $(ZWRAPDIR) all
## test: run long-duration tests
.PHONY: test
DEBUGLEVEL ?= 1
test: MOREFLAGS += -g -Werror
test:
DEBUGLEVEL=$(DEBUGLEVEL) MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
$(MAKE) -C $(TESTDIR) $@
ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder $@
## shortest: same as `make check`
.PHONY: shortest
shortest:
$(Q)$(MAKE) -C $(TESTDIR) $@
## check: run basic tests for `zstd` cli
.PHONY: check
check: shortest
.PHONY: automated_benchmarking
automated_benchmarking:
$(MAKE) -C $(TESTDIR) $@
.PHONY: benchmarking
benchmarking: automated_benchmarking
## examples: build all examples in `examples/` directory
.PHONY: examples
examples: lib
$(MAKE) -C examples all
## manual: generate API documentation in html format
.PHONY: manual
manual:
$(MAKE) -C contrib/gen_html $@
## man: generate man page
.PHONY: man
man:
$(MAKE) -C programs $@
## contrib: build all supported projects in `/contrib` directory
.PHONY: contrib
contrib: lib
$(MAKE) -C contrib/pzstd all
$(MAKE) -C contrib/seekable_format/examples all
$(MAKE) -C contrib/seekable_format/tests test
$(MAKE) -C contrib/largeNbDicts all
cd build/single_file_libs/ ; ./build_decoder_test.sh
cd build/single_file_libs/ ; ./build_library_test.sh
.PHONY: cleanTabs
cleanTabs:
cd contrib; ./cleanTabs
.PHONY: clean
clean:
$(Q)$(MAKE) -C $(ZSTDDIR) $@ > $(VOID)
$(Q)$(MAKE) -C $(PRGDIR) $@ > $(VOID)
$(Q)$(MAKE) -C $(TESTDIR) $@ > $(VOID)
$(Q)$(MAKE) -C $(ZWRAPDIR) $@ > $(VOID)
$(Q)$(MAKE) -C examples/ $@ > $(VOID)
$(Q)$(MAKE) -C contrib/gen_html $@ > $(VOID)
$(Q)$(MAKE) -C contrib/pzstd $@ > $(VOID)
$(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
$(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID)
$(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
$(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
$(Q)$(RM) -r lz4
@echo Cleaning completed
#------------------------------------------------------------------------------
# make install is validated only for Linux, macOS, Hurd and some BSD targets
#------------------------------------------------------------------------------
ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD DragonFly NetBSD MSYS_NT Haiku))
HOST_OS = POSIX
HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
EGREP_OPTIONS ?=
ifeq ($(HAVE_COLORNEVER), 1)
EGREP_OPTIONS += --color=never
endif
EGREP = egrep $(EGREP_OPTIONS)
# Print a two column output of targets and their description. To add a target description, put a
# comment in the Makefile with the format "## <TARGET>: <DESCRIPTION>". For example:
#
## list: Print all targets and their descriptions (if provided)
.PHONY: list
list:
$(Q)TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \
| awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \
| $(EGREP) -v -e '^[^[:alnum:]]' | sort); \
{ \
printf "Target Name\tDescription\n"; \
printf "%0.s-" {1..16}; printf "\t"; printf "%0.s-" {1..40}; printf "\n"; \
for target in $$TARGETS; do \
line=$$($(EGREP) "^##[[:space:]]+$$target:" $(lastword $(MAKEFILE_LIST))); \
description=$$(echo $$line | awk '{i=index($$0,":"); print substr($$0,i+1)}' | xargs); \
printf "$$target\t$$description\n"; \
done \
} | column -t -s $$'\t'
.PHONY: install armtest usan asan uasan msan asan32
install:
$(Q)$(MAKE) -C $(ZSTDDIR) $@
$(Q)$(MAKE) -C $(PRGDIR) $@
.PHONY: uninstall
uninstall:
$(Q)$(MAKE) -C $(ZSTDDIR) $@
$(Q)$(MAKE) -C $(PRGDIR) $@
.PHONY: travis-install
travis-install:
$(MAKE) install PREFIX=~/install_test_dir
.PHONY: gcc5build gcc6build gcc7build clangbuild m32build armbuild aarch64build ppcbuild ppc64build
gcc5build: clean
gcc-5 -v
CC=gcc-5 $(MAKE) all MOREFLAGS="-Werror"
gcc6build: clean
gcc-6 -v
CC=gcc-6 $(MAKE) all MOREFLAGS="-Werror"
gcc7build: clean
gcc-7 -v
CC=gcc-7 $(MAKE) all MOREFLAGS="-Werror"
clangbuild: clean
clang -v
CXX=clang++ CC=clang CFLAGS="-Werror -Wconversion -Wno-sign-conversion -Wdocumentation" $(MAKE) all
m32build: clean
gcc -v
$(MAKE) all32
armbuild: clean
CC=arm-linux-gnueabi-gcc CFLAGS="-Werror" $(MAKE) allzstd
aarch64build: clean
CC=aarch64-linux-gnu-gcc CFLAGS="-Werror" $(MAKE) allzstd
ppcbuild: clean
CC=powerpc-linux-gnu-gcc CFLAGS="-m32 -Wno-attributes -Werror" $(MAKE) -j allzstd
ppc64build: clean
CC=powerpc-linux-gnu-gcc CFLAGS="-m64 -Werror" $(MAKE) -j allzstd
.PHONY: armfuzz aarch64fuzz ppcfuzz ppc64fuzz
armfuzz: clean
CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static MOREFLAGS="-static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
aarch64fuzz: clean
ld -v
CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static MOREFLAGS="-static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
ppcfuzz: clean
CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static MOREFLAGS="-static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
ppc64fuzz: clean
CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) fuzztest
.PHONY: cxxtest gcc5test gcc6test armtest aarch64test ppctest ppc64test
cxxtest: CXXFLAGS += -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror
cxxtest: clean
$(MAKE) -C $(PRGDIR) all CC="$(CXX) -Wno-deprecated" CFLAGS="$(CXXFLAGS)" # adding -Wno-deprecated to avoid clang++ warning on dealing with C files directly
gcc5test: clean
gcc-5 -v
$(MAKE) all CC=gcc-5 MOREFLAGS="-Werror"
gcc6test: clean
gcc-6 -v
$(MAKE) all CC=gcc-6 MOREFLAGS="-Werror"
armtest: clean
$(MAKE) -C $(TESTDIR) datagen # use native, faster
$(MAKE) -C $(TESTDIR) test CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static" FUZZER_FLAGS=--no-big-tests
aarch64test:
$(MAKE) -C $(TESTDIR) datagen # use native, faster
$(MAKE) -C $(TESTDIR) test CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static" FUZZER_FLAGS=--no-big-tests
ppctest: clean
$(MAKE) -C $(TESTDIR) datagen # use native, faster
$(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static" FUZZER_FLAGS=--no-big-tests
ppc64test: clean
$(MAKE) -C $(TESTDIR) datagen # use native, faster
$(MAKE) -C $(TESTDIR) test CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static" FUZZER_FLAGS=--no-big-tests
.PHONY: arm-ppc-compilation
arm-ppc-compilation:
$(MAKE) -C $(PRGDIR) clean zstd CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static ZSTDRTTEST= MOREFLAGS="-Werror -static"
$(MAKE) -C $(PRGDIR) clean zstd CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static ZSTDRTTEST= MOREFLAGS="-Werror -static"
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static"
$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static"
regressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest
uasanregressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined" CXXFLAGS="-O3 -fsanitize=address,undefined"
msanregressiontest:
$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory" CXXFLAGS="-O3 -fsanitize=memory"
# run UBsan with -fsanitize-recover=pointer-overflow
# this only works with recent compilers such as gcc 8+
usan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=undefined -Werror"
asan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=address -Werror"
asan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=address -Werror" $(MAKE) -C $(TESTDIR) $*
msan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=memory -fno-omit-frame-pointer -Werror" HAVE_LZMA=0 # datagen.c fails this test for no obvious reason
msan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=memory -fno-omit-frame-pointer -Werror" FUZZER_FLAGS=--no-big-tests $(MAKE) -C $(TESTDIR) HAVE_LZMA=0 $*
asan32: clean
$(MAKE) -C $(TESTDIR) test32 CC=clang MOREFLAGS="-g -fsanitize=address"
uasan: clean
$(MAKE) test CC=clang MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror"
uasan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize-recover=pointer-overflow -fsanitize=address,undefined -Werror" $(MAKE) -C $(TESTDIR) $*
tsan-%: clean
LDFLAGS=-fuse-ld=gold MOREFLAGS="-g -fno-sanitize-recover=all -fsanitize=thread -Werror" $(MAKE) -C $(TESTDIR) $* FUZZER_FLAGS=--no-big-tests
.PHONY: apt-install
apt-install:
sudo apt-get -yq --no-install-suggests --no-install-recommends --force-yes install $(APT_PACKAGES)
.PHONY: apt-add-repo
apt-add-repo:
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
sudo apt-get update -y -qq
.PHONY: ppcinstall arminstall valgrindinstall libc6install gcc6install gcc7install gcc8install gpp6install clang38install lz4install
ppcinstall:
APT_PACKAGES="qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu" $(MAKE) apt-install
arminstall:
APT_PACKAGES="qemu-system-arm qemu-user-static gcc-arm-linux-gnueabi libc6-dev-armel-cross gcc-aarch64-linux-gnu libc6-dev-arm64-cross" $(MAKE) apt-install
valgrindinstall:
APT_PACKAGES="valgrind" $(MAKE) apt-install
libc6install:
APT_PACKAGES="libc6-dev-i386 gcc-multilib" $(MAKE) apt-install
gcc6install: apt-add-repo
APT_PACKAGES="libc6-dev-i386 gcc-multilib gcc-6 gcc-6-multilib" $(MAKE) apt-install
gcc7install: apt-add-repo
APT_PACKAGES="libc6-dev-i386 gcc-multilib gcc-7 gcc-7-multilib" $(MAKE) apt-install
gcc8install: apt-add-repo
APT_PACKAGES="libc6-dev-i386 gcc-multilib gcc-8 gcc-8-multilib" $(MAKE) apt-install
gpp6install: apt-add-repo
APT_PACKAGES="libc6-dev-i386 g++-multilib gcc-6 g++-6 g++-6-multilib" $(MAKE) apt-install
clang38install:
APT_PACKAGES="clang-3.8" $(MAKE) apt-install
# Ubuntu 14.04 ships a too-old lz4
lz4install:
[ -e lz4 ] || git clone https://github.com/lz4/lz4 && sudo $(MAKE) -C lz4 install
endif
CMAKE_PARAMS = -DZSTD_BUILD_CONTRIB:BOOL=ON -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_ZLIB_SUPPORT:BOOL=ON -DZSTD_LZMA_SUPPORT:BOOL=ON -DCMAKE_BUILD_TYPE=Release
ifneq (,$(filter MSYS%,$(shell uname)))
HOST_OS = MSYS
CMAKE_PARAMS = -G"MSYS Makefiles" -DCMAKE_BUILD_TYPE=Debug -DZSTD_MULTITHREAD_SUPPORT:BOOL=OFF -DZSTD_BUILD_STATIC:BOOL=ON -DZSTD_BUILD_TESTS:BOOL=ON
endif
#------------------------------------------------------------------------
# target specific tests
#------------------------------------------------------------------------
ifneq (,$(filter $(HOST_OS),MSYS POSIX))
.PHONY: cmakebuild c89build gnu90build c99build gnu99build c11build bmix64build bmix32build bmi32build staticAnalyze
cmakebuild:
cmake --version
$(RM) -r $(BUILDIR)/cmake/build
mkdir $(BUILDIR)/cmake/build
cd $(BUILDIR)/cmake/build; cmake -DCMAKE_INSTALL_PREFIX:PATH=~/install_test_dir $(CMAKE_PARAMS) ..
$(MAKE) -C $(BUILDIR)/cmake/build -j4;
$(MAKE) -C $(BUILDIR)/cmake/build install;
$(MAKE) -C $(BUILDIR)/cmake/build uninstall;
cd $(BUILDIR)/cmake/build; ctest -V -L Medium
c89build: clean
$(CC) -v
CFLAGS="-std=c89 -Werror" $(MAKE) allmost # will fail, due to missing support for `long long`
gnu90build: clean
$(CC) -v
CFLAGS="-std=gnu90 -Werror" $(MAKE) allmost
c99build: clean
$(CC) -v
CFLAGS="-std=c99 -Werror" $(MAKE) allmost
gnu99build: clean
$(CC) -v
CFLAGS="-std=gnu99 -Werror" $(MAKE) allmost
c11build: clean
$(CC) -v
CFLAGS="-std=c11 -Werror" $(MAKE) allmost
bmix64build: clean
$(CC) -v
CFLAGS="-O3 -mbmi -Werror" $(MAKE) -C $(TESTDIR) test
bmix32build: clean
$(CC) -v
CFLAGS="-O3 -mbmi -mx32 -Werror" $(MAKE) -C $(TESTDIR) test
bmi32build: clean
$(CC) -v
CFLAGS="-O3 -mbmi -m32 -Werror" $(MAKE) -C $(TESTDIR) test
# static analyzer test uses clang's scan-build
# does not analyze zlibWrapper, due to detected issues in zlib source code
staticAnalyze: SCANBUILD ?= scan-build
staticAnalyze:
$(CC) -v
CC=$(CC) CPPFLAGS=-g $(SCANBUILD) --status-bugs -v $(MAKE) allzstd examples contrib
endif

View File

@ -0,0 +1,199 @@
<p align="center"><img src="https://raw.githubusercontent.com/facebook/zstd/dev/doc/images/zstd_logo86.png" alt="Zstandard"></p>
__Zstandard__, or `zstd` for short, is a fast lossless compression algorithm,
targeting real-time compression scenarios at zlib-level and better compression ratios.
It's backed by a very fast entropy stage, provided by [Huff0 and FSE library](https://github.com/Cyan4973/FiniteStateEntropy).
The project is provided as an open-source dual [BSD](LICENSE) and [GPLv2](COPYING) licensed **C** library,
and a command line utility producing and decoding `.zst`, `.gz`, `.xz` and `.lz4` files.
Should your project require another programming language,
a list of known ports and bindings is provided on [Zstandard homepage](http://www.zstd.net/#other-languages).
**Development branch status:**
[![Build Status][travisDevBadge]][travisLink]
[![Build status][AppveyorDevBadge]][AppveyorLink]
[![Build status][CircleDevBadge]][CircleLink]
[![Build status][CirrusDevBadge]][CirrusLink]
[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]
[travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.org/facebook/zstd
[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/xt38wbdxjk5mrbem/branch/dev?svg=true "Windows test suite"
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/zstd-p0yf0
[CircleDevBadge]: https://circleci.com/gh/facebook/zstd/tree/dev.svg?style=shield "Short test suite"
[CircleLink]: https://circleci.com/gh/facebook/zstd
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
[CirrusLink]: https://cirrus-ci.com/github/facebook/zstd
[OSSFuzzBadge]: https://oss-fuzz-build-logs.storage.googleapis.com/badges/zstd.svg
[OSSFuzzLink]: https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zstd
## Benchmarks
For reference, several fast compression algorithms were tested and compared
on a server running Arch Linux (`Linux version 5.5.11-arch1-1`),
with a Core i9-9900K CPU @ 5.0GHz,
using [lzbench], an open-source in-memory benchmark by @inikep
compiled with [gcc] 9.3.0,
on the [Silesia compression corpus].
[lzbench]: https://github.com/inikep/lzbench
[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
[gcc]: https://gcc.gnu.org/
| Compressor name | Ratio | Compression| Decompress.|
| --------------- | ------| -----------| ---------- |
| **zstd 1.4.5 -1** | 2.884 | 500 MB/s | 1660 MB/s |
| zlib 1.2.11 -1 | 2.743 | 90 MB/s | 400 MB/s |
| brotli 1.0.7 -0 | 2.703 | 400 MB/s | 450 MB/s |
| **zstd 1.4.5 --fast=1** | 2.434 | 570 MB/s | 2200 MB/s |
| **zstd 1.4.5 --fast=3** | 2.312 | 640 MB/s | 2300 MB/s |
| quicklz 1.5.0 -1 | 2.238 | 560 MB/s | 710 MB/s |
| **zstd 1.4.5 --fast=5** | 2.178 | 700 MB/s | 2420 MB/s |
| lzo1x 2.10 -1 | 2.106 | 690 MB/s | 820 MB/s |
| lz4 1.9.2 | 2.101 | 740 MB/s | 4530 MB/s |
| **zstd 1.4.5 --fast=7** | 2.096 | 750 MB/s | 2480 MB/s |
| lzf 3.6 -1 | 2.077 | 410 MB/s | 860 MB/s |
| snappy 1.1.8 | 2.073 | 560 MB/s | 1790 MB/s |
[zlib]: http://www.zlib.net/
[LZ4]: http://www.lz4.org/
The negative compression levels, specified with `--fast=#`,
offer faster compression and decompression speed in exchange for some loss in
compression ratio compared to level 1, as seen in the table above.
Zstd can also offer stronger compression ratios at the cost of compression speed.
Speed vs Compression trade-off is configurable by small increments.
Decompression speed is preserved and remains roughly the same at all settings,
a property shared by most LZ compression algorithms, such as [zlib] or lzma.
The following tests were run
on a server running Linux Debian (`Linux version 4.14.0-3-amd64`)
with a Core i7-6700K CPU @ 4.0GHz,
using [lzbench], an open-source in-memory benchmark by @inikep
compiled with [gcc] 7.3.0,
on the [Silesia compression corpus].
Compression Speed vs Ratio | Decompression Speed
---------------------------|--------------------
![Compression Speed vs Ratio](doc/images/CSpeed2.png "Compression Speed vs Ratio") | ![Decompression Speed](doc/images/DSpeed3.png "Decompression Speed")
A few other algorithms can produce higher compression ratios at slower speeds, falling outside of the graph.
For a larger picture including slow modes, [click on this link](doc/images/DCspeed5.png).
## The case for Small Data compression
Previous charts provide results applicable to typical file and stream scenarios (several MB). Small data comes with different perspectives.
The smaller the amount of data to compress, the more difficult it is to compress. This problem is common to all compression algorithms, and the reason is that compression algorithms learn from past data how to compress future data. But at the beginning of a new data set, there is no "past" to build upon.
To solve this situation, Zstd offers a __training mode__, which can be used to tune the algorithm for a selected type of data.
Training Zstandard is achieved by providing it with a few samples (one file per sample). The result of this training is stored in a file called "dictionary", which must be loaded before compression and decompression.
Using this dictionary, the compression ratio achievable on small data improves dramatically.
The following example uses the `github-users` [sample set](https://github.com/facebook/zstd/releases/tag/v1.1.3), created from [github public API](https://developer.github.com/v3/users/#get-all-users).
It consists of roughly 10K records weighing about 1KB each.
Compression Ratio | Compression Speed | Decompression Speed
------------------|-------------------|--------------------
![Compression Ratio](doc/images/dict-cr.png "Compression Ratio") | ![Compression Speed](doc/images/dict-cs.png "Compression Speed") | ![Decompression Speed](doc/images/dict-ds.png "Decompression Speed")
These compression gains are achieved while simultaneously providing _faster_ compression and decompression speeds.
Training works if there is some correlation in a family of small data samples. The more data-specific a dictionary is, the more efficient it is (there is no _universal dictionary_).
Hence, deploying one dictionary per type of data will provide the greatest benefits.
Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm will gradually use previously decoded content to better compress the rest of the file.
### Dictionary compression How To:
1. Create the dictionary
`zstd --train FullPathToTrainingSet/* -o dictionaryName`
2. Compress with dictionary
`zstd -D dictionaryName FILE`
3. Decompress with dictionary
`zstd -D dictionaryName --decompress FILE.zst`
## Build instructions
### Makefile
If your system is compatible with standard `make` (or `gmake`),
invoking `make` in root directory will generate `zstd` cli in root directory.
Other available options include:
- `make install` : create and install zstd cli, library and man pages
- `make check` : create and run `zstd`, tests its behavior on local platform
### cmake
A `cmake` project generator is provided within `build/cmake`.
It can generate Makefiles or other build scripts
to create `zstd` binary, and `libzstd` dynamic and static libraries.
By default, `CMAKE_BUILD_TYPE` is set to `Release`.
### Meson
A Meson project is provided within [`build/meson`](build/meson). Follow
build instructions in that directory.
You can also take a look at [`.travis.yml`](.travis.yml) file for an
example about how Meson is used to build this project.
Note that default build type is **release**.
### VCPKG
You can build and install zstd using the [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager:
git clone https://github.com/Microsoft/vcpkg.git
cd vcpkg
./bootstrap-vcpkg.sh
./vcpkg integrate install
./vcpkg install zstd
The zstd port in vcpkg is kept up to date by Microsoft team members and community contributors.
If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
### Visual Studio (Windows)
Going into `build` directory, you will find additional possibilities:
- Projects for Visual Studio 2005, 2008 and 2010.
+ VS2010 project is compatible with VS2012, VS2013, VS2015 and VS2017.
- Automated build scripts for Visual compiler by [@KrzysFR](https://github.com/KrzysFR), in `build/VS_scripts`,
which will build `zstd` cli and `libzstd` library without any need to open Visual Studio solution.
### Buck
You can build the zstd binary via buck by executing: `buck build programs:zstd` from the root of the repo.
The output binary will be in `buck-out/gen/programs/`.
## Testing
You can run quick local smoke tests by executing the `playTests.sh` script from the `src/tests` directory.
Two env variables, `$ZSTD_BIN` and `$DATAGEN_BIN`, are needed for the test script to locate the zstd and datagen binaries.
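For example (paths are illustrative, assuming `zstd` was built at the repository root and `datagen` sits alongside the script):
```
$ ZSTD_BIN=../zstd DATAGEN_BIN=./datagen ./playTests.sh
```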
For information on CI testing, please refer to TESTING.md
## Status
Zstandard is currently deployed within Facebook. It is used continuously to compress large amounts of data in multiple formats and use cases.
Zstandard is considered safe for production environments.
## License
Zstandard is dual-licensed under [BSD](LICENSE) and [GPLv2](COPYING).
## Contributing
The `dev` branch is the one where all contributions are merged before reaching `release`.
If you plan to propose a patch, please commit into the `dev` branch, or its own feature branch.
Direct commits to `release` are not permitted.
For more information, please read [CONTRIBUTING](CONTRIBUTING.md).

View File

@ -0,0 +1,43 @@
Testing
=======
Zstandard CI testing is split up into three sections:
short, medium, and long tests.
Short Tests
-----------
Short tests run on CircleCI for new commits on every branch and pull request.
They consist of the following tests:
- Compilation on all supported targets (x86, x86_64, ARM, AArch64, PowerPC, and PowerPC64)
- Compilation on various versions of gcc, clang, and g++
- `tests/playTests.sh` on x86_64, without the tests on long data (CLI tests)
- Small tests (`tests/legacy.c`, `tests/longmatch.c`) on x86_64
Medium Tests
------------
Medium tests run on every commit and pull request to `dev` branch, on TravisCI.
They consist of the following tests:
- The following tests run with UBsan and Asan on x86_64 and x86, as well as with
Msan on x86_64
- `tests/playTests.sh --test-large-data`
- Fuzzer tests: `tests/fuzzer.c`, `tests/zstreamtest.c`, and `tests/decodecorpus.c`
- `tests/zstreamtest.c` under Tsan (streaming mode, including multithreaded mode)
- Valgrind Test (`make -C tests valgrindTest`) (testing CLI and fuzzer under valgrind)
- Fuzzer tests (see above) on ARM, AArch64, PowerPC, and PowerPC64
Long Tests
----------
Long tests run on all commits to `release` branch,
and once a day on the current version of `dev` branch,
on TravisCI.
They consist of the following tests:
- Entire test suite (including fuzzers and some other specialized tests) on:
- x86_64 and x86 with UBsan and Asan
- x86_64 with Msan
- ARM, AArch64, PowerPC, and PowerPC64
- Streaming mode fuzzer with Tsan (for the `zstdmt` testing)
- ZlibWrapper tests, including under valgrind
- Versions test (ensuring `zstd` can decode files from all previous versions)
- `pzstd` with asan and tsan, as well as in 32-bits mode
- Testing `zstd` with legacy mode off
- Entire test suite and make install on macOS

View File

@ -0,0 +1,284 @@
# Following tests are run _only_ on `release` branch
# and on selected feature branch named `appveyorTest` or `visual*`
-
version: 1.0.{build}
branches:
only:
- release
- master
- /appveyor*/
- /visual*/
environment:
matrix:
- COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x64"
SCRIPT: "make allzstd MOREFLAGS=-static"
ARTIFACT: "true"
BUILD: "true"
- COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x86"
SCRIPT: "make allzstd MOREFLAGS=-static"
ARTIFACT: "true"
BUILD: "true"
- COMPILER: "clang"
HOST: "mingw"
PLATFORM: "x64"
SCRIPT: "MOREFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"
BUILD: "true"
- COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x64"
SCRIPT: ""
TEST: "cmake"
- COMPILER: "visual"
HOST: "visual"
PLATFORM: "x64"
CONFIGURATION: "Debug"
- COMPILER: "visual"
HOST: "visual"
PLATFORM: "Win32"
CONFIGURATION: "Debug"
- COMPILER: "visual"
HOST: "visual"
PLATFORM: "x64"
CONFIGURATION: "Release"
- COMPILER: "visual"
HOST: "visual"
PLATFORM: "Win32"
CONFIGURATION: "Release"
- COMPILER: "clang-cl"
HOST: "cmake-visual"
PLATFORM: "x64"
CONFIGURATION: "Release"
CMAKE_GENERATOR: "Visual Studio 15 2017"
CMAKE_GENERATOR_PLATFORM: "x64"
CMAKE_GENERATOR_TOOLSET: "LLVM"
APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
install:
- ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
- SET PATH_ORIGINAL=%PATH%
- if [%HOST%]==[mingw] (
SET "PATH_MINGW32=C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin" &&
SET "PATH_MINGW64=C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin" &&
COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin\make.exe &&
COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin\make.exe
)
- IF [%HOST%]==[visual] IF [%PLATFORM%]==[x64] (
SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;"
)
build_script:
- if [%HOST%]==[mingw] (
( if [%PLATFORM%]==[x64] (
SET "PATH=%PATH_MINGW64%;%PATH_ORIGINAL%"
) else if [%PLATFORM%]==[x86] (
SET "PATH=%PATH_MINGW32%;%PATH_ORIGINAL%"
) )
)
- if [%HOST%]==[mingw] if [%BUILD%]==[true] (
make -v &&
sh -c "%COMPILER% -v" &&
ECHO Building zlib to static link &&
SET "CC=%COMPILER%" &&
sh -c "cd .. && git clone --depth 1 --branch v1.2.11 https://github.com/madler/zlib" &&
sh -c "cd ../zlib && make -f win32/Makefile.gcc libz.a"
ECHO Building zstd &&
SET "CPPFLAGS=-I../../zlib" &&
SET "LDFLAGS=../../zlib/libz.a" &&
sh -c "%SCRIPT%" &&
( if [%COMPILER%]==[gcc] if [%ARTIFACT%]==[true]
ECHO Creating artifacts &&
ECHO %cd% &&
lib\dll\example\build_package.bat &&
make -C programs DEBUGFLAGS= clean zstd &&
cd programs\ && 7z a -tzip -mx9 zstd-win-binary-%PLATFORM%.zip zstd.exe &&
appveyor PushArtifact zstd-win-binary-%PLATFORM%.zip &&
cp zstd.exe ..\bin\zstd.exe &&
git clone --depth 1 --branch release https://github.com/facebook/zstd &&
cd zstd &&
git archive --format=tar release -o zstd-src.tar &&
..\zstd -19 zstd-src.tar &&
appveyor PushArtifact zstd-src.tar.zst &&
certUtil -hashfile zstd-src.tar.zst SHA256 > zstd-src.tar.zst.sha256.sig &&
appveyor PushArtifact zstd-src.tar.zst.sha256.sig &&
cd ..\..\bin\ &&
7z a -tzip -mx9 zstd-win-release-%PLATFORM%.zip * &&
appveyor PushArtifact zstd-win-release-%PLATFORM%.zip
)
)
- if [%HOST%]==[visual] (
ECHO *** &&
ECHO *** Building Visual Studio 2008 %PLATFORM%\%CONFIGURATION% in %APPVEYOR_BUILD_FOLDER% &&
ECHO *** &&
msbuild "build\VS2008\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v90 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2008\bin\%PLATFORM%\%CONFIGURATION%\*.exe &&
MD5sum build/VS2008/bin/%PLATFORM%/%CONFIGURATION%/*.exe &&
COPY build\VS2008\bin\%PLATFORM%\%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2008_%PLATFORM%_%CONFIGURATION%.exe &&
ECHO *** &&
ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
ECHO *** &&
msbuild "build\VS2010\zstd.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
msbuild "build\VS2010\zstd.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2010_%PLATFORM%_%CONFIGURATION%.exe &&
ECHO *** &&
ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
ECHO *** &&
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2012_%PLATFORM%_%CONFIGURATION%.exe &&
ECHO *** &&
ECHO *** Building Visual Studio 2013 %PLATFORM%\%CONFIGURATION% &&
ECHO *** &&
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /p:ForceImportBeforeCppTargets=%APPVEYOR_BUILD_FOLDER%\build\VS2010\CompileAsCpp.props /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
DIR build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe &&
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2013_%PLATFORM%_%CONFIGURATION%.exe &&
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
)
- if [%HOST%]==[cmake-visual] (
ECHO *** &&
ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
PUSHD build\cmake &&
cmake -DBUILD_TESTING=ON . &&
cmake --build . --config %CONFIGURATION% -j4 &&
POPD &&
ECHO ***
)
test_script:
- ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
- SET "CC=gcc"
- SET "CXX=g++"
- if [%TEST%]==[cmake] (
mkdir build\cmake\build &&
cd build\cmake\build &&
SET FUZZERTEST=-T2mn &&
SET ZSTREAM_TESTTIME=-T2mn &&
cmake -G "Visual Studio 14 2015 Win64" .. &&
cd ..\..\.. &&
make clean
)
- SET "FUZZERTEST=-T30s"
- if [%HOST%]==[visual] if [%CONFIGURATION%]==[Release] (
CD tests &&
SET ZSTD_BIN=./zstd.exe&&
SET DATAGEN_BIN=./datagen.exe&&
sh -e playTests.sh --test-large-data &&
fullbench.exe -i1 &&
fullbench.exe -i1 -P0 &&
fuzzer_VS2012_%PLATFORM%_Release.exe %FUZZERTEST% &&
fuzzer_VS2013_%PLATFORM%_Release.exe %FUZZERTEST% &&
fuzzer_VS2015_%PLATFORM%_Release.exe %FUZZERTEST%
)
# The following tests are for regular pushes
# into `dev` or some feature branch
# They run fewer tests, for a shorter feedback loop
-
version: 1.0.{build}
environment:
matrix:
- COMPILER: "gcc"
HOST: "cygwin"
PLATFORM: "x64"
- COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x64"
SCRIPT: "CFLAGS=-Werror make -j allzstd DEBUGLEVEL=2"
- COMPILER: "gcc"
HOST: "mingw"
PLATFORM: "x86"
SCRIPT: "CFLAGS=-Werror make -j allzstd"
- COMPILER: "clang"
HOST: "mingw"
PLATFORM: "x64"
SCRIPT: "CFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"
- COMPILER: "clang-cl"
HOST: "cmake-visual"
PLATFORM: "x64"
CONFIGURATION: "Release"
CMAKE_GENERATOR: "Visual Studio 15 2017"
CMAKE_GENERATOR_PLATFORM: "x64"
CMAKE_GENERATOR_TOOLSET: "LLVM"
APPVEYOR_BUILD_WORKER_IMAGE: "Visual Studio 2017"
install:
- ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
- SET PATH_ORIGINAL=%PATH%
- if [%HOST%]==[cygwin] (
ECHO Installing Cygwin Packages &&
C:\cygwin64\setup-x86_64.exe -qnNdO -R "C:\cygwin64" -g -P ^
gcc,^
cmake,^
make
)
- if [%HOST%]==[mingw] (
SET "PATH_MINGW32=C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin" &&
SET "PATH_MINGW64=C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin" &&
COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin\make.exe &&
COPY C:\msys64\usr\bin\make.exe C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin\make.exe
)
build_script:
- ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION%
- if [%HOST%]==[cygwin] (
set CHERE_INVOKING=yes &&
set CC=%COMPILER% &&
C:\cygwin64\bin\bash --login -c "
set -e;
cd build/cmake;
CFLAGS='-Werror' cmake -G 'Unix Makefiles' -DCMAKE_BUILD_TYPE=Debug -DZSTD_BUILD_TESTS:BOOL=ON -DZSTD_FUZZER_FLAGS=-T20s -DZSTD_ZSTREAM_FLAGS=-T20s -DZSTD_FULLBENCH_FLAGS=-i0 .;
make VERBOSE=1 -j;
ctest -V -L Medium;
"
)
- if [%HOST%]==[mingw] (
( if [%PLATFORM%]==[x64] (
SET "PATH=%PATH_MINGW64%;%PATH_ORIGINAL%"
) else if [%PLATFORM%]==[x86] (
SET "PATH=%PATH_MINGW32%;%PATH_ORIGINAL%"
) ) &&
make -v &&
sh -c "%COMPILER% -v" &&
set "CC=%COMPILER%" &&
sh -c "%SCRIPT%"
)
- if [%HOST%]==[cmake-visual] (
ECHO *** &&
ECHO *** Building %CMAKE_GENERATOR% ^(%CMAKE_GENERATOR_TOOLSET%^) %PLATFORM%\%CONFIGURATION% &&
PUSHD build\cmake &&
cmake -DBUILD_TESTING=ON . &&
cmake --build . --config %CONFIGURATION% -j4 &&
POPD &&
ECHO ***
)
test_script:
- ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
- if [%HOST%]==[mingw] (
set "CC=%COMPILER%" &&
make clean &&
make check
)

View File

@ -0,0 +1,3 @@
## Project Support Notice
The VS2005 project directory has been moved into the contrib directory to indicate that it is no longer supported.

View File

@ -0,0 +1,440 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="fullbench"
ProjectGUID="{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}"
RootNamespace="fullbench"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\tests\fullbench.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,488 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="fuzzer"
ProjectGUID="{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}"
RootNamespace="fuzzer"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\tests\fuzzer.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\pool.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,55 @@
Microsoft Visual Studio Solution File, Format Version 9.00
# Visual C++ Express 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstd", "zstd\zstd.vcproj", "{1A2AB08E-5CE7-4C5B-BE55-458157C14051}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fuzzer", "fuzzer\fuzzer.vcproj", "{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench", "fullbench\fullbench.vcproj", "{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstdlib", "zstdlib\zstdlib.vcproj", "{99DE2A79-7298-4004-A0ED-030D7A3796CA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|Win32.ActiveCfg = Debug|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|Win32.Build.0 = Debug|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|x64.ActiveCfg = Debug|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|x64.Build.0 = Debug|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|Win32.ActiveCfg = Release|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|Win32.Build.0 = Release|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|x64.ActiveCfg = Release|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|x64.Build.0 = Release|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|Win32.ActiveCfg = Debug|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|Win32.Build.0 = Debug|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|x64.ActiveCfg = Debug|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|x64.Build.0 = Debug|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|Win32.ActiveCfg = Release|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|Win32.Build.0 = Release|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|x64.ActiveCfg = Release|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|x64.Build.0 = Release|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|Win32.ActiveCfg = Debug|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|Win32.Build.0 = Debug|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|x64.ActiveCfg = Debug|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|x64.Build.0 = Debug|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|Win32.ActiveCfg = Release|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|Win32.Build.0 = Release|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|x64.ActiveCfg = Release|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|x64.Build.0 = Release|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|Win32.ActiveCfg = Debug|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|Win32.Build.0 = Debug|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|x64.ActiveCfg = Debug|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|x64.Build.0 = Debug|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|Win32.ActiveCfg = Release|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|Win32.Build.0 = Release|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|x64.ActiveCfg = Release|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,548 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="zstd"
ProjectGUID="{1A2AB08E-5CE7-4C5B-BE55-458157C14051}"
RootNamespace="zstd"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\bench.c"
>
</File>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\programs\dibio.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\programs\fileio.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.c"
>
</File>
<File
RelativePath="..\..\..\programs\zstdcli.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_legacy.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,546 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="zstdlib"
ProjectGUID="{99DE2A79-7298-4004-A0ED-030D7A3796CA}"
RootNamespace="zstdlib"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_legacy.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -0,0 +1,2 @@
#!/bin/zsh
# Replace tabs with 4 spaces. zsh is needed for the recursive ** globs and the
# {h,c} brace expansion (a plain /bin/sh would not expand them); `sed -i ''` is the BSD/macOS in-place form.
sed -i '' $'s/\t/    /g' ../lib/**/*.{h,c} ../programs/*.{h,c} ../tests/*.c ./**/*.{h,cpp} ../examples/*.c ../zlibWrapper/*.{h,c}

View File

@ -0,0 +1 @@
check_flipped_bits

View File

@ -0,0 +1,35 @@
# ################################################################
# Copyright (c) 2019-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
.PHONY: all
all: check_flipped_bits

ZSTDLIBDIR ?= ../../lib

CFLAGS ?= -O3
CFLAGS += -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZSTDLIBDIR)/compress \
	-I$(ZSTDLIBDIR)/decompress
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
	-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
	-Wstrict-prototypes -Wundef \
	-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
	-Wredundant-decls -Wmissing-prototypes
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)

.PHONY: $(ZSTDLIBDIR)/libzstd.a
$(ZSTDLIBDIR)/libzstd.a:
	$(MAKE) -C $(ZSTDLIBDIR) libzstd.a

check_flipped_bits: check_flipped_bits.c $(ZSTDLIBDIR)/libzstd.a
	$(CC) $(FLAGS) $< -o $@$(EXT) $(ZSTDLIBDIR)/libzstd.a

.PHONY: clean
clean:
	rm -f check_flipped_bits

View File

@ -0,0 +1,400 @@
/*
* Copyright (c) 2019-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
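/*
 * This tool reads a zstd-compressed blob, perturbs it one bit at a time (and
 * then one byte value at a time), attempts to decompress every perturbed
 * copy, and tallies successes and error codes; a "successful" decompression
 * of a corrupted blob would point at a gap in the format's error detection.
 */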
typedef struct {
char *input;
size_t input_size;
char *perturbed; /* same size as input */
char *output;
size_t output_size;
const char *dict_file_name;
const char *dict_file_dir_name;
int32_t dict_id;
char *dict;
size_t dict_size;
ZSTD_DDict* ddict;
ZSTD_DCtx* dctx;
int success_count;
int error_counts[ZSTD_error_maxCode];
} stuff_t;
static void free_stuff(stuff_t* stuff) {
free(stuff->input);
free(stuff->perturbed);
free(stuff->output);
ZSTD_freeDDict(stuff->ddict);
free(stuff->dict);
ZSTD_freeDCtx(stuff->dctx);
}
static void usage(void) {
fprintf(stderr, "check_flipped_bits input_filename [-d dict] [-D dict_dir]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Arguments:\n");
fprintf(stderr, " -d file: path to a dictionary file to use.\n");
fprintf(stderr, " -D dir : path to a directory, with files containing dictionaries, of the\n"
" form DICTID.zstd-dict, e.g., 12345.zstd-dict.\n");
exit(1);
}
static void print_summary(stuff_t* stuff) {
int error_code;
fprintf(stderr, "%9d successful decompressions\n", stuff->success_count);
for (error_code = 0; error_code < ZSTD_error_maxCode; error_code++) {
int count = stuff->error_counts[error_code];
if (count) {
fprintf(
stderr, "%9d failed decompressions with message: %s\n",
count, ZSTD_getErrorString(error_code));
}
}
}
static char* readFile(const char* filename, size_t* size) {
struct stat statbuf;
int ret;
FILE* f;
char *buf;
size_t bytes_read;
ret = stat(filename, &statbuf);
if (ret != 0) {
fprintf(stderr, "stat failed: %m\n");
return NULL;
}
if ((statbuf.st_mode & S_IFREG) != S_IFREG) {
fprintf(stderr, "Input must be regular file\n");
return NULL;
}
*size = statbuf.st_size;
f = fopen(filename, "r");
if (f == NULL) {
fprintf(stderr, "fopen failed: %m\n");
return NULL;
}
buf = malloc(*size);
if (buf == NULL) {
fprintf(stderr, "malloc failed\n");
fclose(f);
return NULL;
}
bytes_read = fread(buf, 1, *size, f);
if (bytes_read != *size) {
fprintf(stderr, "failed to read whole file\n");
fclose(f);
free(buf);
return NULL;
}
ret = fclose(f);
if (ret != 0) {
fprintf(stderr, "fclose failed: %m\n");
free(buf);
return NULL;
}
return buf;
}
static ZSTD_DDict* readDict(const char* filename, char **buf, size_t* size, int32_t* dict_id) {
ZSTD_DDict* ddict;
*buf = readFile(filename, size);
if (*buf == NULL) {
fprintf(stderr, "Opening dictionary file '%s' failed\n", filename);
return NULL;
}
ddict = ZSTD_createDDict_advanced(*buf, *size, ZSTD_dlm_byRef, ZSTD_dct_auto, ZSTD_defaultCMem);
if (ddict == NULL) {
fprintf(stderr, "Failed to create ddict.\n");
return NULL;
}
if (dict_id != NULL) {
*dict_id = ZSTD_getDictID_fromDDict(ddict);
}
return ddict;
}
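/* Loads a dictionary from dict_file_dir_name by its ID, expecting files named
 * "<DICTID>.zstd-dict" (see usage()). Returns NULL if no directory was given,
 * if loading fails, or if the file's dictID does not match the requested one. */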
static ZSTD_DDict* readDictByID(stuff_t *stuff, int32_t dict_id, char **buf, size_t* size) {
if (stuff->dict_file_dir_name == NULL) {
return NULL;
} else {
size_t dir_name_len = strlen(stuff->dict_file_dir_name);
int dir_needs_separator = 0;
size_t dict_file_name_alloc_size = dir_name_len + 1 /* '/' */ + 10 /* max int32_t len */ + strlen(".zstd-dict") + 1 /* '\0' */;
char *dict_file_name = malloc(dict_file_name_alloc_size);
ZSTD_DDict* ddict;
int32_t read_dict_id;
if (dict_file_name == NULL) {
fprintf(stderr, "malloc failed.\n");
return NULL;
}
if (dir_name_len > 0 && stuff->dict_file_dir_name[dir_name_len - 1] != '/') {
dir_needs_separator = 1;
}
snprintf(
dict_file_name,
dict_file_name_alloc_size,
"%s%s%u.zstd-dict",
stuff->dict_file_dir_name,
dir_needs_separator ? "/" : "",
dict_id);
/* fprintf(stderr, "Loading dict %u from '%s'.\n", dict_id, dict_file_name); */
ddict = readDict(dict_file_name, buf, size, &read_dict_id);
if (ddict == NULL) {
fprintf(stderr, "Failed to create ddict from '%s'.\n", dict_file_name);
free(dict_file_name);
return NULL;
}
if (read_dict_id != dict_id) {
fprintf(stderr, "Read dictID (%u) does not match expected (%u).\n", read_dict_id, dict_id);
free(dict_file_name);
ZSTD_freeDDict(ddict);
return NULL;
}
free(dict_file_name);
return ddict;
}
}
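/* Parses the command line, reads the input blob into memory, allocates the
 * perturbation/output buffers, and preloads a dictionary: either directly via
 * -d, or via -D by looking up the dictID announced in the input frame. */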
static int init_stuff(stuff_t* stuff, int argc, char *argv[]) {
const char* input_filename;
if (argc < 2) {
usage();
}
input_filename = argv[1];
stuff->input_size = 0;
stuff->input = readFile(input_filename, &stuff->input_size);
if (stuff->input == NULL) {
fprintf(stderr, "Failed to read input file.\n");
return 0;
}
stuff->perturbed = malloc(stuff->input_size);
if (stuff->perturbed == NULL) {
fprintf(stderr, "malloc failed.\n");
return 0;
}
memcpy(stuff->perturbed, stuff->input, stuff->input_size);
stuff->output_size = ZSTD_DStreamOutSize();
stuff->output = malloc(stuff->output_size);
if (stuff->output == NULL) {
fprintf(stderr, "malloc failed.\n");
return 0;
}
stuff->dict_file_name = NULL;
stuff->dict_file_dir_name = NULL;
stuff->dict_id = 0;
stuff->dict = NULL;
stuff->dict_size = 0;
stuff->ddict = NULL;
if (argc > 2) {
if (!strcmp(argv[2], "-d")) {
if (argc > 3) {
stuff->dict_file_name = argv[3];
} else {
usage();
}
} else
if (!strcmp(argv[2], "-D")) {
if (argc > 3) {
stuff->dict_file_dir_name = argv[3];
} else {
usage();
}
} else {
usage();
}
}
if (stuff->dict_file_dir_name) {
int32_t dict_id = ZSTD_getDictID_fromFrame(stuff->input, stuff->input_size);
if (dict_id != 0) {
stuff->ddict = readDictByID(stuff, dict_id, &stuff->dict, &stuff->dict_size);
if (stuff->ddict == NULL) {
fprintf(stderr, "Failed to create cached ddict.\n");
return 0;
}
stuff->dict_id = dict_id;
}
} else
if (stuff->dict_file_name) {
stuff->ddict = readDict(stuff->dict_file_name, &stuff->dict, &stuff->dict_size, &stuff->dict_id);
if (stuff->ddict == NULL) {
fprintf(stderr, "Failed to create ddict from '%s'.\n", stuff->dict_file_name);
return 0;
}
}
stuff->dctx = ZSTD_createDCtx();
if (stuff->dctx == NULL) {
return 0;
}
stuff->success_count = 0;
memset(stuff->error_counts, 0, sizeof(stuff->error_counts));
return 1;
}
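/* Streams the perturbed buffer through ZSTD_decompressStream(). If the
 * perturbation changed the frame's dictID, the matching dictionary is loaded
 * from the dictionary directory; otherwise the preloaded ddict is used.
 * Returns 1 on full success, 0 on error (counted in error_counts). */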
static int test_decompress(stuff_t* stuff) {
size_t ret;
ZSTD_inBuffer in = {stuff->perturbed, stuff->input_size, 0};
ZSTD_outBuffer out = {stuff->output, stuff->output_size, 0};
ZSTD_DCtx* dctx = stuff->dctx;
int32_t custom_dict_id = ZSTD_getDictID_fromFrame(in.src, in.size);
char *custom_dict = NULL;
size_t custom_dict_size = 0;
ZSTD_DDict* custom_ddict = NULL;
if (custom_dict_id != 0 && custom_dict_id != stuff->dict_id) {
/* fprintf(stderr, "Instead of dict %u, this perturbed blob wants dict %u.\n", stuff->dict_id, custom_dict_id); */
custom_ddict = readDictByID(stuff, custom_dict_id, &custom_dict, &custom_dict_size);
}
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
if (custom_ddict != NULL) {
ZSTD_DCtx_refDDict(dctx, custom_ddict);
} else {
ZSTD_DCtx_refDDict(dctx, stuff->ddict);
}
while (in.pos != in.size) {
out.pos = 0;
ret = ZSTD_decompressStream(dctx, &out, &in);
if (ZSTD_isError(ret)) {
unsigned int code = ZSTD_getErrorCode(ret);
if (code >= ZSTD_error_maxCode) {
fprintf(stderr, "Received unexpected error code!\n");
exit(1);
}
stuff->error_counts[code]++;
/*
fprintf(
stderr, "Decompression failed: %s\n", ZSTD_getErrorName(ret));
*/
if (custom_ddict != NULL) {
ZSTD_freeDDict(custom_ddict);
free(custom_dict);
}
return 0;
}
}
stuff->success_count++;
if (custom_ddict != NULL) {
ZSTD_freeDDict(custom_ddict);
free(custom_dict);
}
return 1;
}
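/* Flips every bit of the input, one at a time, and reports any single-bit
 * corruption that still decompresses successfully. */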
static int perturb_bits(stuff_t* stuff) {
size_t pos;
size_t bit;
for (pos = 0; pos < stuff->input_size; pos++) {
unsigned char old_val = stuff->input[pos];
if (pos % 1000 == 0) {
fprintf(stderr, "Perturbing byte %zu / %zu\n", pos, stuff->input_size);
}
for (bit = 0; bit < 8; bit++) {
unsigned char new_val = old_val ^ (1 << bit);
stuff->perturbed[pos] = new_val;
if (test_decompress(stuff)) {
fprintf(
stderr,
"Flipping byte %zu bit %zu (0x%02x -> 0x%02x) "
"produced a successful decompression!\n",
pos, bit, old_val, new_val);
}
}
stuff->perturbed[pos] = old_val;
}
return 1;
}
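/* Try all 255 alternate values of each byte of the input and report
 * any single-byte corruption that still decompresses successfully. */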
static int perturb_bytes(stuff_t* stuff) {
size_t pos;
size_t new_val;
for (pos = 0; pos < stuff->input_size; pos++) {
unsigned char old_val = stuff->input[pos];
if (pos % 1000 == 0) {
fprintf(stderr, "Perturbing byte %zu / %zu\n", pos, stuff->input_size);
}
for (new_val = 0; new_val < 256; new_val++) {
stuff->perturbed[pos] = new_val;
if (test_decompress(stuff)) {
fprintf(
stderr,
"Changing byte %zu (0x%02x -> 0x%02x) "
"produced a successful decompression!\n",
pos, old_val, (unsigned char)new_val);
}
}
stuff->perturbed[pos] = old_val;
}
return 1;
}
int main(int argc, char* argv[]) {
stuff_t stuff;
if(!init_stuff(&stuff, argc, argv)) {
fprintf(stderr, "Failed to init.\n");
return 1;
}
if (test_decompress(&stuff)) {
fprintf(stderr, "Blob already decompresses successfully!\n");
return 1;
}
perturb_bits(&stuff);
perturb_bytes(&stuff);
print_summary(&stuff);
free_stuff(&stuff);
return 0;
}

View File

@ -0,0 +1,20 @@
# Dockerfile
# First image to build the binary
FROM alpine as builder
RUN apk --no-cache add make gcc libc-dev
COPY . /src
RUN mkdir /pkg && cd /src && make && make DESTDIR=/pkg install
# Second minimal image to only keep the built binary
FROM alpine
# Copy the built files
COPY --from=builder /pkg /
# Copy the license as well
RUN mkdir -p /usr/local/share/licenses/zstd
COPY --from=builder /src/LICENSE /usr/local/share/licenses/zstd/
# Just run `zstd` if no other command is given
CMD ["/usr/local/bin/zstd"]

View File

@ -0,0 +1,20 @@
## Requirement
The `Dockerfile` script requires a version of `docker` >= 17.05 (the first release to support the multi-stage builds this image uses)
## Installing docker
The official docker install docs describe a PPA that provides a sufficiently recent version:
https://docs.docker.com/install/linux/docker-ce/ubuntu/
## How to run
`docker build -t zstd .`
## Test
```
echo foo | docker run -i --rm zstd | docker run -i --rm zstd zstdcat
foo
```

View File

@ -0,0 +1,749 @@
#!/usr/bin/env python3
# ################################################################
# Copyright (c) 2021-2021, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################
import argparse
import contextlib
import os
import re
import shutil
import sys
from typing import Optional
INCLUDED_SUBDIRS = ["common", "compress", "decompress"]
SKIPPED_FILES = [
"common/mem.h",
"common/zstd_deps.h",
"common/pool.c",
"common/pool.h",
"common/threading.c",
"common/threading.h",
"common/zstd_trace.c",
"common/zstd_trace.h",
"compress/zstdmt_compress.h",
"compress/zstdmt_compress.c",
]
XXHASH_FILES = [
"common/xxhash.c",
"common/xxhash.h",
]
class FileLines(object):
def __init__(self, filename):
self.filename = filename
with open(self.filename, "r") as f:
self.lines = f.readlines()
def write(self):
with open(self.filename, "w") as f:
f.write("".join(self.lines))
class PartialPreprocessor(object):
"""
Looks for simple ifdefs and ifndefs and replaces them.
Handles && and ||.
Has fancy logic to handle translating elifs to ifs.
Only looks for macros in the first part of the expression with no
parens.
Does not handle multi-line macros (only looks in first line).
"""
def __init__(self, defs: [(str, Optional[str])], replaces: [(str, str)], undefs: [str]):
MACRO_GROUP = r"(?P<macro>[a-zA-Z_][a-zA-Z_0-9]*)"
ELIF_GROUP = r"(?P<elif>el)?"
OP_GROUP = r"(?P<op>&&|\|\|)?"
self._defs = {macro:value for macro, value in defs}
self._replaces = {macro:value for macro, value in replaces}
self._defs.update(self._replaces)
self._undefs = set(undefs)
self._define = re.compile(r"\s*#\s*define")
self._if = re.compile(r"\s*#\s*if")
self._elif = re.compile(r"\s*#\s*(?P<elif>el)if")
self._else = re.compile(r"\s*#\s*(?P<else>else)")
self._endif = re.compile(r"\s*#\s*endif")
self._ifdef = re.compile(fr"\s*#\s*if(?P<not>n)?def {MACRO_GROUP}\s*")
self._if_defined = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+(?P<not>!)?\s*defined\s*\(\s*{MACRO_GROUP}\s*\)\s*{OP_GROUP}"
)
self._if_defined_value = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+defined\s*\(\s*{MACRO_GROUP}\s*\)\s*"
fr"(?P<op>&&)\s*"
fr"(?P<openp>\()?\s*"
fr"(?P<macro2>[a-zA-Z_][a-zA-Z_0-9]*)\s*"
fr"(?P<cmp>[=><!]+)\s*"
fr"(?P<value>[0-9]*)\s*"
fr"(?P<closep>\))?\s*"
)
self._if_true = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+{MACRO_GROUP}\s*{OP_GROUP}"
)
self._c_comment = re.compile(r"/\*.*?\*/")
self._cpp_comment = re.compile(r"//")
def _log(self, *args, **kwargs):
print(*args, **kwargs)
def _strip_comments(self, line):
# First strip c-style comments (may include //)
while True:
m = self._c_comment.search(line)
if m is None:
break
line = line[:m.start()] + line[m.end():]
# Then strip cpp-style comments
m = self._cpp_comment.search(line)
if m is not None:
line = line[:m.start()]
return line
def _fixup_indentation(self, macro, replace: [str]):
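# Dedents the lines hoisted out of an #if block by their common leading
# whitespace. For all-preprocessor blocks the leading '#' is stripped
# first, so indentation written after the '#' is normalized too, then
# the '#' is restored.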
if len(replace) == 0:
return replace
if len(replace) == 1 and self._define.match(replace[0]) is None:
# If there is only one line, only replace defines
return replace
all_pound = True
for line in replace:
if not line.startswith('#'):
all_pound = False
if all_pound:
replace = [line[1:] for line in replace]
min_spaces = len(replace[0])
for line in replace:
spaces = 0
for i, c in enumerate(line):
if c != ' ':
# Non-preprocessor line ==> skip the fixup
if not all_pound and c != '#':
return replace
spaces = i
break
min_spaces = min(min_spaces, spaces)
replace = [line[min_spaces:] for line in replace]
if all_pound:
replace = ["#" + line for line in replace]
return replace
def _handle_if_block(self, macro, idx, is_true, prepend):
"""
Remove the #if or #elif block starting on this line.
"""
REMOVE_ONE = 0
KEEP_ONE = 1
REMOVE_REST = 2
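# KEEP_ONE keeps the branch that starts on this line, REMOVE_ONE drops
# it (promoting a following elif/else), and REMOVE_REST drops every
# remaining branch up to the matching #endif.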
if is_true:
state = KEEP_ONE
else:
state = REMOVE_ONE
line = self._inlines[idx]
is_if = self._if.match(line) is not None
assert is_if or self._elif.match(line) is not None
depth = 0
start_idx = idx
idx += 1
replace = prepend
finished = False
while idx < len(self._inlines):
line = self._inlines[idx]
# Nested if statement
if self._if.match(line):
depth += 1
idx += 1
continue
# We're inside a nested statement
if depth > 0:
if self._endif.match(line):
depth -= 1
idx += 1
continue
# We're at the original depth
# Looking only for an endif.
# We've found a true statement, but haven't
# completely elided the if block, so we just
# remove the remainder.
if state == REMOVE_REST:
if self._endif.match(line):
if is_if:
# Remove the endif because we took the first if
idx += 1
finished = True
break
idx += 1
continue
if state == KEEP_ONE:
m = self._elif.match(line)
if self._endif.match(line):
replace += self._inlines[start_idx + 1:idx]
idx += 1
finished = True
break
if self._elif.match(line) or self._else.match(line):
replace += self._inlines[start_idx + 1:idx]
state = REMOVE_REST
idx += 1
continue
if state == REMOVE_ONE:
m = self._elif.match(line)
if m is not None:
if is_if:
idx += 1
b = m.start('elif')
e = m.end('elif')
assert e - b == 2
replace.append(line[:b] + line[e:])
finished = True
break
m = self._else.match(line)
if m is not None:
if is_if:
idx += 1
while self._endif.match(self._inlines[idx]) is None:
replace.append(self._inlines[idx])
idx += 1
idx += 1
finished = True
break
if self._endif.match(line):
if is_if:
# Remove the endif because no other elifs
idx += 1
finished = True
break
idx += 1
continue
if not finished:
raise RuntimeError("Unterminated if block!")
replace = self._fixup_indentation(macro, replace)
self._log(f"\tHardwiring {macro}")
if start_idx > 0:
self._log(f"\t\t {self._inlines[start_idx - 1][:-1]}")
for x in range(start_idx, idx):
self._log(f"\t\t- {self._inlines[x][:-1]}")
for line in replace:
self._log(f"\t\t+ {line[:-1]}")
if idx < len(self._inlines):
self._log(f"\t\t {self._inlines[idx][:-1]}")
return idx, replace
def _preprocess_once(self):
outlines = []
idx = 0
changed = False
while idx < len(self._inlines):
line = self._inlines[idx]
sline = self._strip_comments(line)
m = self._ifdef.fullmatch(sline)
if_true = False
if m is None:
m = self._if_defined_value.fullmatch(sline)
if m is None:
m = self._if_defined.match(sline)
if m is None:
m = self._if_true.match(sline)
if_true = (m is not None)
if m is None:
outlines.append(line)
idx += 1
continue
groups = m.groupdict()
macro = groups['macro']
op = groups.get('op')
if not (macro in self._defs or macro in self._undefs):
outlines.append(line)
idx += 1
continue
defined = macro in self._defs
# Needed variables set:
# resolved: Is the statement fully resolved?
# is_true: If resolved, is the statement true?
ifdef = False
if if_true:
if not defined:
outlines.append(line)
idx += 1
continue
defined_value = self._defs[macro]
is_int = True
try:
defined_value = int(defined_value)
except TypeError:
is_int = False
except ValueError:
is_int = False
resolved = is_int
is_true = (defined_value != 0)
if resolved and op is not None:
if op == '&&':
resolved = not is_true
else:
assert op == '||'
resolved = is_true
else:
ifdef = groups.get('not') is None
elseif = groups.get('elif') is not None
macro2 = groups.get('macro2')
cmp = groups.get('cmp')
value = groups.get('value')
openp = groups.get('openp')
closep = groups.get('closep')
is_true = (ifdef == defined)
resolved = True
if op is not None:
if op == '&&':
resolved = not is_true
else:
assert op == '||'
resolved = is_true
if macro2 is not None and not resolved:
assert ifdef and defined and op == '&&' and cmp is not None
# The defined() check passed, but the expression also compares the
# macro's value, so check the value as well.
defined_value = self._defs[macro]
are_ints = True
try:
defined_value = int(defined_value)
value = int(value)
except TypeError:
are_ints = False
except ValueError:
are_ints = False
if (
macro == macro2 and
((openp is None) == (closep is None)) and
are_ints
):
resolved = True
if cmp == '<':
is_true = defined_value < value
elif cmp == '<=':
is_true = defined_value <= value
elif cmp == '==':
is_true = defined_value == value
elif cmp == '!=':
is_true = defined_value != value
elif cmp == '>=':
is_true = defined_value >= value
elif cmp == '>':
is_true = defined_value > value
else:
resolved = False
if op is not None and not resolved:
# Remove the first op in the line + spaces
if op == '&&':
opre = op
else:
assert op == '||'
opre = r'\|\|'
needle = re.compile(fr"(?P<if>\s*#\s*(el)?if\s+).*?(?P<op>{opre}\s*)")
match = needle.match(line)
assert match is not None
newline = line[:match.end('if')] + line[match.end('op'):]
self._log(f"\tHardwiring partially resolved {macro}")
self._log(f"\t\t- {line[:-1]}")
self._log(f"\t\t+ {newline[:-1]}")
outlines.append(newline)
idx += 1
continue
# Skip any statements we cannot fully compute
if not resolved:
outlines.append(line)
idx += 1
continue
prepend = []
if macro in self._replaces:
assert not ifdef
assert op is None
value = self._replaces.pop(macro)
prepend = [f"#define {macro} {value}\n"]
idx, replace = self._handle_if_block(macro, idx, is_true, prepend)
outlines += replace
changed = True
return changed, outlines
def preprocess(self, filename):
with open(filename, 'r') as f:
self._inlines = f.readlines()
changed = True
iters = 0
while changed:
iters += 1
changed, outlines = self._preprocess_once()
self._inlines = outlines
with open(filename, 'w') as f:
f.write(''.join(self._inlines))
class Freestanding(object):
def __init__(
self, zstd_deps: str, mem: str, source_lib: str, output_lib: str,
external_xxhash: bool, xxh64_state: Optional[str],
xxh64_prefix: Optional[str], rewritten_includes: [(str, str)],
defs: [(str, Optional[str])], replaces: [(str, str)],
undefs: [str], excludes: [str], seds: [str],
):
self._zstd_deps = zstd_deps
self._mem = mem
self._src_lib = source_lib
self._dst_lib = output_lib
self._external_xxhash = external_xxhash
self._xxh64_state = xxh64_state
self._xxh64_prefix = xxh64_prefix
self._rewritten_includes = rewritten_includes
self._defs = defs
self._replaces = replaces
self._undefs = undefs
self._excludes = excludes
self._seds = seds
def _dst_lib_file_paths(self):
"""
Yields all the file paths in the dst_lib.
"""
for root, dirname, filenames in os.walk(self._dst_lib):
for filename in filenames:
filepath = os.path.join(root, filename)
yield filepath
def _log(self, *args, **kwargs):
print(*args, **kwargs)
def _copy_file(self, lib_path):
if not (lib_path.endswith(".c") or lib_path.endswith(".h")):
return
if lib_path in SKIPPED_FILES:
self._log(f"\tSkipping file: {lib_path}")
return
if self._external_xxhash and lib_path in XXHASH_FILES:
self._log(f"\tSkipping xxhash file: {lib_path}")
return
src_path = os.path.join(self._src_lib, lib_path)
dst_path = os.path.join(self._dst_lib, lib_path)
self._log(f"\tCopying: {src_path} -> {dst_path}")
shutil.copyfile(src_path, dst_path)
def _copy_source_lib(self):
self._log("Copying source library into output library")
assert os.path.exists(self._src_lib)
os.makedirs(self._dst_lib, exist_ok=True)
self._copy_file("zstd.h")
self._copy_file("zstd_errors.h")
for subdir in INCLUDED_SUBDIRS:
src_dir = os.path.join(self._src_lib, subdir)
dst_dir = os.path.join(self._dst_lib, subdir)
assert os.path.exists(src_dir)
os.makedirs(dst_dir, exist_ok=True)
for filename in os.listdir(src_dir):
lib_path = os.path.join(subdir, filename)
self._copy_file(lib_path)
def _copy_zstd_deps(self):
dst_zstd_deps = os.path.join(self._dst_lib, "common", "zstd_deps.h")
self._log(f"Copying zstd_deps: {self._zstd_deps} -> {dst_zstd_deps}")
shutil.copyfile(self._zstd_deps, dst_zstd_deps)
def _copy_mem(self):
dst_mem = os.path.join(self._dst_lib, "common", "mem.h")
self._log(f"Copying mem: {self._mem} -> {dst_mem}")
shutil.copyfile(self._mem, dst_mem)
def _hardwire_preprocessor(self, name: str, value: Optional[str] = None, undef=False):
"""
If value=None then hardwire that it is defined, but not what the value is.
If undef=True then value must be None.
If value='' then the macro is defined to '' exactly.
"""
assert not (undef and value is not None)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
def _hardwire_defines(self):
self._log("Hardwiring macros")
partial_preprocessor = PartialPreprocessor(self._defs, self._replaces, self._undefs)
for filepath in self._dst_lib_file_paths():
partial_preprocessor.preprocess(filepath)
def _remove_excludes(self):
self._log("Removing excluded sections")
for exclude in self._excludes:
self._log(f"\tRemoving excluded sections for: {exclude}")
begin_re = re.compile(f"BEGIN {exclude}")
end_re = re.compile(f"END {exclude}")
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
outlines = []
skipped = []
emit = True
for line in file.lines:
if emit and begin_re.search(line) is not None:
assert end_re.search(line) is None
emit = False
if emit:
outlines.append(line)
else:
skipped.append(line)
if end_re.search(line) is not None:
assert begin_re.search(line) is None
self._log(f"\t\tRemoving excluded section: {exclude}")
for s in skipped:
self._log(f"\t\t\t- {s}")
emit = True
skipped = []
if not emit:
raise RuntimeError("Excluded section unfinished!")
file.lines = outlines
file.write()
def _rewrite_include(self, original, rewritten):
self._log(f"\tRewriting include: {original} -> {rewritten}")
regex = re.compile(f"\\s*#\\s*include\\s*(?P<include>{original})")
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
match = regex.match(line)
if match is None:
continue
s = match.start('include')
e = match.end('include')
file.lines[i] = line[:s] + rewritten + line[e:]
file.write()
def _rewrite_includes(self):
self._log("Rewriting includes")
for original, rewritten in self._rewritten_includes:
self._rewrite_include(original, rewritten)
def _replace_xxh64_prefix(self):
if self._xxh64_prefix is None:
return
self._log(f"Replacing XXH64 prefix with {self._xxh64_prefix}")
replacements = []
if self._xxh64_state is not None:
replacements.append(
(re.compile(r"([^\w]|^)(?P<orig>XXH64_state_t)([^\w]|$)"), self._xxh64_state)
)
if self._xxh64_prefix is not None:
replacements.append(
(re.compile(r"([^\w]|^)(?P<orig>XXH64)[\(_]"), self._xxh64_prefix)
)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
modified = False
for regex, replacement in replacements:
match = regex.search(line)
while match is not None:
modified = True
b = match.start('orig')
e = match.end('orig')
line = line[:b] + replacement + line[e:]
match = regex.search(line)
if modified:
self._log(f"\t- {file.lines[i][:-1]}")
self._log(f"\t+ {line[:-1]}")
file.lines[i] = line
file.write()
def _parse_sed(self, sed):
assert sed[0] == 's'
delim = sed[1]
match = re.fullmatch(f's{delim}(.+){delim}(.*){delim}(.*)', sed)
assert match is not None
regex = re.compile(match.group(1))
format_str = match.group(2)
is_global = match.group(3) == 'g'
return regex, format_str, is_global
def _process_sed(self, sed):
self._log(f"Processing sed: {sed}")
regex, format_str, is_global = self._parse_sed(sed)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
modified = False
while True:
match = regex.search(line)
if match is None:
break
replacement = format_str.format(match.groups(''), match.groupdict(''))
b = match.start()
e = match.end()
line = line[:b] + replacement + line[e:]
modified = True
if not is_global:
break
if modified:
self._log(f"\t- {file.lines[i][:-1]}")
self._log(f"\t+ {line[:-1]}")
file.lines[i] = line
file.write()
def _process_seds(self):
self._log("Processing seds")
for sed in self._seds:
self._process_sed(sed)
def go(self):
self._copy_source_lib()
self._copy_zstd_deps()
self._copy_mem()
self._hardwire_defines()
self._remove_excludes()
self._rewrite_includes()
self._replace_xxh64_prefix()
self._process_seds()
def parse_optional_pair(defines: [str]) -> [(str, Optional[str])]:
output = []
for define in defines:
parsed = define.split('=')
if len(parsed) == 1:
output.append((parsed[0], None))
elif len(parsed) == 2:
output.append((parsed[0], parsed[1]))
else:
raise RuntimeError(f"Bad define: {define}")
return output
def parse_pair(rewritten_includes: [str]) -> [(str, str)]:
output = []
for rewritten_include in rewritten_includes:
parsed = rewritten_include.split('=')
if len(parsed) == 2:
output.append((parsed[0], parsed[1]))
else:
raise RuntimeError(f"Bad rewritten include: {rewritten_include}")
return output
def main(name, args):
parser = argparse.ArgumentParser(prog=name)
parser.add_argument("--zstd-deps", default="zstd_deps.h", help="Zstd dependencies file")
parser.add_argument("--mem", default="mem.h", help="Memory module")
parser.add_argument("--source-lib", default="../../lib", help="Location of the zstd library")
parser.add_argument("--output-lib", default="./freestanding_lib", help="Where to output the freestanding zstd library")
parser.add_argument("--xxhash", default=None, help="Alternate external xxhash include e.g. --xxhash='<xxhash.h>'. If set xxhash is not included.")
parser.add_argument("--xxh64-state", default=None, help="Alternate XXH64 state type (excluding _) e.g. --xxh64-state='struct xxh64_state'")
parser.add_argument("--xxh64-prefix", default=None, help="Alternate XXH64 function prefix (excluding _) e.g. --xxh64-prefix=xxh64")
parser.add_argument("--rewrite-include", default=[], dest="rewritten_includes", action="append", help="Rewrite an include REGEX=NEW (e.g. '<stddef\\.h>=<linux/types.h>')")
parser.add_argument("--sed", default=[], dest="seds", action="append", help="Apply a sed replacement. Format: `s/REGEX/FORMAT/[g]`. REGEX is a Python regex. FORMAT is a Python format string formatted by the regex dict.")
parser.add_argument("-D", "--define", default=[], dest="defs", action="append", help="Pre-define this macro (can be passed multiple times)")
parser.add_argument("-U", "--undefine", default=[], dest="undefs", action="append", help="Pre-undefine this macro (can be passed mutliple times)")
parser.add_argument("-R", "--replace", default=[], dest="replaces", action="append", help="Pre-define this macro and replace the first ifndef block with its definition")
parser.add_argument("-E", "--exclude", default=[], dest="excludes", action="append", help="Exclude all lines between 'BEGIN <EXCLUDE>' and 'END <EXCLUDE>'")
args = parser.parse_args(args)
# Always remove threading
if "ZSTD_MULTITHREAD" not in args.undefs:
args.undefs.append("ZSTD_MULTITHREAD")
args.defs = parse_optional_pair(args.defs)
for name, _ in args.defs:
if name in args.undefs:
raise RuntimeError(f"{name} is both defined and undefined!")
# Always set tracing to 0
if "ZSTD_NO_TRACE" not in (arg[0] for arg in args.defs):
args.defs.append(("ZSTD_NO_TRACE", None))
args.defs.append(("ZSTD_TRACE", "0"))
args.replaces = parse_pair(args.replaces)
for name, _ in args.replaces:
if name in args.undefs or name in args.defs:
raise RuntimeError(f"{name} is both replaced and (un)defined!")
args.rewritten_includes = parse_pair(args.rewritten_includes)
external_xxhash = False
if args.xxhash is not None:
external_xxhash = True
args.rewritten_includes.append(('"(\\.\\./common/)?xxhash.h"', args.xxhash))
if args.xxh64_prefix is not None:
if not external_xxhash:
raise RuntimeError("--xxh64-prefix may only be used with --xxhash provided")
if args.xxh64_state is not None:
if not external_xxhash:
raise RuntimeError("--xxh64-state may only be used with --xxhash provided")
Freestanding(
args.zstd_deps,
args.mem,
args.source_lib,
args.output_lib,
external_xxhash,
args.xxh64_state,
args.xxh64_prefix,
args.rewritten_includes,
args.defs,
args.replaces,
args.undefs,
args.excludes,
args.seds,
).go()
if __name__ == "__main__":
main(sys.argv[0], sys.argv[1:])

View File

@ -0,0 +1,3 @@
# make artefact
gen_html
zstd_manual.html

View File

@ -0,0 +1,51 @@
# ################################################################
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
CXXFLAGS ?= -O3
CXXFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
CXXFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS)
ZSTDAPI = ../../lib/zstd.h
ZSTDMANUAL = ../../doc/zstd_manual.html
LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
LIBVER := $(shell echo $(LIBVER_SCRIPT))
# Define *.exe as extension for Windows systems
ifneq (,$(filter Windows%,$(OS)))
EXT =.exe
else
EXT =
endif
.PHONY: default
default: gen_html
.PHONY: all
all: manual
gen_html: gen_html.cpp
$(CXX) $(FLAGS) $^ -o $@$(EXT)
$(ZSTDMANUAL): gen_html $(ZSTDAPI)
echo "Update zstd manual in /doc"
./gen_html $(LIBVER) $(ZSTDAPI) $(ZSTDMANUAL)
.PHONY: manual
manual: gen_html $(ZSTDMANUAL)
.PHONY: clean
clean:
@$(RM) gen_html$(EXT)
@echo Cleaning completed

View File

@ -0,0 +1,31 @@
gen_html - a program for automatic generation of zstd manual
============================================================
#### Introduction
This simple C++ program generates a single-page HTML manual from `zstd.h`.
The format of the recognized comment blocks is as follows:
- comments of type `/*!` mean: this is a function declaration; switch comments with declarations
- comments of type `/**` and `/*-` mean: this is a comment; use a `<H2>` header for the first line
- comments of type `/*=` and `/**=` mean: use a `<H3>` header and show also all functions until first empty line
- comments of type `/*X` where `X` is different from above-mentioned are ignored
Moreover:
- `ZSTDLIB_API` is removed to improve readability
- `typedef` are detected and included even if uncommented
- comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold)
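For illustration, here is a hypothetical header fragment using the markers above (the function name is invented):
```c
/*! Example_create() :
 *  gen_html swaps this comment with the declaration that follows,
 *  so the declaration is rendered first, in bold. */
ZSTDLIB_API int Example_create(void);

/*= Example section
 *  rendered as a <H3> header; the declarations following it,
 *  up to the first empty line, are shown as well. */
```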
#### Usage
The program requires 3 parameters:
```
gen_html [zstd_version] [input_file] [output_html]
```
To compile the program and generate the zstd manual, we used:
```
make
./gen_html.exe 1.1.1 ../../lib/zstd.h zstd_manual.html
```

View File

@ -0,0 +1,9 @@
#!/bin/sh
LIBVER_MAJOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
LIBVER_MINOR_SCRIPT=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
LIBVER_PATCH_SCRIPT=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/zstd.h`
LIBVER_SCRIPT=$LIBVER_MAJOR_SCRIPT.$LIBVER_MINOR_SCRIPT.$LIBVER_PATCH_SCRIPT
echo ZSTD_VERSION=$LIBVER_SCRIPT
./gen_html $LIBVER_SCRIPT ../../lib/zstd.h ./zstd_manual.html

View File

@ -0,0 +1,224 @@
/*
* Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
using namespace std;
/* trim string at the beginning and at the end */
void trim(string& s, string characters)
{
size_t p = s.find_first_not_of(characters);
s.erase(0, p);
p = s.find_last_not_of(characters);
if (string::npos != p)
s.erase(p+1);
}
/* extract the comment body : drop the 3-character opening marker and everything from the closing delimiter onward */
void trim_comments(string &s)
{
size_t spos, epos;
spos = s.find("/*");
epos = s.find("*/");
s = s.substr(spos+3, epos-(spos+3));
}
/* get lines until a given terminator */
vector<string> get_lines(vector<string>& input, int& linenum, string terminator)
{
vector<string> out;
string line;
size_t epos;
while ((size_t)linenum < input.size()) {
line = input[linenum];
if (terminator.empty() && line.empty()) { linenum--; break; }
epos = line.find(terminator);
if (!terminator.empty() && epos!=string::npos) {
out.push_back(line);
break;
}
out.push_back(line);
linenum++;
}
return out;
}
/* print line with ZSTDLIB_API removed and C++ comments not bold */
void print_line(stringstream &sout, string line)
{
size_t spos;
if (line.substr(0,12) == "ZSTDLIB_API ") line = line.substr(12);
spos = line.find("/*");
if (spos!=string::npos) {
sout << line.substr(0, spos);
sout << "</b>" << line.substr(spos) << "<b>" << endl;
} else {
// fprintf(stderr, "lines=%s\n", line.c_str());
sout << line << endl;
}
}
int main(int argc, char *argv[]) {
char exclam;
int linenum, chapter = 1;
vector<string> input, lines, comments, chapters;
string line, version;
size_t spos, l;
stringstream sout;
ifstream istream;
ofstream ostream;
if (argc < 4) {
cout << "usage: " << argv[0] << " [zstd_version] [input_file] [output_html]" << endl;
return 1;
}
version = "zstd " + string(argv[1]) + " Manual";
istream.open(argv[2], ifstream::in);
if (!istream.is_open()) {
cout << "Error opening file " << argv[2] << endl;
return 1;
}
ostream.open(argv[3], ifstream::out);
if (!ostream.is_open()) {
cout << "Error opening file " << argv[3] << endl;
return 1;
}
while (getline(istream, line)) {
input.push_back(line);
}
for (linenum=0; (size_t)linenum < input.size(); linenum++) {
line = input[linenum];
/* typedefs are detected and included even if uncommented */
if (line.substr(0,7) == "typedef" && line.find("{")!=string::npos) {
lines = get_lines(input, linenum, "}");
sout << "<pre><b>";
for (l=0; l<lines.size(); l++) {
print_line(sout, lines[l]);
}
sout << "</b></pre><BR>" << endl;
continue;
}
/* comments of type /**< and /*!< are detected and only function declaration is highlighted (bold) */
if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos) && line.find("*/")!=string::npos) {
sout << "<pre><b>";
print_line(sout, line);
sout << "</b></pre><BR>" << endl;
continue;
}
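/* detect the opening marker; its third character (stored in `exclam`)
 * selects the rendering mode below */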
spos = line.find("/**=");
if (spos==string::npos) {
spos = line.find("/*!");
if (spos==string::npos)
spos = line.find("/**");
if (spos==string::npos)
spos = line.find("/*-");
if (spos==string::npos)
spos = line.find("/*=");
if (spos==string::npos)
continue;
exclam = line[spos+2];
}
else exclam = '=';
comments = get_lines(input, linenum, "*/");
if (!comments.empty()) comments[0] = line.substr(spos+3);
if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
for (l=0; l<comments.size(); l++) {
if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2);
else if (comments[l].find("  *")==0) comments[l] = comments[l].substr(3);
trim(comments[l], "*-=");
}
while (!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end
while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start
/* comments of type /*! mean: this is a function declaration; switch comments with declarations */
if (exclam == '!') {
if (!comments.empty()) comments.erase(comments.begin()); /* remove first line like "ZSTD_XXX() :" */
linenum++;
lines = get_lines(input, linenum, "");
sout << "<pre><b>";
for (l=0; l<lines.size(); l++) {
// fprintf(stderr, "line[%d]=%s\n", l, lines[l].c_str());
string fline = lines[l];
if (fline.substr(0, 12) == "ZSTDLIB_API " ||
fline.substr(0, 12) == string(12, ' '))
fline = fline.substr(12);
print_line(sout, fline);
}
sout << "</b><p>";
for (l=0; l<comments.size(); l++) {
print_line(sout, comments[l]);
}
sout << "</p></pre><BR>" << endl << endl;
} else if (exclam == '=') { /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
trim(comments[0], " ");
sout << "<h3>" << comments[0] << "</h3><pre>";
for (l=1; l<comments.size(); l++) {
print_line(sout, comments[l]);
}
sout << "</pre><b><pre>";
lines = get_lines(input, ++linenum, "");
for (l=0; l<lines.size(); l++) {
print_line(sout, lines[l]);
}
sout << "</pre></b><BR>" << endl;
} else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */
if (comments.empty()) continue;
trim(comments[0], " ");
sout << "<a name=\"Chapter" << chapter << "\"></a><h2>" << comments[0] << "</h2><pre>";
chapters.push_back(comments[0]);
chapter++;
for (l=1; l<comments.size(); l++) {
print_line(sout, comments[l]);
}
if (comments.size() > 1)
sout << "<BR></pre>" << endl << endl;
else
sout << "</pre>" << endl << endl;
}
}
ostream << "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n<title>" << version << "</title>\n</head>\n<body>" << endl;
ostream << "<h1>" << version << "</h1>\n";
ostream << "<hr>\n<a name=\"Contents\"></a><h2>Contents</h2>\n<ol>\n";
for (size_t i=0; i<chapters.size(); i++)
ostream << "<li><a href=\"#Chapter" << i+1 << "\">" << chapters[i].c_str() << "</a></li>\n";
ostream << "</ol>\n<hr>\n";
ostream << sout.str();
ostream << "</html>" << endl << "</body>" << endl;
return 0;
}

View File

@ -0,0 +1,2 @@
# build artifacts
largeNbDicts

View File

@ -0,0 +1,58 @@
# ################################################################
# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
PROGDIR = ../../programs
LIBDIR = ../../lib
LIBZSTD = $(LIBDIR)/libzstd.a
CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR)
CFLAGS ?= -O3
CFLAGS += -std=gnu99
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum \
-Wstrict-prototypes -Wundef -Wpointer-arith \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-Wredundant-decls
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
default: largeNbDicts
all : largeNbDicts
largeNbDicts: util.o timefn.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
.PHONY: $(LIBZSTD)
$(LIBZSTD):
$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
benchfn.o: $(PROGDIR)/benchfn.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
timefn.o: $(PROGDIR)/timefn.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
datagen.o: $(PROGDIR)/datagen.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
util.o: $(PROGDIR)/util.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
xxhash.o : $(LIBDIR)/common/xxhash.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
clean:
$(RM) *.o
$(MAKE) -C $(LIBDIR) clean > /dev/null
$(RM) largeNbDicts

View File

@ -0,0 +1,25 @@
largeNbDicts
=====================
`largeNbDicts` is a benchmark test tool
dedicated to the specific scenario of
dictionary decompression using a very large number of dictionaries.
When dictionaries are constantly changing, they are always "cold",
suffering from increased latency due to cache misses.
The tool was created to investigate performance in this scenario
and to experiment with mitigation techniques.
Command line :
```
largeNbDicts [Options] filename(s)
Options :
-z : benchmark compression (default)
-d : benchmark decompression
-r : recursively load all files in subdirectories (default: off)
-B# : split input into blocks of size # (default: no split)
-# : use compression level # (default: 3)
-D # : use # as a dictionary (default: create one)
-i# : nb benchmark rounds (default: 6)
--nbBlocks=# : use # blocks for bench (default: one per file)
--nbDicts=# : set nb of dictionaries to # (default: one per block)
-h : help (this text)
```
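For example, `largeNbDicts -d -B4K --nbDicts=10000 file` (a hypothetical invocation) benchmarks decompression of `file` split into 4 KB blocks while cycling through 10000 copies of the dictionary.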

View File

@ -0,0 +1,998 @@
/*
* Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* largeNbDicts
* This is a benchmark test tool
* dedicated to the specific case of dictionary decompression
* using a very large nb of dictionaries
* thus suffering latency from lots of cache misses.
* It's created in a bid to investigate performance and find optimizations. */
/*--- Dependencies ---*/
#include <stddef.h> /* size_t */
#include <stdlib.h> /* malloc, free, abort */
#include <stdio.h> /* fprintf */
#include <limits.h> /* UINT_MAX */
#include <assert.h> /* assert */
#include "util.h"
#include "benchfn.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zdict.h"
/*--- Constants --- */
#define KB *(1<<10)
#define MB *(1<<20)
#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
#define DICTSIZE (4 KB)
#define CLEVEL_DEFAULT 3
#define BENCH_TIME_DEFAULT_S 6
#define RUN_TIME_DEFAULT_MS 1000
#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
#define DISPLAY_LEVEL_DEFAULT 3
#define BENCH_SIZE_MAX (1200 MB)
/*--- Macros ---*/
#define CONTROL(c) { if (!(c)) abort(); }
#undef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
/*--- Display Macros ---*/
#define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
/*--- buffer_t ---*/
typedef struct {
void* ptr;
size_t size;
size_t capacity;
} buffer_t;
static const buffer_t kBuffNull = { NULL, 0, 0 };
/* @return : kBuffNull if any error */
static buffer_t createBuffer(size_t capacity)
{
assert(capacity > 0);
void* const ptr = malloc(capacity);
if (ptr==NULL) return kBuffNull;
buffer_t buffer;
buffer.ptr = ptr;
buffer.capacity = capacity;
buffer.size = 0;
return buffer;
}
static void freeBuffer(buffer_t buff)
{
free(buff.ptr);
}
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
size_t const readSize = fread(buff->ptr, 1, buff->capacity, f);
buff->size = readSize;
}
/* @return : kBuffNull if any error */
static buffer_t createBuffer_fromFile(const char* fileName)
{
U64 const fileSize = UTIL_getFileSize(fileName);
size_t const bufferSize = (size_t) fileSize;
if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
assert((U64)bufferSize == fileSize); /* check overflow */
{ FILE* const f = fopen(fileName, "rb");
if (f == NULL) return kBuffNull;
buffer_t buff = createBuffer(bufferSize);
CONTROL(buff.ptr != NULL);
fillBuffer_fromHandle(&buff, f);
CONTROL(buff.size == buff.capacity);
fclose(f); /* do nothing specific if fclose() fails */
return buff;
}
}
/* @return : kBuffNull if any error */
static buffer_t
createDictionaryBuffer(const char* dictionaryName,
const void* srcBuffer,
const size_t* srcBlockSizes, size_t nbBlocks,
size_t requestedDictSize)
{
if (dictionaryName) {
DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */
} else {
DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
(unsigned)requestedDictSize);
void* const dictBuffer = malloc(requestedDictSize);
CONTROL(dictBuffer != NULL);
assert(nbBlocks <= UINT_MAX);
size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
srcBuffer,
srcBlockSizes, (unsigned)nbBlocks);
CONTROL(!ZSTD_isError(dictSize));
buffer_t result;
result.ptr = dictBuffer;
result.capacity = requestedDictSize;
result.size = dictSize;
return result;
}
}
static ZSTD_CDict* createCDictForDedicatedDictSearch(const void* dict, size_t dictSize, int compressionLevel)
{
ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
ZSTD_CCtxParams_init(params, compressionLevel);
ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableDedicatedDictSearch, 1);
ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, compressionLevel);
ZSTD_CDict* cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, params, ZSTD_defaultCMem);
ZSTD_freeCCtxParams(params);
return cdict;
}
/*! loadFiles() :
* Loads `buffer`, with content from files listed within `fileNamesTable`.
* Fills `buffer` entirely.
* @return : 0 on success, !=0 on error */
static int loadFiles(void* buffer, size_t bufferSize,
size_t* fileSizes,
const char* const * fileNamesTable, unsigned nbFiles)
{
size_t pos = 0, totalSize = 0;
for (unsigned n=0; n<nbFiles; n++) {
U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
if (UTIL_isDirectory(fileNamesTable[n])) {
fileSizes[n] = 0;
continue;
}
if (fileSize == UTIL_FILESIZE_UNKNOWN) {
fileSizes[n] = 0;
continue;
}
FILE* const f = fopen(fileNamesTable[n], "rb");
assert(f!=NULL);
assert(pos <= bufferSize);
assert(fileSize <= bufferSize - pos);
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
assert(readSize == fileSize);
pos += readSize;
}
fileSizes[n] = (size_t)fileSize;
totalSize += (size_t)fileSize;
fclose(f);
}
assert(totalSize == bufferSize);
return 0;
}
/*--- slice_collection_t ---*/
typedef struct {
void** slicePtrs;
size_t* capacities;
size_t nbSlices;
} slice_collection_t;
static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
static void freeSliceCollection(slice_collection_t collection)
{
free(collection.slicePtrs);
free(collection.capacities);
}
/* shrinkSizes() :
* downsizes sizes of slices within collection, according to `newSizes`.
* every `newSizes` entry must be <= its corresponding collection size */
void shrinkSizes(slice_collection_t collection,
const size_t* newSizes) /* presumed same size as collection */
{
size_t const nbSlices = collection.nbSlices;
for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
assert(newSizes[blockNb] <= collection.capacities[blockNb]);
collection.capacities[blockNb] = newSizes[blockNb];
}
}
/* splitSlices() :
* nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
* otherwise, creates exactly nbSlices slices,
* by either truncating input (when smaller)
* or repeating input from beginning */
static slice_collection_t
splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
{
if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */
size_t nbSrcBlocks = 0;
for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
size_t pos = 0;
while (pos <= srcSlices.capacities[ssnb]) {
nbSrcBlocks++;
pos += blockSize;
}
}
if (nbSlices == 0) nbSlices = nbSrcBlocks;
void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
if (sliceTable == NULL || capacities == NULL) {
free(sliceTable);
free(capacities);
return kNullCollection;
}
size_t ssnb = 0;
for (size_t sliceNb=0; sliceNb < nbSlices; ) {
ssnb = (ssnb + 1) % srcSlices.nbSlices;
size_t pos = 0;
char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
sliceTable[sliceNb] = ptr + pos;
capacities[sliceNb] = size;
sliceNb++;
pos += blockSize;
}
}
slice_collection_t result;
result.nbSlices = nbSlices;
result.slicePtrs = sliceTable;
result.capacities = capacities;
return result;
}
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
{
size_t totalSize = 0;
for (size_t n=0; n<sc.nbSlices; n++)
totalSize += sc.capacities[n];
return totalSize;
}
/* --- buffer collection --- */
typedef struct {
buffer_t buffer;
slice_collection_t slices;
} buffer_collection_t;
static void freeBufferCollection(buffer_collection_t bc)
{
freeBuffer(bc.buffer);
freeSliceCollection(bc.slices);
}
static buffer_collection_t
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
{
size_t const bufferSize = sliceCollection_totalCapacity(sc);
buffer_t buffer = createBuffer(bufferSize);
CONTROL(buffer.ptr != NULL);
size_t const nbSlices = sc.nbSlices;
void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
CONTROL(slices != NULL);
size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
CONTROL(capacities != NULL);
char* const ptr = (char*)buffer.ptr;
size_t pos = 0;
for (size_t n=0; n < nbSlices; n++) {
capacities[n] = sc.capacities[n];
slices[n] = ptr + pos;
pos += capacities[n];
}
buffer_collection_t result;
result.buffer = buffer;
result.slices.nbSlices = nbSlices;
result.slices.capacities = capacities;
result.slices.slicePtrs = slices;
return result;
}
static buffer_collection_t
createBufferCollection_fromSliceCollection(slice_collection_t sc)
{
size_t const bufferSize = sliceCollection_totalCapacity(sc);
buffer_t buffer = createBuffer(bufferSize);
CONTROL(buffer.ptr != NULL);
size_t const nbSlices = sc.nbSlices;
void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
CONTROL(slices != NULL);
size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
CONTROL(capacities != NULL);
char* const ptr = (char*)buffer.ptr;
size_t pos = 0;
for (size_t n=0; n < nbSlices; n++) {
capacities[n] = sc.capacities[n];
slices[n] = ptr + pos;
pos += capacities[n];
}
for (size_t i = 0; i < nbSlices; i++) {
memcpy(slices[i], sc.slicePtrs[i], sc.capacities[i]);
capacities[i] = sc.capacities[i];
}
buffer_collection_t result;
result.buffer = buffer;
result.slices.nbSlices = nbSlices;
result.slices.capacities = capacities;
result.slices.slicePtrs = slices;
return result;
}
/* @return : kBuffNull if any error */
static buffer_collection_t
createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
{
U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
assert(totalSizeToLoad <= BENCH_SIZE_MAX);
size_t const loadedSize = (size_t)totalSizeToLoad;
assert(loadedSize > 0);
void* const srcBuffer = malloc(loadedSize);
assert(srcBuffer != NULL);
assert(nbFiles > 0);
size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
assert(fileSizes != NULL);
/* Load input buffer */
int const errorCode = loadFiles(srcBuffer, loadedSize,
fileSizes,
fileNamesTable, nbFiles);
assert(errorCode == 0);
void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
assert(sliceTable != NULL);
char* const ptr = (char*)srcBuffer;
size_t pos = 0;
unsigned fileNb = 0;
for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
sliceTable[fileNb] = ptr + pos;
pos += fileSizes[fileNb];
}
assert(pos == loadedSize);
assert(fileNb == nbFiles);
buffer_t buffer;
buffer.ptr = srcBuffer;
buffer.capacity = loadedSize;
buffer.size = loadedSize;
slice_collection_t slices;
slices.slicePtrs = sliceTable;
slices.capacities = fileSizes;
slices.nbSlices = nbFiles;
buffer_collection_t bc;
bc.buffer = buffer;
bc.slices = slices;
return bc;
}
/*--- ddict_collection_t ---*/
typedef struct {
ZSTD_DDict** ddicts;
size_t nbDDict;
} ddict_collection_t;
typedef struct {
ZSTD_CDict** cdicts;
size_t nbCDict;
} cdict_collection_t;
static const cdict_collection_t kNullCDictCollection = { NULL, 0 };
static void freeCDictCollection(cdict_collection_t cdictc)
{
for (size_t dictNb=0; dictNb < cdictc.nbCDict; dictNb++) {
ZSTD_freeCDict(cdictc.cdicts[dictNb]);
}
free(cdictc.cdicts);
}
/* returns .cdicts=NULL if operation fails */
static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel, int dedicatedDictSearch)
{
ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*));
if (cdicts==NULL) return kNullCDictCollection;
for (size_t dictNb=0; dictNb < nbCDict; dictNb++) {
cdicts[dictNb] = dedicatedDictSearch ?
createCDictForDedicatedDictSearch(dictBuffer, dictSize, cLevel) :
ZSTD_createCDict(dictBuffer, dictSize, cLevel);
CONTROL(cdicts[dictNb] != NULL);
}
cdict_collection_t cdictc;
cdictc.cdicts = cdicts;
cdictc.nbCDict = nbCDict;
return cdictc;
}
static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
static void freeDDictCollection(ddict_collection_t ddictc)
{
for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
ZSTD_freeDDict(ddictc.ddicts[dictNb]);
}
free(ddictc.ddicts);
}
/* returns .ddicts=NULL if operation fails */
static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
{
ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
assert(ddicts != NULL);
if (ddicts==NULL) return kNullDDictCollection;
for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
assert(ddicts[dictNb] != NULL);
}
ddict_collection_t ddictc;
ddictc.ddicts = ddicts;
ddictc.nbDDict = nbDDict;
return ddictc;
}
/* mess with addresses, so that linear scanning dictionaries != linear address scanning */
void shuffleCDictionaries(cdict_collection_t dicts)
{
size_t const nbDicts = dicts.nbCDict;
for (size_t r=0; r<nbDicts; r++) {
size_t const d = (size_t)rand() % nbDicts;
ZSTD_CDict* tmpd = dicts.cdicts[d];
dicts.cdicts[d] = dicts.cdicts[r];
dicts.cdicts[r] = tmpd;
}
for (size_t r=0; r<nbDicts; r++) {
size_t const d1 = (size_t)rand() % nbDicts;
size_t const d2 = (size_t)rand() % nbDicts;
ZSTD_CDict* tmpd = dicts.cdicts[d1];
dicts.cdicts[d1] = dicts.cdicts[d2];
dicts.cdicts[d2] = tmpd;
}
}
/* mess with addresses, so that linear scanning dictionaries != linear address scanning */
void shuffleDDictionaries(ddict_collection_t dicts)
{
size_t const nbDicts = dicts.nbDDict;
for (size_t r=0; r<nbDicts; r++) {
size_t const d = (size_t)rand() % nbDicts;
ZSTD_DDict* tmpd = dicts.ddicts[d];
dicts.ddicts[d] = dicts.ddicts[r];
dicts.ddicts[r] = tmpd;
}
for (size_t r=0; r<nbDicts; r++) {
size_t const d1 = (size_t)rand() % nbDicts;
size_t const d2 = (size_t)rand() % nbDicts;
ZSTD_DDict* tmpd = dicts.ddicts[d1];
dicts.ddicts[d1] = dicts.ddicts[d2];
dicts.ddicts[d2] = tmpd;
}
}
/* --- Compression --- */
/* compressBlocks() :
* @return : total compressed size of all blocks,
* or 0 if error.
*/
static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
slice_collection_t dstBlockBuffers,
slice_collection_t srcBlockBuffers,
ZSTD_CDict* cdict, int cLevel)
{
size_t const nbBlocks = srcBlockBuffers.nbSlices;
assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
assert(cctx != NULL);
size_t totalCSize = 0;
for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
size_t cBlockSize;
if (cdict == NULL) {
cBlockSize = ZSTD_compressCCtx(cctx,
dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
cLevel);
} else {
cBlockSize = ZSTD_compress_usingCDict(cctx,
dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
cdict);
}
CONTROL(!ZSTD_isError(cBlockSize));
if (cSizes) cSizes[blockNb] = cBlockSize;
totalCSize += cBlockSize;
}
return totalCSize;
}
/* --- Benchmark --- */
typedef struct {
ZSTD_CCtx* cctx;
size_t nbDicts;
size_t dictNb;
cdict_collection_t dictionaries;
} compressInstructions;
compressInstructions createCompressInstructions(cdict_collection_t dictionaries)
{
compressInstructions ci;
ci.cctx = ZSTD_createCCtx();
CONTROL(ci.cctx != NULL);
ci.nbDicts = dictionaries.nbCDict;
ci.dictNb = 0;
ci.dictionaries = dictionaries;
return ci;
}
void freeCompressInstructions(compressInstructions ci)
{
ZSTD_freeCCtx(ci.cctx);
}
typedef struct {
ZSTD_DCtx* dctx;
size_t nbDicts;
size_t dictNb;
ddict_collection_t dictionaries;
} decompressInstructions;
decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
{
decompressInstructions di;
di.dctx = ZSTD_createDCtx();
assert(di.dctx != NULL);
di.nbDicts = dictionaries.nbDDict;
di.dictNb = 0;
di.dictionaries = dictionaries;
return di;
}
void freeDecompressInstructions(decompressInstructions di)
{
ZSTD_freeDCtx(di.dctx);
}
/* benched function */
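/* cycles through the CDict collection so that successive calls use
 * different (cold) dictionaries, which is the scenario this tool measures */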
size_t compress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
{
compressInstructions* const ci = (compressInstructions*) payload;
(void)dstCapacity;
ZSTD_compress_usingCDict(ci->cctx,
dst, srcSize,
src, srcSize,
ci->dictionaries.cdicts[ci->dictNb]);
ci->dictNb = ci->dictNb + 1;
if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0;
return srcSize;
}
/* benched function */
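/* cycles through the DDict collection for the same reason:
 * successive calls should hit different (cold) dictionaries */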
size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
{
decompressInstructions* const di = (decompressInstructions*) payload;
size_t const result = ZSTD_decompress_usingDDict(di->dctx,
dst, dstCapacity,
src, srcSize,
di->dictionaries.ddicts[di->dictNb]);
di->dictNb = di->dictNb + 1;
if (di->dictNb >= di->nbDicts) di->dictNb = 0;
return result;
}
static int benchMem(slice_collection_t dstBlocks,
slice_collection_t srcBlocks,
ddict_collection_t ddictionaries,
cdict_collection_t cdictionaries,
unsigned nbRounds, int benchCompression)
{
assert(dstBlocks.nbSlices == srcBlocks.nbSlices);
unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
unsigned const total_time_ms = nbRounds * ms_per_round;
double bestSpeed = 0.;
BMK_timedFnState_t* const benchState =
BMK_createTimedFnState(total_time_ms, ms_per_round);
decompressInstructions di = createDecompressInstructions(ddictionaries);
compressInstructions ci = createCompressInstructions(cdictionaries);
void* payload = benchCompression ? (void*)&ci : (void*)&di;
BMK_benchParams_t const bp = {
.benchFn = benchCompression ? compress : decompress,
.benchPayload = payload,
.initFn = NULL,
.initPayload = NULL,
.errorFn = ZSTD_isError,
.blockCount = dstBlocks.nbSlices,
.srcBuffers = (const void* const*) srcBlocks.slicePtrs,
.srcSizes = srcBlocks.capacities,
.dstBuffers = dstBlocks.slicePtrs,
.dstCapacities = dstBlocks.capacities,
.blockResults = NULL
};
for (;;) {
BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
CONTROL(BMK_isSuccessful_runOutcome(outcome));
BMK_runTime_t const result = BMK_extract_runTime(outcome);
double const dTime_ns = result.nanoSecPerRun;
double const dTime_sec = (double)dTime_ns / 1000000000;
size_t const srcSize = result.sumOfReturn;
double const speed_MBps = (double)srcSize / dTime_sec / (1 MB);
if (speed_MBps > bestSpeed) bestSpeed = speed_MBps;
if (benchCompression)
DISPLAY("Compression Speed : %.1f MB/s \r", bestSpeed);
else
DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
fflush(stdout);
if (BMK_isCompleted_TimedFn(benchState)) break;
}
DISPLAY("\n");
freeDecompressInstructions(di);
freeCompressInstructions(ci);
BMK_freeTimedFnState(benchState);
return 0; /* success */
}
/*! bench() :
* fileName : file to load for benchmarking purpose
* dictionary : optional (can be NULL), file to load as dictionary,
* if none provided : will be calculated on the fly by the program.
* @return : 0 is success, 1+ otherwise */
int bench(const char** fileNameTable, unsigned nbFiles,
const char* dictionary,
size_t blockSize, int clevel,
unsigned nbDictMax, unsigned nbBlocks,
unsigned nbRounds, int benchCompression,
int dedicatedDictSearch)
{
int result = 0;
DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
CONTROL(srcs.buffer.ptr != NULL);
buffer_t srcBuffer = srcs.buffer;
size_t const srcSize = srcBuffer.size;
DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
(double)srcSize / (1 MB));
slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
nbBlocks = (unsigned)(srcSlices.nbSlices);
DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
if (blockSize)
DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
DISPLAYLEVEL(3, "\n");
size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
CONTROL(dstCapacities != NULL);
size_t dstBufferCapacity = 0;
for (size_t bnb=0; bnb<nbBlocks; bnb++) {
dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
dstBufferCapacity += dstCapacities[bnb];
}
buffer_t dstBuffer = createBuffer(dstBufferCapacity);
CONTROL(dstBuffer.ptr != NULL);
void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
CONTROL(sliceTable != NULL);
{ char* const ptr = dstBuffer.ptr;
size_t pos = 0;
for (size_t snb=0; snb < nbBlocks; snb++) {
sliceTable[snb] = ptr + pos;
pos += dstCapacities[snb];
} }
slice_collection_t dstSlices;
dstSlices.capacities = dstCapacities;
dstSlices.slicePtrs = sliceTable;
dstSlices.nbSlices = nbBlocks;
/* dictionary determination */
buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
srcs.buffer.ptr,
srcs.slices.capacities, srcs.slices.nbSlices,
DICTSIZE);
CONTROL(dictBuffer.ptr != NULL);
ZSTD_CDict* const cdict = dedicatedDictSearch ?
createCDictForDedicatedDictSearch(dictBuffer.ptr, dictBuffer.size, clevel) :
ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
CONTROL(cdict != NULL);
size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
CONTROL(cTotalSizeNoDict != 0);
DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
clevel,
(double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
CONTROL(cSizes != NULL);
size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
CONTROL(cTotalSize != 0);
DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
(unsigned)dictBuffer.size,
(double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
/* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
shrinkSizes(dstSlices, cSizes);
unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel, dedicatedDictSearch);
CONTROL(cdictionaries.cdicts != NULL);
ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
CONTROL(ddictionaries.ddicts != NULL);
if (benchCompression) {
size_t const dictMem = ZSTD_estimateCDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
size_t const allDictMem = dictMem * nbDicts;
DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
nbDicts, (double)allDictMem / (1 MB));
shuffleCDictionaries(cdictionaries);
buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices);
CONTROL(resultCollection.buffer.ptr != NULL);
result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression);
freeBufferCollection(resultCollection);
} else {
size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
size_t const allDictMem = dictMem * nbDicts;
DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
nbDicts, (double)allDictMem / (1 MB));
shuffleDDictionaries(ddictionaries);
buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
CONTROL(resultCollection.buffer.ptr != NULL);
result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression);
freeBufferCollection(resultCollection);
}
/* free all heap objects, roughly in reverse order of allocation */
freeCDictCollection(cdictionaries);
freeDDictCollection(ddictionaries);
free(cSizes);
ZSTD_freeCDict(cdict);
freeBuffer(dictBuffer);
freeSliceCollection(dstSlices);
freeBuffer(dstBuffer);
freeSliceCollection(srcSlices);
freeBufferCollection(srcs);
return result;
}
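/*
 * Minimal sketch (not part of this tool) of the per-block compression step
 * exercised above when benchmarking with -z. `compressOneBlock` is a
 * hypothetical helper name; ZSTD_compress_usingCDict() and ZSTD_isError()
 * are real zstd API already available through this file's includes.
 */
static size_t compressOneBlock(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const ZSTD_CDict* cdict)
{
    /* dstCapacity is assumed >= ZSTD_compressBound(srcSize),
     * mirroring how dstSlices capacities are sized in bench() */
    size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                                                  src, srcSize, cdict);
    return ZSTD_isError(cSize) ? 0 : cSize;  /* 0 signals failure, as CONTROL() expects */
}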
/* --- Command Line --- */
/*! readU32FromChar() :
 * @return : unsigned integer value read from input in `char` format.
 * Allows and interprets K, KB, KiB, M, MB and MiB suffixes.
 * Will also modify `*stringPtr`, advancing it to the position where it stopped reading.
 * Note : overflow of the digit sequence is caught by assert(), so it aborts the program in debug builds only */
static unsigned readU32FromChar(const char** stringPtr)
{
unsigned result = 0;
while ((**stringPtr >='0') && (**stringPtr <='9')) {
unsigned const max = (((unsigned)(-1)) / 10) - 1;
assert(result <= max); /* check overflow */
result *= 10, result += (unsigned)**stringPtr - '0', (*stringPtr)++ ;
}
if ((**stringPtr=='K') || (**stringPtr=='M')) {
unsigned const maxK = ((unsigned)(-1)) >> 10;
assert(result <= maxK); /* check overflow */
result <<= 10;
if (**stringPtr=='M') {
assert(result <= maxK); /* check overflow */
result <<= 10;
}
(*stringPtr)++; /* skip `K` or `M` */
if (**stringPtr=='i') (*stringPtr)++;
if (**stringPtr=='B') (*stringPtr)++;
}
return result;
}
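/* Behavior examples for readU32FromChar() (32-bit unsigned assumed):
 *   "64"   -> 64        (pointer left on the terminating character)
 *   "64K"  -> 65536     ('K' consumed)
 *   "4MiB" -> 4194304   ('M', 'i' and 'B' consumed)
 */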
/** longCommandWArg() :
 * Checks if *stringPtr begins with longCommand.
 * If yes, @return 1 and advances *stringPtr to the position immediately following longCommand.
 * @return 0 and doesn't modify *stringPtr otherwise.
 */
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
size_t const comSize = strlen(longCommand);
int const result = !strncmp(*stringPtr, longCommand, comSize);
if (result) *stringPtr += comSize;
return result;
}
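/* Example : with argument = "--nbDicts=16",
 * longCommandWArg(&argument, "--nbDicts=") returns 1 and leaves argument
 * pointing at "16", ready for readU32FromChar(); this is the pattern used
 * throughout main() below. */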
int usage(const char* exeName)
{
DISPLAY (" \n");
DISPLAY (" %s [Options] filename(s) \n", exeName);
DISPLAY (" \n");
DISPLAY ("Options : \n");
DISPLAY ("-z : benchmark compression (default) \n");
DISPLAY ("-d : benchmark decompression \n");
DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
DISPLAY ("-h : help (this text) \n");
return 0;
}
int bad_usage(const char* exeName)
{
DISPLAY (" bad usage : \n");
usage(exeName);
return 1;
}
int main (int argc, const char** argv)
{
int recursiveMode = 0;
int benchCompression = 1;
int dedicatedDictSearch = 0;
unsigned nbRounds = BENCH_TIME_DEFAULT_S;
const char* const exeName = argv[0];
if (argc < 2) return bad_usage(exeName);
const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*));
assert(nameTable != NULL);
unsigned nameIdx = 0;
const char* dictionary = NULL;
int cLevel = CLEVEL_DEFAULT;
size_t blockSize = BLOCKSIZE_DEFAULT;
unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
for (int argNb = 1; argNb < argc ; argNb++) {
const char* argument = argv[argNb];
if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
if (!strcmp(argument, "-d")) { benchCompression = 0; continue; }
if (!strcmp(argument, "-z")) { benchCompression = 1; continue; }
if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
/* anything that's not a command is a filename */
nameTable[nameIdx++] = argument;
}
FileNamesTable* filenameTable;
if (recursiveMode) {
#ifndef UTIL_HAS_CREATEFILELIST
assert(0); /* missing capability, do not run */
#endif
filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */);
} else {
filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL);
nameTable = NULL; /* UTIL_assembleFileNamesTable() takes ownership of nameTable */
}
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dedicatedDictSearch);
UTIL_freeFileNamesTable(filenameTable);
free(nameTable);
return result;
}
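/*
 * Illustrative invocations (binary and file names hypothetical):
 *   ./largeNbDicts -z -B4096 --nbDicts=16 corpus.bin       compression bench, 4 KB blocks
 *   ./largeNbDicts -d -3 -i6 samples/f1 samples/f2         decompression bench, level 3, 6 rounds
 */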