Snappy 快速压缩工具

发布时间: 更新时间: 总字数:809 阅读时间:2m 作者: IP上海 分享 网址

Snappy(也称Zippy)是Google基于LZ77的思路用C++语言编写的快速数据压缩与解压程序库,在2011年开源

介绍

  • Snappy 的特点是非常高的速度和合理的压缩率
    • 使用一个运行在64位酷睿i7处理器的单个核心,压缩速度250 MB/s,解压速度500 MB/s
    • 压缩率比 gzip 低:压缩后的体积通常比 gzip 的结果大 20%-100%
  • Stream format
  • 应用
    • Google 内部的 BigTable、MapReduce 等
    • 开源数据库,如 Cassandra、Couchbase、Hadoop、LevelDB、MongoDB、RocksDB、Lucene、Spark 和 InfluxDB
  • 源码
  • lib 库实现

安装

snappy lib

apt update
# git is required by the clone step below; build-essential provides g++ and make.
apt install cmake gcc build-essential git -y

# Alternative: build from the release tarball instead of a git checkout.
# wget would otherwise save the archive as "1.1.10.tar.gz", so name the download
# explicitly to match the tar command that follows.
# NOTE: the tarball does not contain the git submodules; when building from it,
# pass -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF to cmake.
# wget -O snappy-1.1.10.tar.gz https://github.com/google/snappy/archive/refs/tags/1.1.10.tar.gz
# tar -zxvf snappy-1.1.10.tar.gz
# cd snappy-1.1.10

git clone https://github.com/google/snappy.git -b 1.1.10
cd snappy
# Fetch the submodules used by the tests/benchmarks that the default configuration builds.
git submodule update --init

mkdir build
cd build
cmake ../
make
make install

# Alternative to the mkdir/cmake/make steps above (run from the source root):
# cmake -S $(pwd) -B build -DCMAKE_BUILD_TYPE=RelWithDebInfo
# cmake --build build --target install --config "RelWithDebInfo"

# 安装在如下目录
-- Up-to-date: /usr/local/lib/libsnappy.a
-- Up-to-date: /usr/local/include/snappy-c.h
-- Up-to-date: /usr/local/include/snappy-sinksource.h
-- Up-to-date: /usr/local/include/snappy.h
-- Up-to-date: /usr/local/include/snappy-stubs-public.h

说明:

  • libsnappy.a 是 snappy 的静态库
  • snappy.h、snappy-c.h、snappy-sinksource.h、snappy-stubs-public.h 是 snappy 的头文件(snappy.h 为 C++ 接口,snappy-c.h 为 C 接口)

验证 snappy lib

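可跳过该步骤。注意:示例直接把压缩后的二进制数据打印到终端,"abc" 的压缩结果是 5 个字节 0x03 0x08 'a' 'b' 'c',其中 0x08 是退格符,会让终端把 "result:" 末尾的冒号覆盖掉,所以下面的输出显示为 resultabc,并非笔误。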

  • test.cc
// Smoke test for the installed snappy library: compresses a short string and
// prints the input, the raw compressed bytes, and both sizes.
#include <iostream>
#include <string>

#include "snappy.h"

int main() {
    const std::string input = "abc";
    std::string compressed;

    std::cout << "original:" << input << std::endl;

    // Compress() writes the compressed bytes into the string passed as the
    // third argument.
    snappy::Compress(input.data(), input.size(), &compressed);

    // The compressed buffer is written to stdout as-is (binary data).
    std::cout << "result:" << compressed << std::endl;
    std::cout << "original-size:" << input.size()
              << " result-size:" << compressed.size() << std::endl;
    return 0;
}
$ g++ -o example test.cc -lsnappy
$ ./example
original:abc
resultabc
original-size:3 result-size:5

snzip

  • snzip 基于snappy的压缩/解压缩命令行工具
# 由于官方源码默认只能编译出静态库 libsnappy.a,这里改为安装发行版提供的 libsnappy-dev,参考 https://packages.ubuntu.com/search?keywords=libsnappy-dev
apt install libsnappy-dev

wget https://github.com/kubo/snzip/releases/download/v1.0.5/snzip-1.0.5.tar.gz
tar xvfz snzip-1.0.5.tar.gz
cd snzip-1.0.5
./configure --with-default-format=snzip
make
make install
  • ps:如果要基于 snappy 1.1.10 源码构建,需要为 CMakeLists.txt 打补丁(增加静态库目标),参考 Debian 源码包中的 snappy_1.1.10-1.debian.tar.xz,如下:
build_static_lib.patch ...
$ cat patches/build_static_lib.patch
Description: add static library build
 Add upstream missed static library target.
Author: Laszlo Boszormenyi (GCS) <gcs@debian.org>
Last-Update: 2021-12-04

---

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -249,6 +249,25 @@ if(BUILD_SHARED_LIBS)
   set_target_properties(snappy PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
 endif(BUILD_SHARED_LIBS)

+ADD_LIBRARY(snappy-static STATIC
+        snappy-c.cc
+        snappy-c.h
+        snappy-sinksource.cc
+        snappy-sinksource.h
+        snappy-stubs-internal.cc
+        snappy-stubs-public.h
+        snappy.cc
+        snappy.h)
+
+target_include_directories(snappy-static
+  PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
+    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+    $<INSTALL_INTERFACE:include>
+)
+
+SET_TARGET_PROPERTIES(snappy-static PROPERTIES OUTPUT_NAME snappy)
+
 if(SNAPPY_BUILD_TESTS OR SNAPPY_BUILD_BENCHMARKS)
   add_library(snappy_test_support "")
   target_sources(snappy_test_support
@@ -359,7 +378,7 @@ endif(SNAPPY_FUZZING_BUILD)
 include(GNUInstallDirs)

 if(SNAPPY_INSTALL)
-  install(TARGETS snappy
+  install(TARGETS snappy snappy-static
     EXPORT SnappyTargets
     RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
     LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}

help

snzip -h ...
$ snzip -h
snzip 1.0.5

  Usage: snzip [option ...] [file ...]

  general options:
   -c       output to standard output, keep original files unchanged
   -d       decompress
   -k       keep (don't delete) input files
   -t name  file format name. see below. The default format is snzip.
   -h       give this help

  raw_format option:
   -s size  size of input data when compressing.
            The default value is the file size if available.

  tuning options:
   -b num   internal block size in bytes
   -B num   internal block size. 'num'-th power of two.
   -R num   size of read buffer in bytes
   -W num   size of write buffer in bytes
   -T       trace for debug

  supported formats:
    NAME            SUFFIX  URL
    ----            ------  ---
    framing2        sz      https://github.com/google/snappy/blob/master/framing_format.txt
    hadoop-snappy   snappy  https://code.google.com/p/hadoop-snappy/
    raw             raw     https://github.com/google/snappy/blob/master/format_description.txt
    iwa             iwa     https://github.com/obriensp/iWorkFileFormat/blob/master/Docs/index.md#snappy-compression
    framing         sz      https://github.com/google/snappy/blob/0755c815197dacc77d8971ae917c86d7aa96bf8e/framing_format.txt
    snzip           snz     https://github.com/kubo/snzip
    snappy-java     snappy  https://github.com/xerial/snappy-java
    snappy-in-java  snappy  https://github.com/dain/snappy
    comment-43      snappy  http://code.google.com/p/snappy/issues/detail?id=34#c43

使用示例

# 压缩
$ snzip snzip-1.0.5.tar.gz

# 解压
$ snzip -d snzip-1.0.5.tar.gz.snz

lib

Python

  • 安装
pip install python-snappy
  • 使用
# python2
import snappy
# Read the file as bytes and close it deterministically via the context manager.
with open('/tmp/temp.snappy', 'rb') as f:
    compressed = f.read()
snappy.uncompress(compressed)

# python3
import snappy
with open('/tmp/temp.snappy', 'rb') as f:
    compressed = f.read()
# Decode the uncompressed result as UTF-8 for display, ignoring undecodable bytes.
print(snappy.uncompress(compressed).decode(encoding='utf-8', errors="ignore"))

参考

  1. https://google.github.io/snappy/
  2. https://zh.wikipedia.org/wiki/Snappy
Home Archives Categories Tags Statistics
本文总阅读量 次 本站总访问量 次 本站总访客数